<a href="https://colab.research.google.com/github/brothermin00/JNU_2023/blob/main/LSTM_weather_2023.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Modules

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): none of the cells visible in this notebook read from or write
# to /content/drive (the dataset is downloaded into the working directory and
# the figures are saved with relative paths), so this mount may be
# unnecessary — confirm before removing.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive

In [None]:
import pandas as pd
import numpy as np
from tensorflow import keras
import matplotlib.pyplot as plt
import os

Download Dataset

In [None]:
!wget https://github.com/onebottlekick/JNU_dl/releases/download/lstm/jena_climate_2009_2016.csv

Show Dataset

In [None]:
# Preview the downloaded CSV. The DataFrame is the cell's last expression, so
# the notebook renders it; it is not stored in a variable.
pd.read_csv('jena_climate_2009_2016.csv')

Data Reader

In [None]:
class DataReader():
    """Load the Jena climate CSV and expose windowed train/test arrays.

    Attributes:
        headers: Empty list; it is not populated anywhere in this class.
        train_X, train_Y: Input and target windows built from the first 80%
            of the rows that are kept.
        test_X, test_Y: Input and target windows built from the remaining 20%.
    """

    def __init__(self, window_size):
        """Read the data set, build the windows and print their shapes.

        Args:
            window_size: Number of consecutive rows in every input window and
                in every target window.
        """
        self.headers = []
        self.train_X, self.train_Y, self.test_X, self.test_Y = self.read_data(window_size)

        # Loading is complete; report the shapes of the arrays that were built.
        print("\n\nData Read Done!")
        print("Training X Size : " + str(self.train_X.shape))
        print("Training Y Size : " + str(self.train_Y.shape))
        print("Test X Size : " + str(self.test_X.shape))
        print("Test Y Size : " + str(self.test_Y.shape) + '\n\n')

    def read_data(self, window_size):
        """Load, subsample, normalise, split and window the CSV.

        Args:
            window_size: Length of each input window and each target window.

        Returns:
            Tuple ``(train_X, train_Y, test_X, test_Y)`` of NumPy arrays.
        """
        filename = 'jena_climate_2009_2016.csv'
        # Skip the header row and column 0; load the 14 columns 1..14 as floats.
        data = np.loadtxt(filename, delimiter=",", skiprows=1, usecols=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14))

        # Keep only the first half of the rows to reduce the data volume.
        data = data[:int(data.shape[0]*0.5), :]

        # Per-column scaling into (0, 1]: shift so the minimum becomes 0.0001,
        # then divide by the column maximum.
        # NOTE(review): the minimum and maximum are taken over the whole array,
        # i.e. before the train/test split below, so the test rows influence
        # the scaling applied to the training rows.
        data = data - np.min(data, axis=0) + 0.0001
        data = data / np.max(data, axis=0)

        # Chronological 80/20 split (no shuffling).
        split_at = int(len(data) * 0.8)
        train_data = data[:split_at]
        test_data = data[split_at:]

        train_X, train_Y = self.windowing(train_data, window_size)
        test_X, test_Y = self.windowing(test_data, window_size)

        return train_X, train_Y, test_X, test_Y

    def windowing(self, array, window_size):
        """Slice ``array`` into consecutive (input, target) window pairs.

        Pair ``i`` consists of rows ``[i, i + window_size)`` as input and the
        immediately following rows ``[i + window_size, i + 2 * window_size)``
        as target.

        Args:
            array: Array-like whose first axis is the one being windowed.
            window_size: Positive number of rows per window.

        Returns:
            Tuple ``(X, Y)``, each of shape
            ``(n_pairs, window_size, *array.shape[1:])``. ``n_pairs`` is 0 when
            ``array`` has fewer than ``2 * window_size`` rows.

        Raises:
            ValueError: If ``window_size`` is smaller than 1.
        """
        if window_size < 1:
            raise ValueError("window_size must be a positive integer")

        array = np.asarray(array)

        # The last valid pair starts at len(array) - 2 * window_size, so the
        # number of pairs is that index plus one.
        n_pairs = len(array) - 2 * window_size + 1
        if n_pairs <= 0:
            # Too few rows for a single pair: return empty arrays that still
            # carry the window and trailing dimensions.
            empty_shape = (0, window_size) + array.shape[1:]
            return (np.empty(empty_shape, dtype=array.dtype),
                    np.empty(empty_shape, dtype=array.dtype))

        X = [array[i:i + window_size] for i in range(n_pairs)]
        Y = [array[i + window_size:i + 2 * window_size] for i in range(n_pairs)]

        return np.asarray(X), np.asarray(Y)

LSTM Model

In [None]:
# Sequence-to-sequence regressor, assembled layer by layer.
model = keras.Sequential()
# return_sequences=True keeps one LSTM output per time step of the input window.
model.add(keras.layers.LSTM(8, return_sequences=True))
# No activation is passed to the Dense layers.
model.add(keras.layers.Dense(32))
# 14 output units, matching the 14 columns loaded by DataReader.read_data.
model.add(keras.layers.Dense(14))

Train Model

In [None]:
# Tunable training settings.
WINDOW_SIZE = 12  # rows per input window and per target window
EPOCHS = 50       # upper bound on training epochs
PATIENCE = 10     # epochs without val_loss improvement before stopping

dr = DataReader(WINDOW_SIZE)

model.compile(optimizer="adam", metrics=["mae"], loss="mse")

print("\n\n************ TRAINING START ************ ")
# restore_best_weights=True rolls the model back to the epoch with the lowest
# val_loss; without it the model keeps the weights from the last epoch run,
# which is PATIENCE epochs past the best one when early stopping triggers.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                           patience=PATIENCE,
                                           restore_best_weights=True)
# NOTE(review): the test split doubles as validation data, so early stopping is
# driven by the same windows that the result plots are later drawn from.
history = model.fit(dr.train_X, dr.train_Y, epochs=EPOCHS,
                    validation_data=(dr.test_X, dr.test_Y),
                    callbacks=[early_stop])

Plot Functions

In [None]:
def draw_scatter(prediction, label, num_feature=''):
    """Scatter-plot predictions against ground-truth targets and save the figure.

    Ground truth goes on the x-axis and predictions on the y-axis, with a red
    identity line for reference. The figure is written to
    ``result_{num_feature}.png`` and then shown.

    Args:
        prediction: Model output; anything ``np.asarray`` can convert.
        label: Ground-truth targets with the same layout as ``prediction``.
        num_feature: Index into the last of three axes selecting a single
            feature to plot. ``''`` (the default) or ``None`` plots every
            feature. Index ``0`` is a valid selection.
    """
    # Convert up front so NumPy arrays and array-convertible tensors are
    # handled the same way below.
    prediction = np.asarray(prediction)
    label = np.asarray(label)

    # NOTE(review): prediction and label are each divided by their own maxima
    # along axis 0, so the two plotted quantities are not on a shared scale —
    # confirm this is intended.
    scaled_pred = prediction / np.max(prediction, axis=0)
    scaled_true = label / np.max(label, axis=0)

    # Test for the "no selection" sentinels explicitly: a plain truthiness
    # check would also reject feature index 0.
    no_selection = num_feature is None or (
        isinstance(num_feature, str) and num_feature == '')
    if not no_selection:
        scaled_pred = scaled_pred[:, :, num_feature]
        scaled_true = scaled_true[:, :, num_feature]

    minval = min(np.min(scaled_pred), np.min(scaled_true))
    maxval = max(np.max(scaled_pred), np.max(scaled_true))

    fig = plt.figure(figsize=(8, 8))
    plt.title("Regression Result")
    plt.xlabel("Ground Truth")
    plt.ylabel("AI Predict")
    # x carries the labels and y the predictions so the data match the axis
    # titles set just above.
    plt.scatter(scaled_true, scaled_pred)
    plt.plot([minval, maxval], [minval, maxval], "red")
    fig.savefig(f"result_{num_feature}.png")
    plt.show()

def draw_loss_graph(history):
    """Plot the per-epoch training (and validation) loss and save the figure.

    The figure is written to ``train_history.png`` and then shown.

    Args:
        history: Object with a ``history`` mapping that holds a ``"loss"``
            sequence and, optionally, a ``"val_loss"`` sequence.

    Raises:
        KeyError: If ``history.history`` has no ``"loss"`` entry.
    """
    train_history = history.history["loss"]
    # "val_loss" may be absent (e.g. training ran without validation data);
    # in that case only the training curve is drawn.
    validation_history = history.history.get("val_loss")

    fig = plt.figure(figsize=(8, 8))
    plt.title("Loss History")
    plt.xlabel("EPOCH")
    plt.ylabel("LOSS Function")
    plt.plot(train_history, "red", label='train')
    if validation_history is not None:
        plt.plot(validation_history, 'blue', label='validation')
    # Add the legend before saving so that it is part of the written PNG.
    plt.legend()
    fig.savefig("train_history.png")
    plt.show()

Show Results

In [None]:
# Plot the loss curves recorded in `history` by the model.fit call above.
draw_loss_graph(history)

In [None]:
# Compare the model's output with the targets for the first 200 test windows,
# with all features plotted together.
draw_scatter(model(dr.test_X[:200]), dr.test_Y[:200])

In [None]:
# Same first 200 test windows, restricted to feature index 1 (the second of
# the loaded columns; presumably temperature in the Jena data set — confirm
# against the CSV header).
draw_scatter(model(dr.test_X[:200]), dr.test_Y[:200], num_feature=1)