In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import numpy as np
import urllib.request
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
from io import StringIO
from matplotlib import pyplot as plt
from datetime import timedelta
from datetime import datetime
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, SimpleRNN
from keras.layers import LSTM,Dense ,Dropout

In [2]:
def get_melbourne_data() -> pd.DataFrame:
    '''
    Returns a dataframe of the melbourne data set.
    :return: pd.DataFrame
    '''

    # URL of the raw csv data to download
    raw_url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv"

    # Get the earthquake data from the API
    response = urllib.request.urlopen(raw_url)

    # Decode earthquake data
    response = response.read().decode('utf-8')

    # Return as a pandas dataframe
    data = pd.read_csv(StringIO(response))

    # Cast the date column to datetime
    data['Date'] = pd.to_datetime(data['Date'])

    return data

time_serie =get_melbourne_data()

In [3]:
def split_train_test_data(melbourne_data: pd.DataFrame, split_year: str="1987") -> (pd.DataFrame, pd.DataFrame):
    '''
    Split the melbourne data into a training dataframe and a test dataframe.
    The training data is composed of all temperature points strictly anterior to the given split year.
    The test data is composed of all the points posterior or equal to the split year.
    :param melbourne_data: pd.DataFrame, with at least column ['Date']
    :param split_year: str, the year to split the data on
    :return: (pd.DataFrame, pd.DataFrame)
    '''

    # Format split year variable
    split_year = "{}".format(int(split_year) - 1)

    # Trainings data. Data anterior to the given split year
    train_data = melbourne_data.loc[:split_year]

    # Test data. Data posterior or equal to the given split year
    test_data = melbourne_data.loc[split_year:]

    return train_data, test_data

In [4]:
def build_training_point(data, t_str, history_days=64, horizon_days=1):
    '''
    :param data:
    :param t_str:
    :param history_days:
    :param horizon_days:
    :return:
    '''

    # Cast for indexing
    t_datetime = datetime.strptime(t_str, "%Y-%m-%d 00:00:00")

    # Create training example (x,y)
    try:
        x = data.loc[t_datetime - timedelta(days=history_days - 1):t_datetime]
        y = data.loc[t_datetime + timedelta(days=1):t_datetime + timedelta(days=horizon_days)]
    except KeyError:
        raise KeyError("The date {} is not in the data".format(t_str))

    # Return
    return x, y

In [5]:
def create_training_points(data, history_days, horizon_days):
    '''
    :param data:
    :param history_days:
    :param horizon_days:
    :return:
    '''
    X = []
    Y = []
    for t in data.index[history_days:(len(data) - horizon_days)]:
        try:
            x, y = build_training_point(data, str(t), history_days=history_days, horizon_days=horizon_days)
            if (len(x) == history_days) & (len(y) == horizon_days):
                X.append(x)
                Y.append(y)
        except KeyError:
            continue
    X = np.stack(X)
    Y = np.stack(Y)
    return X, Y


In [6]:
def split_data(year):
    split_date = pd.datetime(year,1,1)
    train_data = time_serie.loc[time_serie['Date'] < split_date]
    test_data = time_serie.loc[time_serie['Date'] >= split_date]

    scaler = MinMaxScaler()
    train_data[["Temp"]] = scaler.fit_transform(train_data[["Temp"]] )
    test_data[["Temp"]] = scaler.fit_transform(test_data[["Temp"]] )

    # Index
    train_data.set_index('Date', inplace=True)
    test_data.set_index('Date', inplace=True)
    return train_data, test_data


In [21]:
from keras.layers import Bidirectional 
from keras.layers import Input, LSTM, Dense
from keras.models import Model
def model_LSTM(history_days,horizon_days):
    model_input = Input(shape=(history_days, 1))
    model = Sequential()
    z=model
    z = Bidirectional(LSTM(units=30, return_sequences=True))(model_input)
    z = Dropout(0.2)(z)
    z= LSTM(units= 30 , return_sequences=True)(z)
    z= Dropout(0.2)(z)
    z = LSTM(units= 30 , return_sequences=True)(z)
    z=Dropout(0.2)(z)
    z=LSTM(units= 30)(z)
    z=Dropout(0.2)(z)
    z= Dense(horizon_days,activation='linear')(z)
    Model_Lstm= Model(inputs=model_input, outputs=z)
    return Model_Lstm
model_3m = model_LSTM(90, 30)
model_6m = model_LSTM(180, 30)
model_12m = model_LSTM(365, 30)



In [22]:
train_data, test_data = split_data(1987)
X_train, Y_train = create_training_points(train_data, history_days=90, horizon_days=30)
model_3m.compile(optimizer='adam', loss='mse')
model_3m.fit(X_train, Y_train, epochs=32, batch_size=32, verbose=1)

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


<keras.callbacks.History at 0x147904aab20>

In [24]:
train_data, test_data = split_data(1987)
X_test, Y_test = create_training_points(test_data, history_days=90, horizon_days=30)
y_pred_3m =model_3m.predict(X_test)
print("1987, k=3: ", mean_squared_error(y_pred_3m[0], Y_test[0]))

1987, k=3:  0.02679071054359822


In [26]:
train_data, test_data = split_data(1987)
X_train, Y_train = create_training_points(train_data, history_days=180, horizon_days=30)
model_6m.compile(optimizer='adam', loss='mse')
model_6m.fit(X_train, Y_train, epochs=32, batch_size=32, verbose=1)

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


<keras.callbacks.History at 0x1479d84ba00>

In [27]:
train_data, test_data = split_data(1987)
X_test, Y_test = create_training_points(test_data, history_days=180, horizon_days=30)
y_pred_6m = model_6m.predict(X_test)
print("1987, k=6: ", mean_squared_error(y_pred_6m[0], Y_test[0]))


1987, k=6:  0.008257268078829852


In [28]:
train_data, test_data = split_data(1987)
X_train, Y_train = create_training_points(train_data, history_days=365, horizon_days=30)
model_12m.compile(optimizer='adam', loss='mse')
model_12m.fit(X_train, Y_train, epochs=32, batch_size=32, verbose=1)

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


<keras.callbacks.History at 0x147aac45280>

In [29]:
train_data, test_data = split_data(1987)
X_test, Y_test = create_training_points(test_data, history_days=365, horizon_days=30)
y_pred_12m = model_12m.predict(X_test)
print("1987, k=12: ", mean_squared_error(y_pred_12m[0], Y_test[0]))

1987, k=12:  0.027797010512636673


In [37]:
train_data, test_data = split_data(1988)
X_test, Y_test = create_training_points(test_data, history_days=90, horizon_days=30)
y_pred_3m = model_3m.predict(X_test)
print("1988, k=3: ", mean_squared_error(y_pred_3m[0], Y_train[1]))

1988, k=3:  0.04021030996877787


In [38]:
train_data, test_data = split_data(1988)
X_test, Y_test = create_training_points(test_data, history_days=180, horizon_days=30)
y_pred_6m = model_6m.predict(X_test)
print("1988, k=6: ", mean_squared_error(y_pred_6m[0], Y_train[1]))


1988, k=6:  0.15121333047448135


In [48]:
train_data, test_data = split_data(1988)
X_test, Y_test = create_training_points(test_data, history_days=365, horizon_days=30)
y_pred_12m = model_12m.predict(X_test)
print("1988, k=12: ", mean_squared_error(y_pred_12m[0], Y_train[1]))


1988, k=12:  0.018993083323516693


In [40]:
train_data, test_data = split_data(1989)
X_test, Y_test = create_training_points(test_data, history_days=90, horizon_days=30)
y_pred_3m = model_3m.predict(X_test)
print("1989, k=3: ", mean_squared_error(y_pred_3m[0], Y_train[2]))

1989, k=3:  0.04610006463221635


In [41]:
train_data, test_data = split_data(1989)
X_test, Y_test = create_training_points(test_data, history_days=180, horizon_days=30)
y_pred_6m = model_6m.predict(X_test)
print("1989, k=6: ", mean_squared_error(y_pred_6m[0], Y_train[2]))

1989, k=6:  0.16455930190496026


In [42]:
train_data, test_data = split_data(1989)
X_test, Y_test = create_training_points(test_data, history_days=365, horizon_days=30)
y_pred_12m = model_12m.predict(X_test)
print("1989, k=12: ", mean_squared_error(y_pred_12m[0], Y_train[2]))

1989, k=12:  0.01989014650220999


|   *Evaluation year* 	| *Next 3 months* 	| *Next 6 months* 	| *Next 12 months* 	|
|:---------------------:	|:-----------------:	|:-----------------:	|:------------------:	|
|        *1987*       	|       mse =  0.0267	|    mse =  0.008257   	|     mse =  0.0277   	|
|        *1988*       	|       mse =  0.0402	|    mse =  0.1512    	|     mse =  0.0189	|
|        *1989*       	|       mse =  0.0461	|    mse =  0.1645   	|     mse =  0.0198  	|