In [1]:
import pandas as pd
import numpy as np
import os.path as osp
from tqdm.notebook import tqdm

import tensorflow
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM
from keras.callbacks import EarlyStopping

import torch
import torch.nn as nn
from torch.autograd import Variable 
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('Updated_CRU.csv')
df.head()

Unnamed: 0,time,lat,lon,timeseries-tas-monthly-mean,year,month,dayofyear,quarter
0,1901-01-16,26.25,-122.75,16.322001,1901,1,16,1
1,1901-01-16,26.25,-122.25,16.322001,1901,1,16,1
2,1901-01-16,26.25,-121.75,16.322001,1901,1,16,1
3,1901-01-16,26.25,-121.25,16.322001,1901,1,16,1
4,1901-01-16,26.25,-120.75,16.322001,1901,1,16,1


In [3]:
df.shape

(6644352, 8)

In [4]:
df_ft = df[['time', 'timeseries-tas-monthly-mean']].copy()

In [5]:
df_ft['time'] = pd.to_datetime(df_ft['time'])
df_ft.set_index('time', inplace=True)
df_ft.rename(columns={'timeseries-tas-monthly-mean': 'temperature'}, inplace=True)
df_ft = df_ft['temperature'].resample('D').mean()
df_ft = df_ft.reset_index()

In [6]:
df_ft.shape

(44165, 2)

In [7]:
df_ft.head()

Unnamed: 0,time,temperature
0,1901-01-16,2.729315
1,1901-01-17,
2,1901-01-18,
3,1901-01-19,
4,1901-01-20,


In [8]:
df_ft['year'] = df_ft['time'].dt.year
df_ft["week"] = df_ft['time'].dt.isocalendar().week
df_ft['month'] = df_ft['time'].dt.month
df_ft['quarter'] = df_ft['time'].dt.quarter
df_ft['dayofyear'] = df_ft['time'].dt.dayofyear
df_ft['dayofweek'] = df_ft['time'].dt.dayofweek
df_ft["is_month_start"] = df_ft['time'].dt.is_month_start
df_ft["is_month_end"] = df_ft['time'].dt.is_month_end
df_ft["is_quarter_start"] = df_ft['time'].dt.is_quarter_start
df_ft["is_quarter_end"] = df_ft['time'].dt.is_quarter_end
df_ft["is_year_start"] = df_ft['time'].dt.is_year_start
df_ft["is_year_end"] = df_ft['time'].dt.is_year_end
df_ft["days_in_month"] = df_ft['time'].dt.days_in_month
df_ft["is_leap_year"] = df_ft['time'].dt.is_leap_year
df_ft['is_weekend'] = np.where(df_ft['dayofweek'].isin([5, 6]), 1, 0)

In [9]:
def sin_transformer(period):
    return FunctionTransformer(lambda x: np.sin(x / period * 2 * np.pi))


def cos_transformer(period):
    return FunctionTransformer(lambda x: np.cos(x / period * 2 * np.pi))


df_ft["sin_week"] = sin_transformer(7).fit_transform(df_ft['week'])
df_ft["sin_month"] = sin_transformer(12).fit_transform(df_ft['month'])
df_ft["sin_quarter"] = sin_transformer(4).fit_transform(df_ft['quarter'])
df_ft["sin_dayofyear"] = sin_transformer(365).fit_transform(df_ft['dayofyear'])
df_ft['sin_day_of_week'] = sin_transformer(7).fit_transform(df_ft['dayofweek'])

df_ft["cos_week"] = cos_transformer(7).fit_transform(df_ft['week'])
df_ft["cos_month"] = cos_transformer(12).fit_transform(df_ft['month'])
df_ft["cos_quarter"] = cos_transformer(4).fit_transform(df_ft['quarter'])
df_ft["cos_dayofyear"] = cos_transformer(365).fit_transform(df_ft['dayofyear'])
df_ft['cos_day_of_week'] = cos_transformer(7).fit_transform(df_ft['dayofweek'])

In [42]:
train_series = df_ft.loc[(df_ft['time'] >= '1900-01-16') & (df_ft['time'] < '2010-01-16'), :]
test_series = df_ft.loc[(df_ft['time'] >= '2010-01-16'), :]

In [43]:
X = train_series.drop(['time', 'temperature'], axis=1)
y = train_series['temperature'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [44]:
train_scaler = StandardScaler().fit(X_train)
target_scaler = StandardScaler().fit(y_train.reshape(-1, 1))

X_train = train_scaler.transform(X_train)
y_train = target_scaler.transform(y_train.reshape(-1, 1))

X_val = train_scaler.transform(X_test)
y_val = target_scaler.transform(y_test.reshape(-1, 1))

In [45]:
train_series.drop('time', axis=1, inplace=True)
test_series.drop('time', axis=1, inplace=True)

scaler = StandardScaler()
scaler.fit(train_series)
scaled_train = scaler.transform(train_series)
scaled_test = scaler.transform(test_series)


def df_to_x_y(df, window_size=1):
    X = []
    y = []
    for i in range(len(df) - window_size):
        row = [[a] for a in df[i: i + window_size, 0]]
        X.append(row)
        label = df[i + window_size, 0]
        y.append(label)
    return np.array(X), np.array(y)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_series.drop('time', axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_series.drop('time', axis=1, inplace=True)


In [46]:
time_steps = 60
X_1, y_1 = df_to_x_y(scaled_train, time_steps)

In [47]:
print(X_1.shape, y_1.shape)

(39737, 60, 1) (39737,)


In [48]:
39737 * 0.8

31789.600000000002

In [49]:
X_train, y_train = X_1[:31790], y_1[:31790]
X_test, y_test = X_1[31790:], y_1[31790:]

In [10]:
X_train_tensors = Variable(torch.Tensor(X_train))
X_test_tensors = Variable(torch.Tensor(X_val))

y_train_tensors = Variable(torch.Tensor(y_train))
y_test_tensors = Variable(torch.Tensor(y_val)) 

  X_train_tensors = Variable(torch.Tensor(X_train))


In [11]:
X_train_tensors_final = torch.reshape(X_train_tensors,   (X_train_tensors.shape[0], 1, X_train_tensors.shape[1]))
X_test_tensors_final = torch.reshape(X_test_tensors,  (X_test_tensors.shape[0], 1, X_test_tensors.shape[1])) 

In [12]:
print("Training Shape", X_train_tensors_final.shape, y_train_tensors.shape)
print("Testing Shape", X_test_tensors_final.shape, y_test_tensors.shape) 

Training Shape torch.Size([1046, 1, 25]) torch.Size([1046, 1])
Testing Shape torch.Size([262, 1, 25]) torch.Size([262, 1])


In [13]:
class LSTM1(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super(LSTM1, self).__init__()
        self.num_classes = num_classes #number of classes
        self.num_layers = num_layers #number of layers
        self.input_size = input_size #input size
        self.hidden_size = hidden_size #hidden state
        self.seq_length = seq_length #sequence length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True) #lstm
        self.fc_1 =  nn.Linear(hidden_size, 128) #fully connected 1
        self.fc = nn.Linear(128, num_classes) #fully connected last layer

        self.relu = nn.ReLU()
    
    def forward(self,x):
        h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #hidden state
        c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #internal state
        # Propagate input through LSTM
        output, (hn, cn) = self.lstm(x, (h_0, c_0)) #lstm with input, hidden, and internal state
        hn = hn.view(-1, self.hidden_size) #reshaping the data for Dense layer next
        out = self.relu(hn)
        out = self.fc_1(out) #first Dense
        out = self.relu(out) #relu
        out = self.fc(out) #Final Output
        return out

In [14]:
num_epochs = 1000
learning_rate = 0.001

input_size = 25
hidden_size = 2
num_layers = 1

output_size = 1

In [15]:
lstm1 = LSTM1(output_size, input_size, hidden_size, num_layers, X_train_tensors_final.shape[1])

In [16]:
criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(lstm1.parameters(), lr=learning_rate) 

In [17]:
for epoch in range(num_epochs):
    outputs = lstm1.forward(X_train_tensors_final) #forward pass
    optimizer.zero_grad() #caluclate the gradient, manually setting to 0
 
    # obtain the loss function
    loss = criterion(outputs, y_train_tensors)
 
    loss.backward() #calculates the loss of the loss function
 
    optimizer.step() #improve from loss, i.e backprop
    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item())) 

Epoch: 0, loss: 1.01582
Epoch: 100, loss: 0.39306
Epoch: 200, loss: 0.08312
Epoch: 300, loss: 0.04919
Epoch: 400, loss: 0.03550
Epoch: 500, loss: 0.02697
Epoch: 600, loss: 0.02303
Epoch: 700, loss: 0.02038
Epoch: 800, loss: 0.01915
Epoch: 900, loss: 0.01857
