In [5]:
import pandas as pd
import numpy as np
import torch
import os.path as osp
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import FunctionTransformer
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [6]:
df = pd.read_csv('Updated_CRU.csv')
df.head()

Unnamed: 0,time,lat,lon,timeseries-tas-monthly-mean,year,month,dayofyear,quarter
0,1901-01-16,26.25,-122.75,16.322001,1901,1,16,1
1,1901-01-16,26.25,-122.25,16.322001,1901,1,16,1
2,1901-01-16,26.25,-121.75,16.322001,1901,1,16,1
3,1901-01-16,26.25,-121.25,16.322001,1901,1,16,1
4,1901-01-16,26.25,-120.75,16.322001,1901,1,16,1


In [7]:
df.shape

(6644352, 8)

In [8]:
df_ft = df[['time', 'timeseries-tas-monthly-mean']].copy()

In [9]:
df_ft.head()

Unnamed: 0,time,timeseries-tas-monthly-mean
0,1901-01-16,16.322001
1,1901-01-16,16.322001
2,1901-01-16,16.322001
3,1901-01-16,16.322001
4,1901-01-16,16.322001


In [10]:
df_ft['time'] = pd.to_datetime(df_ft['time'])
df_ft.set_index('time', inplace=True)
df_ft.rename(columns={'timeseries-tas-monthly-mean': 'temperature'}, inplace=True)
df_ft = df_ft['temperature'].resample('M').mean()
df_ft = df_ft.reset_index()

In [11]:
df_ft['year'] = df_ft['time'].dt.year
df_ft["week"] = df_ft['time'].dt.isocalendar().week
df_ft['month'] = df_ft['time'].dt.month
df_ft['quarter'] = df_ft['time'].dt.quarter
df_ft['dayofyear'] = df_ft['time'].dt.dayofyear
df_ft['dayofweek'] = df_ft['time'].dt.dayofweek
df_ft["is_month_start"] = df_ft['time'].dt.is_month_start
df_ft["is_month_end"] = df_ft['time'].dt.is_month_end
df_ft["is_quarter_start"] = df_ft['time'].dt.is_quarter_start
df_ft["is_quarter_end"] = df_ft['time'].dt.is_quarter_end
df_ft["is_year_start"] = df_ft['time'].dt.is_year_start
df_ft["is_year_end"] = df_ft['time'].dt.is_year_end
df_ft["days_in_month"] = df_ft['time'].dt.days_in_month
df_ft["is_leap_year"] = df_ft['time'].dt.is_leap_year
df_ft['is_weekend'] = np.where(df_ft['dayofweek'].isin([5, 6]), 1, 0)

In [12]:
def sin_transformer(period):
    return FunctionTransformer(lambda x: np.sin(x / period * 2 * np.pi))


def cos_transformer(period):
    return FunctionTransformer(lambda x: np.cos(x / period * 2 * np.pi))


df_ft["sin_week"] = sin_transformer(7).fit_transform(df_ft['week'])
df_ft["sin_month"] = sin_transformer(12).fit_transform(df_ft['month'])
df_ft["sin_quarter"] = sin_transformer(4).fit_transform(df_ft['quarter'])
df_ft["sin_dayofyear"] = sin_transformer(365).fit_transform(df_ft['dayofyear'])
df_ft['sin_day_of_week'] = sin_transformer(7).fit_transform(df_ft['dayofweek'])

df_ft["cos_week"] = cos_transformer(7).fit_transform(df_ft['week'])
df_ft["cos_month"] = cos_transformer(12).fit_transform(df_ft['month'])
df_ft["cos_quarter"] = cos_transformer(4).fit_transform(df_ft['quarter'])
df_ft["cos_dayofyear"] = cos_transformer(365).fit_transform(df_ft['dayofyear'])
df_ft['cos_day_of_week'] = cos_transformer(7).fit_transform(df_ft['dayofweek'])

In [13]:
df_ft.shape

(1452, 27)

In [14]:
train_series = df_ft.loc[(df_ft['time'] >= '1900-01-16') & (df_ft['time'] < '2010-01-16'), :]
test_series = df_ft.loc[(df_ft['time'] >= '2010-01-16'), :]

In [15]:
train_series.shape, test_series.shape

((1308, 27), (144, 27))

In [16]:
X = train_series.drop(['time', 'temperature'], axis=1)
y = train_series['temperature'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [17]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((1046, 25), (262, 25), (1046,), (262,))

In [18]:
train_scaler = StandardScaler().fit(X_train)
target_scaler = StandardScaler().fit(y_train.reshape(-1, 1))

X_train = train_scaler.transform(X_train)
y_train = target_scaler.transform(y_train.reshape(-1, 1))

X_val = train_scaler.transform(X_test)
y_val = target_scaler.transform(y_test.reshape(-1, 1))

In [80]:
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return self.X.__len__()

    def __getitem__(self, idx):
        return np.array(self.X[idx], dtype=float), np.array(self.y[idx], dtype='float')

In [81]:
class TimeSeriesModel(nn.Module):
    def __init__(self, num_features):
        super(TimeSeriesModel, self).__init__()
        self.linear1 = nn.Linear(num_features, 128)
        self.linear2 = nn.Linear(128,64)
        self.linear3 = nn.Linear(64,16)
        self.linear4 = nn.Linear(16,1)

        self.dropout = nn.Dropout(0.5)
        self.activation1 = nn.ReLU()
        
    def forward(self, x):
        x = self.dropout(self.linear1(x))
        x = self.activation1(x)
        x = self.dropout(self.linear2(x))
        x = self.activation1(x)
        x = self.dropout(self.linear3(x))
        x = self.activation1(x)
        x = self.linear4(x)
        return x

In [82]:
# Hyperparameters
n_epochs = 1000
n_epochs_stop = 30
input_size = X_train.shape[1]
output_size = 1
batch_size = 64
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_dir = 'models'

In [93]:
X_train.shape[1]

26

In [83]:
train_dataset = TimeSeriesDataset(X_train, y_train)
val_dataset = TimeSeriesDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [85]:
model = TimeSeriesModel(num_features=input_size)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [92]:
best_loss = np.inf
epochs_no_improve = 0
model_name = 'nn_model_new'
for epochs in range(1, n_epochs+1):
    train_loss = 0
    model.train()
    for data, target in train_loader:
        optimizer.zero_grad()
        data = torch.Tensor(data).to(device)
        target = target.reshape(-1,1)
        output = model(data.float())
        loss = criterion(output, target.float().to(device))
        if type(criterion) == torch.nn.modules.loss.MSELoss:
            loss = torch.sqrt(loss)
        loss.backward()
        optimizer.step()
        #scheduler.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for data, target in val_loader:
            data = torch.Tensor(data).to(device)
            target = target.reshape(-1,1)
            output = model(data.float())
            loss = criterion(output, target.float().to(device))
            if type(criterion) == torch.nn.modules.loss.MSELoss:
                loss = torch.sqrt(loss)
            val_loss += loss.item()
    val_loss /= len(val_loader)
    # early stopping
    if val_loss < best_loss:
        best_loss = val_loss
        #torch.save(model.state_dict(), osp.join(model_dir, '{}.pt'.format(model_name)))
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
    if epochs_no_improve == n_epochs_stop:
        #print("Early stopping.")
        break
    print(f'Epoch {epochs} train loss: {round(train_loss,8)} val loss: {round(val_loss,8)}')
print('best loss: {}'.format(best_loss))

Epoch 1 train loss: 0.35079737 val loss: 13.86887703
Epoch 2 train loss: 0.35907044 val loss: 13.82889366
Epoch 3 train loss: 0.37290238 val loss: 13.93538094
Epoch 4 train loss: 0.37572849 val loss: 13.96545792
Epoch 5 train loss: 0.36588743 val loss: 13.97237892
Epoch 6 train loss: 0.37213832 val loss: 13.9424921
Epoch 7 train loss: 0.36875611 val loss: 13.90692348
Epoch 8 train loss: 0.3641164 val loss: 13.97084103
Epoch 9 train loss: 0.35896575 val loss: 13.97601223
Epoch 10 train loss: 0.38005316 val loss: 13.97062321
Epoch 11 train loss: 0.37502273 val loss: 13.96072044
Epoch 12 train loss: 0.34883172 val loss: 13.97363625
Epoch 13 train loss: 0.36469031 val loss: 13.95362186
Epoch 14 train loss: 0.36126663 val loss: 13.9281868
Epoch 15 train loss: 0.38015273 val loss: 13.93367882
Epoch 16 train loss: 0.36319965 val loss: 13.88624935
Epoch 17 train loss: 0.36246859 val loss: 13.94507599
Epoch 18 train loss: 0.35811867 val loss: 13.91037598
Epoch 19 train loss: 0.36750792 val loss