In [None]:
# Reference: 
# all code borrowed from https://www.youtube.com/watch?v=NGzQpphf_Vc
# for personal studies purposes only

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [4]:
names = ['year', 'month', 'day', 'dec_year', 'sn_value', 'sn_error', 'obs_num', 'unused1']
df = pd.read_csv(
    'https://data.heatonresearch.com/data/t81-558/SN_d_tot_V2.0.csv',
    sep = ';',
    header = None,
    names = names,
    na_values = ['-1'],
    index_col = False,
)

In [5]:
df.head()

Unnamed: 0,year,month,day,dec_year,sn_value,sn_error,obs_num,unused1
0,1818,1,1,1818.001,-1,,0,1
1,1818,1,2,1818.004,-1,,0,1
2,1818,1,3,1818.007,-1,,0,1
3,1818,1,4,1818.01,-1,,0,1
4,1818,1,5,1818.012,-1,,0,1


In [9]:
start_id = max(df[df['obs_num'] == 0].index.tolist()) + 1
df = df[start_id:].copy()

df['sn_value'] = df['sn_value'].astype(float)
df_train = df.query('year < 2000')
df_test = df.query('year >= 2000')

spots_train = df_train['sn_value'].to_numpy().reshape(-1, 1)
spots_test = df_test['sn_value'].to_numpy().reshape(-1, 1)

scaler = StandardScaler()
spots_train = scaler.fit_transform(spots_train).flatten().tolist()
spots_test = scaler.transform(spots_test).flatten().tolist()

In [13]:
spots_train[:5], spots_test[:5]

([3.370924724576519,
  1.9389109571846703,
  2.3824550444299333,
  3.3582520363695116,
  2.293746226980881],
 [-0.20277334979959863,
  -0.1520825969715686,
  -0.08871915593653104,
  0.10137116716858159,
  0.2661161138596792])

In [18]:
SEQUENCE_SIZE = 10

def to_sequence(seq_size, obs):
    x = []
    y = []
    for i in range(len(obs) - seq_size):
        window = obs[i: i + seq_size]
        after_window = obs[i + seq_size]
        x.append(window)
        y.append(after_window)
    return torch.tensor(x, dtype = torch.float32).view(-1, seq_size, 1), torch.tensor(y, dtype = torch.float32).view(-1, 1)

x_train, y_train = to_sequence(SEQUENCE_SIZE, spots_train)
x_test, y_test = to_sequence(SEQUENCE_SIZE, spots_test)

train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True)

test_dataset = TensorDataset(x_test, y_test)
test_loader = DataLoader(test_dataset, batch_size = 32, shuffle = True)



In [21]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout = 0.1, max_len = 5000):
        
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p = dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype = torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10_000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)
    
    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)
    
    


In [23]:
class TransformerModel(nn.Module):
    def __init__(self, input_dim = 1, d_model = 64, nhead = 4, num_layers = 2, dropout = 0.2):
        super(TransformerModel, self).__init__()

        self.encoder = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.decoder = nn.Linear(d_model, 1)
    
    def forward(self, x):
        x = self.encoder(x)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = self.decoder(x[:, -1, :])
        return x
    
model = TransformerModel().to('cpu')

In [25]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
scheduler = ReduceLROnPlateau(optimizer, 'min', factor = 0.5, patience = 3, verbose = True)

epochs = 1000 
early_stop_count = 0
min_val_loss = float('inf')

for epoch in range(epochs):
    model.train()
    for batch in train_loader:
        x_batch, y_batch = batch
        x_batch, y_batch = x_batch.to('cpu'), y_batch.to('cpu')

        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

    model.eval()
    val_losses = []
    loss = criterion(outputs, y_batch)
    val_losses.append(loss.item())


    val_loss = np.mean(val_losses)
    scheduler.step(val_loss)

    if val_loss < min_val_loss:
        min_val_loss = val_loss
        early_stop_count = 0
    else:
        early_stop_count += 1

    if early_stop_count >= 5:
        print('Early Stopping')
        break

    print(f'Epoch {epoch + 1} / {epochs}, validation loss = {val_loss: .4f}')

Epoch 1 / 1000, validation loss =  0.0402
Epoch 2 / 1000, validation loss =  0.0576
Epoch 3 / 1000, validation loss =  0.0509
Epoch 4 / 1000, validation loss =  0.0516
Epoch 00005: reducing learning rate of group 0 to 5.0000e-04.
Epoch 5 / 1000, validation loss =  0.0860
Early Stopping
