<a href="https://colab.research.google.com/github/ekingit/hackathon/blob/main/train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import time
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, Dataset
from itertools import chain
import matplotlib.pyplot as plt


# Mount Google Drive so the CSV and checkpoint paths under /content/drive
# used later in this notebook resolve. This import only exists on Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Data, Model, Train

In [14]:
class Data(Dataset):
    """Sliding-window dataset over column-wise standardised series from a CSV.

    The CSV is read with pandas and the columns named ``str(i)`` for
    ``i`` in ``range(person_start, person_end)`` are selected (``person_end``
    is exclusive). Each selected column is standardised with its own mean and
    standard deviation, and item ``i`` is the window of ``window_len``
    consecutive rows starting at row ``i``.

    Attributes:
        window_len: number of rows in every returned window.
        df_person: the selected columns as a DataFrame (un-normalised).
        mean: per-column mean used for normalisation.
        std: per-column divisor used for normalisation; columns whose sample
            standard deviation is zero or undefined use 1 instead.
        dataset: normalised tensor of shape (rows, columns), dtype float32.
    """

    def __init__(self, path, person_start, person_end, window_len):
        super().__init__()

        self.window_len = window_len
        df = pd.read_csv(path)
        person_ids = [str(i) for i in range(person_start, person_end)]
        # Column '2660' is always left out (behaviour kept from the original
        # notebook; the reason is not recorded in this file).
        if '2660' in person_ids:
            person_ids.remove('2660')
        self.df_person = df[person_ids]

        dataset = torch.tensor(self.df_person.values, dtype=torch.float)
        self.mean = dataset.mean(dim=0)
        std = dataset.std(dim=0)
        # A constant column has std == 0 (and a single-row input gives NaN).
        # Dividing by that would turn the whole column into NaN/inf, so such
        # columns are divided by 1 and end up as all zeros after centring.
        self.std = torch.where(std > 0, std, torch.ones_like(std))
        self.dataset = (dataset - self.mean) / self.std

    def __getitem__(self, index):
        """Return rows ``index`` .. ``index + window_len - 1`` as one tensor."""
        return self.dataset[index:index + self.window_len]

    def __len__(self):
        """Number of full windows.

        The last valid start row is ``len(dataset) - window_len``, hence the
        ``+ 1``; the result is clamped at 0 when the series is shorter than
        one window.
        """
        return max(0, len(self.dataset) - self.window_len + 1)

In [15]:
class local_LSTM(nn.Module):
    """Recurrent encoder: a thin wrapper around a batch-first ``nn.GRU``.

    Despite the class name, the recurrent cell is a GRU, so the recurrent
    state is a single tensor ``H`` (there is no separate cell state).
    """

    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )

    def forward(self, X, H):
        """Run the GRU over ``X`` starting from hidden state ``H``.

        Args:
            X: input of shape (batch_size, seq_len, input_size).
            H: initial hidden state of shape (num_layers, batch_size, hidden_size).

        Returns:
            ``(output, state)``: ``output`` has shape
            (batch_size, seq_len, hidden_size) and ``state`` has the same
            shape as ``H``.
        """
        output, state = self.gru(X, H)
        return output, state

class linear_layer(nn.Module):
    """Decoder head: a single affine map applied to the last dimension."""

    def __init__(self, input_size, output_size):
        super().__init__()
        self.linear = nn.Linear(in_features=input_size, out_features=output_size)
        # Registered on the module but never applied in forward(), so the
        # output is not clipped at zero.
        self.act = nn.ReLU()

    def forward(self, X):
        """Return ``linear(X)``; the trailing dimension becomes ``output_size``."""
        return self.linear(X)

In [16]:
def autoregressive(X, model_enc, model_dec, H, washout, seq_len, window_len):
    """Predict one step ahead over a window, then roll forward on own outputs.

    Phase 1 (teacher forced): the first ``seq_len`` steps of ``X`` are fed to
    ``model_enc`` in one call and decoded by ``model_dec``; the first
    ``washout`` decoded steps are dropped.
    Phase 2 (free running): for ``window_len - seq_len - 1`` further steps the
    most recent decoded step is fed back as the next input, carrying the
    hidden state along.

    Args:
        X: tensor indexed as (batch, time, features).
        model_enc: callable ``(inputs, H) -> (outputs, H)``.
        model_dec: callable mapping encoder outputs to predictions.
        H: initial hidden state passed to ``model_enc``.
        washout: number of leading teacher-forced predictions to discard.
        seq_len: number of leading time steps of ``X`` fed as ground truth.
        window_len: total window length; sets how many free-running steps
            are generated.

    Returns:
        Predictions concatenated along the time axis, with
        ``window_len - washout - 1`` steps. A trailing feature dimension of
        size 1 is squeezed away.
    """
    # Phase 1: encode/decode the ground-truth prefix in a single pass.
    encoded, H = model_enc(X[:, :seq_len, :], H)
    step_pred = model_dec(encoded)  # (batch, seq_len, features)
    pieces = [step_pred[:, washout:]]

    # Phase 2: feed the latest prediction back in, one step at a time.
    n_free_steps = window_len - seq_len - 1
    for _ in range(n_free_steps):
        last_step = step_pred[:, -1:, :]  # (batch, 1, features)
        encoded, H = model_enc(last_step, H)
        step_pred = model_dec(encoded)
        pieces.append(step_pred)

    return torch.cat(pieces, dim=1).squeeze(-1)


def train(model1, model2, num_layers, hidden_size, dl, seq_len, window_len, washout, optimizer, loss_fn, device, max_grad_norm=None):
    """Run one training epoch and return the normalised accumulated loss.

    For every batch a fresh all-zero hidden state is created, predictions are
    produced with ``autoregressive`` and compared against the input window
    shifted by one step (``X[:, washout+1:, :]``).

    Args:
        model1: encoder passed to ``autoregressive`` as ``model_enc``.
        model2: decoder passed to ``autoregressive`` as ``model_dec``.
        num_layers: first dimension of the initial hidden state.
        hidden_size: last dimension of the initial hidden state.
        dl: iterable of batches indexed as (batch, time, features); must
            expose ``dl.dataset`` with a length.
        seq_len: teacher-forced prefix length (see ``autoregressive``).
        window_len: total window length of each sample.
        washout: number of leading predictions excluded from the loss.
        optimizer: optimizer stepping the parameters of both models.
        loss_fn: callable ``(Y_hat, Y) -> scalar loss``.
        device: device the models and batches are moved to.
        max_grad_norm: if not ``None``, the gradients of all parameters held
            by ``optimizer`` are clipped to this total norm before each step.
            The default ``None`` leaves gradients untouched.

    Returns:
        Sum of the per-batch losses divided by
        ``len(dl.dataset) * (window_len - washout - 1)``. This is a
        per-sample, per-step figure only if ``loss_fn`` sums over elements
        (as ``nn.MSELoss(reduction='sum')`` does).
    """
    model1.to(device)
    model2.to(device)
    model1.train()
    model2.train()
    train_loss = 0
    for X in dl:
        X = X.to(device)
        optimizer.zero_grad()
        # X.shape[0] rather than a fixed batch size: the final batch may be short.
        H = torch.zeros(num_layers, X.shape[0], hidden_size, device=device)
        Y_hat = autoregressive(X, model1, model2, H, washout, seq_len, window_len)
        Y = X[:, washout+1:, :]  # targets: the same window shifted one step ahead
        loss = loss_fn(Y_hat, Y)
        loss.backward()
        if max_grad_norm is not None:
            # Clip exactly the parameters this optimizer is about to update.
            stepped_params = [p for group in optimizer.param_groups for p in group['params']]
            nn.utils.clip_grad_norm_(stepped_params, max_grad_norm)
        optimizer.step()
        train_loss += loss.item()
    train_loss = train_loss/(len(dl.dataset)*(window_len-washout-1))
    return train_loss

# Run!

In [9]:
# Hyperparameters for the first run.

# -- model --
input_size = 4124    # feature width of the encoder input and decoder output
hidden_size = 2048   # GRU hidden width
num_layers = 2       # stacked GRU layers

# -- windowing --
window_len = 5 * 24  # rows per training sample
seq_len = 4 * 24     # leading rows of each window fed as ground truth
washout = 0          # leading predictions excluded from the loss

# -- optimisation --
batch_size = 64
lr = 1e-3            # Adam learning rate

In [17]:
# Pick the accelerator when one is visible, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Sliding-window dataset over columns '0' .. str(input_size), reshuffled every epoch.
path = '/content/drive/MyDrive/hackathon/smart_meters_london_2013.csv'
data = Data(path, 0, input_size + 1, window_len)
dl = DataLoader(dataset=data, batch_size=batch_size, shuffle=True)

# GRU encoder followed by a linear read-out back to the input width.
encoder = local_LSTM(input_size, hidden_size, num_layers)
decoder = linear_layer(hidden_size, input_size)

# A single Adam optimizer updates both modules.
encoder_params = encoder.parameters()
decoder_params = decoder.parameters()
all_params = chain(encoder_params, decoder_params)
optimizer = torch.optim.Adam(all_params, lr=lr)

# Summed (not averaged) squared error.
loss_fn = nn.MSELoss(reduction='sum')

encoder.to(device)
decoder.to(device)

linear_layer(
  (linear): Linear(in_features=2048, out_features=4124, bias=True)
  (act): ReLU()
)

In [11]:
# Train for 20 epochs and keep the weights of the best epoch on Drive.
# Starting from +inf (rather than a fixed threshold) guarantees the first
# finite epoch is checkpointed whatever the scale of the loss is.
best_loss = float('inf')
for epoch in range(20):
    train_loss = train(encoder, decoder, num_layers, hidden_size, dl, seq_len, window_len, washout, optimizer, loss_fn, device)
    print(train_loss)
    # A NaN loss compares False here, so it never overwrites a good checkpoint.
    if train_loss < best_loss:
        best_loss = train_loss
        torch.save(encoder.state_dict(), '/content/drive/MyDrive/hackathon/encoder_scaled4.pth')
        torch.save(decoder.state_dict(), '/content/drive/MyDrive/hackathon/decoder_scaled4.pth')

2892.8223097572363
2266.920084422658
1878.7560700669158
1663.8306216931217
1358.9469975490197
1657.1610255213197
2096.399960122938
1456.9891583022097
1268.1685340802987
1150.442287192655
1067.1647180399939
1000.6438278089014
4215.9065301120445
3980.6620117491443
6232.850914254591
105922.0692071273
395051.96809835045
611630.621101774
500458.3158418923
465349.9376283847


# Smaller model (single GRU layer)

In [18]:
# Hyperparameters for the smaller run: only num_layers differs (2 -> 1).

# -- model --
input_size = 4124    # feature width of the encoder input and decoder output
hidden_size = 2048   # GRU hidden width
num_layers = 1       # stacked GRU layers

# -- windowing --
window_len = 5 * 24  # rows per training sample
seq_len = 4 * 24     # leading rows of each window fed as ground truth
washout = 0          # leading predictions excluded from the loss

# -- optimisation --
batch_size = 64
lr = 1e-3            # Adam learning rate

In [19]:
# The hyperparameters were just changed (num_layers), so build a fresh model
# and optimizer here. Reusing the previous `encoder` would keep its old depth
# and trained weights, and `train` would create a hidden state whose layer
# count no longer matches the GRU. `dl` is reused as is because window_len
# and batch_size are unchanged.
encoder = local_LSTM(input_size, hidden_size, num_layers)
decoder = linear_layer(hidden_size, input_size)
optimizer = torch.optim.Adam(chain(encoder.parameters(), decoder.parameters()), lr)

# Start from +inf so the first finite epoch is always checkpointed.
best_loss = float('inf')
for epoch in range(20):
    train_loss = train(encoder, decoder, num_layers, hidden_size, dl, seq_len, window_len, washout, optimizer, loss_fn, device)
    print(train_loss)
    # A NaN loss compares False here, so it never overwrites a good checkpoint.
    if train_loss < best_loss:
        best_loss = train_loss
        torch.save(encoder.state_dict(), '/content/drive/MyDrive/hackathon/encoder_scaled.pth')
        torch.save(decoder.state_dict(), '/content/drive/MyDrive/hackathon/decoder_scaled.pth')

2906.7991013071896
2177.394508636788
1834.5383986928105
2303.1292960239653
1678.19302054155
1744.1855547774665
1542.7292678182384
32720.909030695613
658092.2163709928
586571.8916900094
555595.1873015873


KeyboardInterrupt: 