In [1]:
%cd '/content/drive/MyDrive/Advanced Research Methods/Homework 2'

/content/drive/MyDrive/Advanced Research Methods/Homework 2


In [2]:
import pandas as pd
import random, math
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from scipy import fftpack
import torch.utils.data as utils
import os
from pathlib import Path
from sklearn.metrics import mean_absolute_percentage_error
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [3]:
def reshape(root):

 tps_df = pd.read_pickle(root)

 reshaped_tps_df = pd.DataFrame()
 reshaped_tps_df['TIME'] = tps_df.time.unique()
 for seg in tps_df.segmentID.unique():
     column = tps_df[tps_df['segmentID'] == seg][['time','TrafficIndex_GP']].drop_duplicates(subset=['time'])
     column.columns = ['TIME', str(seg)]
     reshaped_tps_df = reshaped_tps_df.join(column.set_index('TIME'), on='TIME')

 reshaped_tps_df = reshaped_tps_df.set_index('TIME')
 return reshaped_tps_df



In [4]:
train_data = reshape('tps_df.pkl')


In [5]:
class TrafficForecast(Dataset):
    def __init__(self,df, window, horizon):
        self.window = window
        self.horizon = horizon
        self.df = df
        self.inputs = []
        self.targets = []

        # df = pd.read_pickle(self.pkl_path)
        # df['time'] = pd.to_datetime(df['time'])
        # df['unix_timestamp'] = df['time'].astype(int) / 10**9
        # df['dow'] = df['time'].dt.weekday
        # df['hour'] = df['time'].dt.hour
        # # df['min'] = df['time'].dt.minute
        # self.df = df.sample(frac=1).reset_index(drop=True)
        # self.seg_ids = self.df['segmentID'].unique()

        self.setup_forecast()

    def setup_forecast(self):
        for segid in self.df.columns:
          self.df = self.df.fillna(method="ffill")
          TI_series = self.df[segid].values

          for t in range(0,len(TI_series)-(self.window+self.horizon)):
              x = TI_series[t:t+self.window]
              y = TI_series[t+self.window:(t+self.window+self.horizon)]


              self.inputs.append(x)
              self.targets.append(y)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self,idx):
        X = torch.tensor(self.inputs[idx],dtype=torch.float32).reshape(self.window,1)
        y = torch.tensor(self.targets[idx],dtype=torch.float32)

        return {'inputs':X,'outputs':y}


In [6]:
tobj = TrafficForecast(train_data,36,12)


In [7]:
for idx, data in enumerate(tobj):
  print (data['inputs'].shape, data['outputs'].shape)
  if idx == 0:
    break


torch.Size([36, 1]) torch.Size([12])


In [11]:
bs = 200
dataloader_train = DataLoader(tobj, batch_size=bs, shuffle=False, num_workers=4,drop_last=True)



In [8]:
from models import MyLSTM, MyGRU, MyRNNCell, MyRNN

In [9]:
model = MyGRU(n_inputs=1, n_hidden=10, n_rnnlayers=2, n_outputs=12)
# model.to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [12]:
mean_loss = []
losses = []

n_epochs = 20

for it in tqdm(range(n_epochs)):
  # zero the parameter gradients
  for i_batch, sample_batched in enumerate(dataloader_train):
    optimizer.zero_grad()
    outputs = model(sample_batched['inputs'])
    loss = criterion(outputs, sample_batched['outputs'])
    losses.append(loss.item())
    loss.backward()
    optimizer.step()



  mean_loss.append(np.mean(losses))

  if (it+1) % 1 == 0:
    print(f'Epoch {it+1}/{n_epochs}, Training Loss: {np.mean(losses):.4f}')

  self.pid = os.fork()
  5%|▌         | 1/20 [05:41<1:48:15, 341.88s/it]

Epoch 1/20, Training Loss: 0.0191


 10%|█         | 2/20 [11:26<1:42:58, 343.23s/it]

Epoch 2/20, Training Loss: 0.0124


 15%|█▌        | 3/20 [17:21<1:38:48, 348.71s/it]

Epoch 3/20, Training Loss: 0.0102


 20%|██        | 4/20 [23:07<1:32:43, 347.73s/it]

Epoch 4/20, Training Loss: 0.0090


 25%|██▌       | 5/20 [28:59<1:27:19, 349.31s/it]

Epoch 5/20, Training Loss: 0.0084


 30%|███       | 6/20 [34:48<1:21:30, 349.33s/it]

Epoch 6/20, Training Loss: 0.0079


 35%|███▌      | 7/20 [40:40<1:15:48, 349.92s/it]

Epoch 7/20, Training Loss: 0.0076


 40%|████      | 8/20 [46:28<1:09:53, 349.45s/it]

Epoch 8/20, Training Loss: 0.0073


 45%|████▌     | 9/20 [52:13<1:03:49, 348.15s/it]

Epoch 9/20, Training Loss: 0.0071


 50%|█████     | 10/20 [58:00<57:57, 347.73s/it] 

Epoch 10/20, Training Loss: 0.0070


 55%|█████▌    | 11/20 [1:03:46<52:03, 347.09s/it]

Epoch 11/20, Training Loss: 0.0068


 60%|██████    | 12/20 [1:09:30<46:09, 346.18s/it]

Epoch 12/20, Training Loss: 0.0067


 65%|██████▌   | 13/20 [1:15:22<40:34, 347.85s/it]

Epoch 13/20, Training Loss: 0.0066


 70%|███████   | 14/20 [1:21:08<34:44, 347.48s/it]

Epoch 14/20, Training Loss: 0.0065


 75%|███████▌  | 15/20 [1:27:17<29:28, 353.80s/it]

Epoch 15/20, Training Loss: 0.0065


 80%|████████  | 16/20 [1:33:11<23:35, 353.99s/it]

Epoch 16/20, Training Loss: 0.0064


 85%|████████▌ | 17/20 [1:39:02<17:39, 353.03s/it]

Epoch 17/20, Training Loss: 0.0064


 90%|█████████ | 18/20 [1:44:51<11:43, 351.81s/it]

Epoch 18/20, Training Loss: 0.0063


 95%|█████████▌| 19/20 [1:50:35<05:49, 349.60s/it]

Epoch 19/20, Training Loss: 0.0063


100%|██████████| 20/20 [1:56:18<00:00, 348.91s/it]

Epoch 20/20, Training Loss: 0.0062





In [14]:
torch.save(model.state_dict(), 'grumodel.pt')

In [15]:
class Test(Dataset):
    def __init__(self,df):

        self.df = df
        self.inputs = []

        self.setup_forecast()

    def setup_forecast(self):


        for segid in self.df.columns:
          self.df = self.df.fillna(method="ffill")
          TI_series = self.df[segid].values
          self.inputs.append(TI_series)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self,idx):
        X = torch.tensor(self.inputs[idx],dtype=torch.float32).reshape(-1,1)


        return {'inputs':X}

In [16]:
def run_files(i,tobj):

    pkl_file= (pkl_files[i])
    t_value = reshape(os.path.join('Test',pkl_file))
    tobj_ = Test(t_value)
    dfp = pd.DataFrame()
    for idx,data in enumerate(tobj_):
      input_data = data['inputs'].reshape(-1,36,1)
      preds = model(input_data).detach().cpu().numpy()[0]
      dfp[idx] = preds
    return dfp


In [17]:
pkl_files = os.listdir('Test')
print(pkl_files)

['tps_6.pkl', 'tps_9.pkl', 'tps_2.pkl', 'tps_14.pkl', 'tps_4.pkl', 'tps_5.pkl', 'tps_3.pkl', 'tps_12.pkl', 'tps_7.pkl', 'tps_13.pkl', 'tps_10.pkl', 'tps_1.pkl', 'tps_11.pkl', 'tps_15.pkl', 'tps_8.pkl']


In [18]:
op0 = run_files(0,tobj)
print (op0)
op0.index = pd.date_range(start='2020-06-07 11:15:00', end='2020-06-07 14:00:00', freq='15min').astype(int) / 10**9
op0.columns = train_data.columns

op1 = run_files(1,tobj)
op1.index = pd.date_range(start='2020-06-10 14:15:00', end='2020-06-10 17:00:00', freq='15min').astype(int) / 10**9
op1.columns = train_data.columns

op2 = run_files(2,tobj)
op2.index = pd.date_range(start='2020-06-03 7:15:00', end='2020-06-03 10:00:00', freq='15min').astype(int) / 10**9
op2.columns = train_data.columns

op3 = run_files(3,tobj)
op3.index = pd.date_range(start='2020-06-15 19:15:00', end='2020-06-15 22:00:00', freq='15min').astype(int) / 10**9
op3.columns = train_data.columns


op4 = run_files(4,tobj)
op4.index = pd.date_range(start='2020-06-05 9:15:00', end='2020-06-05 12:00:00', freq='15min').astype(int) / 10**9
op4.columns = train_data.columns

op5 = run_files(5,tobj)
op5.index = pd.date_range(start='2020-06-06 10:15:00', end='2020-06-06 13:00:00', freq='15min').astype(int) / 10**9
op5.columns = train_data.columns

op6 = run_files(6,tobj)
op6.index = pd.date_range(start='2020-06-04 8:15:00', end='2020-06-04 11:00:00', freq='15min').astype(int) / 10**9
op6.columns = train_data.columns

op7 = run_files(7,tobj)
op7.index = pd.date_range(start='2020-06-13 17:15:00', end='2020-06-13 20:00:00', freq='15min').astype(int) / 10**9
op7.columns = train_data.columns

op8 = run_files(8,tobj)
op8.index = pd.date_range(start='2020-06-08 12:15:00', end='2020-06-08 15:00:00', freq='15min').astype(int) / 10**9
op8.columns = train_data.columns

op9 = run_files(9,tobj)
op9.index = pd.date_range(start='2020-06-14 18:15:00', end='2020-06-14 21:00:00', freq='15min').astype(int) / 10**9
op9.columns = train_data.columns

op10 = run_files(10,tobj)
op10.index = pd.date_range(start='2020-06-11 15:15:00', end='2020-06-11 18:00:00', freq='15min').astype(int) / 10**9
op10.columns = train_data.columns

op11 = run_files(11,tobj)
op11.index = pd.date_range(start='2020-06-02 6:15:00', end='2020-06-02 9:00:00', freq='15min').astype(int) / 10**9
op11.columns = train_data.columns

op12 = run_files(12,tobj)
op12.index = pd.date_range(start='2020-06-12 16:15:00', end='2020-06-12 19:00:00', freq='15min').astype(int) / 10**9
op12.columns = train_data.columns

op13 = run_files(13,tobj)
op13.index = pd.date_range(start='2020-06-16 20:15:00', end='2020-06-16 23:00:00', freq='15min').astype(int) / 10**9
op13.columns = train_data.columns

op14 = run_files(14,tobj)
op14.index = pd.date_range(start='2020-06-09 13:15:00', end='2020-06-09 16:00:00', freq='15min').astype(int) / 10**9
op14.columns = train_data.columns


final_output = pd.concat([op11,op2,op6,op4,op5,op0,op8,op14,op1,op10,op12,op7,op9,op3,op13])

          0         1         2         3         4         5         6   \
0   1.000527  1.000406  1.000700  0.999964  0.995982  0.978739  0.993628   
1   1.000622  1.000507  1.000780  1.000094  0.996370  0.980280  0.994177   
2   1.000357  1.000305  1.000564  0.999913  0.996588  0.981526  0.994500   
3   0.999798  0.999779  1.000021  0.999411  0.996400  0.982387  0.994439   
4   0.999325  0.999334  0.999561  0.998987  0.996248  0.983154  0.994400   
5   0.999126  0.999131  0.999345  0.998805  0.996215  0.983879  0.994479   
6   0.999180  0.999155  0.999348  0.998861  0.996414  0.985173  0.994851   
7   0.999377  0.999422  0.999608  0.999137  0.997014  0.986363  0.995492   
8   0.999632  0.999711  0.999883  0.999445  0.997602  0.987823  0.996182   
9   0.999859  0.999959  1.000123  0.999704  0.998025  0.988747  0.996664   
10  1.000218  1.000280  1.000414  1.000072  0.998634  0.990996  0.997523   
11  1.000314  1.000401  1.000539  1.000188  0.998795  0.991037  0.997656   

          7

In [20]:
final_output.to_json('final_output_gru.json')