<a href="https://colab.research.google.com/github/candac/CS231n/blob/master/Train_DeepSP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils import clip_grad_norm_
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence, pad_sequence
from torch.utils.data import Dataset, DataLoader


import collections
import pickle
from datetime import datetime
from math import radians, cos, sin, asin, sqrt

import json

torch.set_printoptions(precision=6)

## Dataset

In [9]:
class StayPointTrainDataset(Dataset):
    """Training sessions read from ``<data_path>sample_train.json``.

    The file is read as JSON lines: one JSON object per line. Each record is
    accessed through the keys ``"stays"`` (a list of objects with ``"x"``,
    ``"y"`` and ``"time_index"``), ``"num_stays"`` and ``"user_id"``.

    Attributes set by ``__init__``:
        data_path:   directory prefix; it is concatenated directly with the
                     file names, so it must end with a path separator.
        trainset:    list of decoded records.
        train_size:  number of records.
        num_userid:  first value of ``<data_path>maplengths.csv``.
    """

    def __init__(self, data_path):
        #load dataset
        self.data_path = data_path
        # Context manager closes the file handle; blank lines (e.g. a trailing
        # newline at end of file) are skipped instead of crashing json.loads.
        with open(self.data_path + 'sample_train.json', 'r') as train_file:
            self.trainset = [json.loads(line) for line in train_file if line.strip()]
        self.train_size = len(self.trainset)
        mapslen = pd.read_csv(self.data_path + "maplengths.csv", names=["usermaplength"], header=None)
        self.num_userid = mapslen["usermaplength"].values[0]

    def __len__(self):
        #return length of dataset
        return len(self.trainset)

    def __getitem__(self, idx):
        """Return ``(x_list, y_list, time_list, user_number)`` for record ``idx``.

        ``user_number`` is the integer parsed from ``user_id`` after dropping
        its first two characters.
        """
        record = self.trainset[idx]
        x_session, y_session, realtime_session = \
        self.session_processing(record["stays"], record["num_stays"])
        return (x_session, y_session, realtime_session, int(record["user_id"][2:]))


    def session_processing(self, session, numstays):
        """Split a session into parallel x, y and time_index lists.

        Args:
            session:  iterable of stay objects with "x", "y", "time_index".
            numstays: length of the returned lists. Positions beyond
                      ``len(session)`` stay 0.0; a session longer than
                      ``numstays`` raises IndexError.

        Returns:
            Three lists of ``numstays`` floats each: x, y, time_index.
        """
        x_session, y_session, realtime_session = np.zeros(numstays), np.zeros(numstays),np.zeros(numstays)
        for i,chkin in enumerate(session):
            x, y, real_time = chkin["x"],chkin["y"], chkin["time_index"]
            # Builtin float() replaces np.float, which was only an alias for
            # it and has been removed from NumPy (1.24+).
            x_session[i] = float(x)
            y_session[i] = float(y)
            realtime_session[i] = real_time
        return list(x_session), list(y_session), list(realtime_session)

In [10]:
class StayPointValidationDataset(Dataset):
    """Validation sessions read from ``<data_path>sample_val.json``.

    The file is read as JSON lines: one JSON object per line. Each record is
    accessed through the keys ``"stays"`` (a list of objects with ``"x"``,
    ``"y"`` and ``"time_index"``), ``"num_stays"`` and ``"user_id"``.

    Attributes set by ``__init__``:
        data_path:   directory prefix; it is concatenated directly with the
                     file names, so it must end with a path separator.
        valset:      list of decoded records.
        val_size:    number of records.
        num_userid:  first value of ``<data_path>maplengths.csv``.
    """

    def __init__(self, data_path):
        #load dataset
        self.data_path = data_path
        # Context manager closes the file handle; blank lines (e.g. a trailing
        # newline at end of file) are skipped instead of crashing json.loads.
        with open(self.data_path + 'sample_val.json', 'r') as val_file:
            self.valset = [json.loads(line) for line in val_file if line.strip()]
        self.val_size = len(self.valset)
        mapslen = pd.read_csv(self.data_path + "maplengths.csv", names=["usermaplength"], header=None)
        self.num_userid = mapslen["usermaplength"].values[0]

    def __len__(self):
        #return length of dataset
        return len(self.valset)

    def __getitem__(self, idx):
        """Return ``(x_list, y_list, time_list, user_number)`` for record ``idx``.

        ``user_number`` is the integer parsed from ``user_id`` after dropping
        its first two characters.
        """
        record = self.valset[idx]
        x_session, y_session, realtime_session = \
        self.session_processing(record["stays"], record["num_stays"])
        return (x_session, y_session, realtime_session, int(record["user_id"][2:]))


    def session_processing(self, session, numstays):
        """Split a session into parallel x, y and time_index lists.

        Args:
            session:  iterable of stay objects with "x", "y", "time_index".
            numstays: length of the returned lists. Positions beyond
                      ``len(session)`` stay 0.0; a session longer than
                      ``numstays`` raises IndexError.

        Returns:
            Three lists of ``numstays`` floats each: x, y, time_index.
        """
        x_session, y_session, realtime_session = np.zeros(numstays), np.zeros(numstays),np.zeros(numstays)
        for i,chkin in enumerate(session):
            x, y, real_time = chkin["x"],chkin["y"], chkin["time_index"]
            # Builtin float() replaces np.float, which was only an alias for
            # it and has been removed from NumPy (1.24+).
            x_session[i] = float(x)
            y_session[i] = float(y)
            realtime_session[i] = real_time
        return list(x_session), list(y_session), list(realtime_session)

In [11]:
def pad_collate(batch):
    """Collate variable-length sessions into zero-padded batch tensors.

    Args:
        batch: list of ``(x_list, y_list, time_list, user_number)`` samples.

    Returns:
        ``(x_padded, y_padded, t_padded, users, seq_lens)``, all moved to the
        module-level ``device``. The padded tensors are batch-first and
        right-padded with 0 up to the longest session in the batch;
        ``seq_lens`` holds the unpadded length of each x list.
    """
    xs, ys, ts, users = zip(*batch)

    def _pad(tensors):
        # Right-pad with zeros; batch dimension first.
        return pad_sequence(tensors, batch_first=True, padding_value=0.0)

    user_tensor = torch.LongTensor(users)
    lengths = torch.LongTensor([len(seq) for seq in xs])  # y and t lists are expected to match

    padded_x = _pad([torch.FloatTensor(seq) for seq in xs])
    padded_y = _pad([torch.FloatTensor(seq) for seq in ys])
    padded_t = _pad([torch.LongTensor(seq) for seq in ts])

    return (
        padded_x.to(device),
        padded_y.to(device),
        padded_t.to(device),
        user_tensor.to(device),
        lengths.to(device),
    )

## Bidirectional RNN-GRU

In [12]:
class Flat_RNN_GRU(nn.Module):
    """GRU over (x, y, time) triples with a per-step linear head of size 2.

    Args:
        hidden_dim:    GRU hidden size per direction.
        num_layers:    number of stacked GRU layers.
        dropout:       dropout between GRU layers (passed to ``nn.GRU``).
        bidirectional: whether the GRU runs in both directions. The linear
                       head is sized accordingly.

    NOTE(review): the GRU is run on the padded batch without packing, so
    padded steps are processed like real ones. With ``bidirectional=True``
    the backward direction also reads later time steps at every position;
    when inputs and targets are the same sequence shifted by one (as in
    ``process_train``), that includes the target itself. Confirm this is
    intended.
    """

    def __init__(self, hidden_dim, num_layers, dropout, bidirectional):
        super(Flat_RNN_GRU, self).__init__()
        #Network layer sizes
        self.hidden_dim = hidden_dim
        self.output_dim = 2

        #Output layers
        self.gru = nn.GRU(3, hidden_dim, batch_first=True, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional)
        # GRU output width is hidden_dim per direction; size the head from the
        # flag instead of assuming a bidirectional network.
        num_directions = 2 if bidirectional else 1
        self.fc_final = nn.Linear(hidden_dim * num_directions, self.output_dim)

    def forward(self, inputs_x, inputs_y, inputs_time, inputs_user, session_lengths):
        """Run the GRU and project every step to two outputs.

        Args:
            inputs_x, inputs_y, inputs_time: tensors of shape (batch, seq).
            inputs_user:     accepted for interface compatibility; not used.
            session_lengths: per-sample lengths (batch,). The hidden state at
                             index ``session_lengths - 2`` is gathered, so
                             every length must be at least 2.

        Returns:
            final_output: (batch, seq, 2) per-step predictions.
            gru_output:   (batch, seq, hidden_dim * num_directions).
            hidden_out:   (1, batch, hidden_dim * num_directions), the GRU
                          output at index ``session_lengths - 2``.
        """
        # Add a trailing feature axis so the three inputs can be concatenated.
        inputs_x = inputs_x.unsqueeze(-1)
        inputs_y = inputs_y.unsqueeze(-1)
        # Time indices may arrive as integers; cast explicitly to the
        # coordinate dtype rather than relying on torch.cat type promotion.
        inputs_t = inputs_time.unsqueeze(-1).to(inputs_x.dtype)

        x = torch.cat(( inputs_x, inputs_y, inputs_t ), dim=2) #on tensor dimension 2 concatenate

        gru_output, _ = self.gru(x) #to do: add init hidden

        final_output = self.fc_final(gru_output) #to do: is fc_final needed for just gru_output

        idxs = session_lengths - 2
        hidden_indices = idxs.view(-1, 1, 1).expand(gru_output.size(0), 1, gru_output.size(2))
        hidden_out = torch.gather(gru_output, 1, hidden_indices)
        # Drop only the gathered step axis. A bare squeeze() would also drop
        # the batch axis when the batch holds a single sample.
        hidden_out = hidden_out.squeeze(1).unsqueeze(0)
        return final_output, gru_output, hidden_out

In [13]:
def masked_squared_error_loss(y_hat, y):
    """
    Per-row squared distance between prediction and target, zeroed on padding.

    y_hat:  batch*sequence_len x 2
    y:  batch*sequence_len x 2
    --------------
    loss: distance(y_hat, y) **2, shape batch*sequence_len x 1

    Column 0 is shifted by +1 and column 1 by +103 before the distance is
    computed (presumably the data stores coordinates as offsets from
    1 deg / 103 deg -- confirm against the preprocessing).

    A row counts as padding when its target's first column is exactly 0.
    """
    lat_hat = y_hat[:, 0] + 1
    lon_hat = y_hat[:, 1] + 103

    lat = y[:, 0] + 1
    lon = y[:, 1] + 103

    dist = tensor_distance(lat_hat, lon_hat, lat, lon)
    dist2 = dist**2

    # 0/1 mask to filter out padding. A sign()-based mask yields -1 for
    # negative targets, which would negate (not keep) their loss.
    mask = (y[:, 0] != 0).to(dist2.dtype)
    loss = dist2 * mask   # makes zeros where padding was added

    return loss.view(-1, 1)  #torch.Size([batch * sequence_len x 1])

In [14]:
def tensor_distance(lat_hat, lon_hat, lat, lon):
    """
    Equirectangular-approximation distance in km between the points
    (lat_hat, lon_hat) and (lat, lon), all given in decimal degrees.
    Works elementwise on tensors.
    """
    earth_radius_km = 6371

    # degrees -> radians
    lat1, lon1 = torch.deg2rad(lat), torch.deg2rad(lon)
    lat2, lon2 = torch.deg2rad(lat_hat), torch.deg2rad(lon_hat)

    # Longitude difference is scaled by the cosine of the mean latitude.
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    mean_lat = (lat1 + lat2)/2.
    east = dlon * torch.cos(mean_lat)
    north = dlat

    return torch.sqrt(east**2 + north**2) * earth_radius_km

## Utils

In [15]:
def process_train(x_batch, y_batch, t_batch):
    """Split padded (batch, seq) tensors into inputs and one-step-ahead targets.

    Inputs drop the last column, targets drop the first. The time tensors
    are additionally cast to int64.

    Returns:
        (x_in, x_target, y_in, y_target, t_in, t_target)
    """
    all_but_last = slice(None, -1)
    all_but_first = slice(1, None)

    x_in, x_target = x_batch[:, all_but_last], x_batch[:, all_but_first]
    y_in, y_target = y_batch[:, all_but_last], y_batch[:, all_but_first]
    t_in = t_batch[:, all_but_last].to(torch.int64)
    t_target = t_batch[:, all_but_first].to(torch.int64)

    return x_in, x_target, y_in, y_target, t_in, t_target

## Train and Validation

In [25]:
# Hyperparameters for the run. The trailing comments appear to record other
# values that were tried.
epochs = 100
batch_size = 64 #32, 64
hidden_size = 120 #150
learning_rate = 0.0005 #0.0005, 0.0001
num_layers = 2 #2
max_grad_norm = 1.0  # threshold passed to clip_grad_norm_ in the training loop
dropout= 0.15    #0.15, 0.05
bidirectional = True

#Init
# `device` is also read as a global by pad_collate, so it must be defined
# before either DataLoader is iterated.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Google Drive path; the dataset classes concatenate file names directly onto
# it, so the trailing slash is required.
data_path = "/content/drive/My Drive/DeepSP/data/medium_sample/"
train_dataset = StayPointTrainDataset(data_path)
trainloader = DataLoader(dataset=train_dataset,
                      batch_size=batch_size,
                      shuffle=True,
                      collate_fn=pad_collate, # use custom collate function here
                      pin_memory=False)
print("training dataset loaded")

val_dataset = StayPointValidationDataset(data_path)
# NOTE(review): the validation loader also shuffles; batch composition
# therefore differs between epochs.
valloader = DataLoader(dataset=val_dataset,
                      batch_size=batch_size,
                      shuffle=True,
                      collate_fn=pad_collate, # use custom collate function here
                      pin_memory=False)
print("validation dataset loaded")

intra_rnn_model = Flat_RNN_GRU(hidden_size, num_layers, dropout, bidirectional).to(device) #Simple model

# Adam with L2 weight decay over all model parameters.
pred_optim = torch.optim.Adam(intra_rnn_model.parameters(), lr=learning_rate, weight_decay=1e-5)

# Optional: resume from a saved checkpoint holding the model state under
# 'intra_rnn' and the optimizer state under 'pred_optim'. Left commented out,
# so as written the model trains from scratch.
# #Uncomment to load model; if training from scratch comment out
# PATH = "/content/drive/My Drive/DeepSP/saved" + '/model_best_20201004'
# checkpoint = torch.load(PATH) #Attention with map_location=torch.device('cpu')
# intra_rnn_model.load_state_dict(checkpoint['intra_rnn'])
# pred_optim.load_state_dict(checkpoint['pred_optim'])
# print("model loaded")

training dataset loaded
validation dataset loaded
model loaded


In [None]:
# epochs = 10

In [27]:
#Epoch loop
# Each epoch: one optimisation pass over trainloader, then one evaluation pass
# over valloader. Reported numbers are averages of per-batch mean losses.
for epoch in range(epochs):
    intra_rnn_model.train()
    # Per-epoch accumulators; all are reset at the start of every epoch.
    loss_epoch = 0
    vloss_epoch = 0
    last_vloss_epoch = 0
    count = 0
    list_last_predict = []
    list_last_target = []
    list_second_last = []

    #Loop for batches
    for i_batch, (xx_pad, yy_pad, tt_pad, batch_user, seq_lens) in enumerate(trainloader):

        #Divide into inputs and targets
        x_pad, x_pad_target, y_pad, y_pad_target, t_pad, t_pad_target = process_train(xx_pad, yy_pad, tt_pad)

        #RNN
        predicted, _, _ = intra_rnn_model(x_pad, y_pad, t_pad, batch_user, seq_lens)

        #Loss
        predicted_loss = masked_squared_error_loss(predicted.view(-1, 2), \
                                                   torch.cat([x_pad_target.reshape(-1,1), \
                                                              y_pad_target.reshape(-1,1)],dim=1))

        # Normalise by the number of real targets: each session of length L
        # contributes L - 1 prediction steps.
        mean_loss = predicted_loss.sum(0) / (seq_lens - 1).sum().float()

        pred_optim.zero_grad() # set gradients to zero
        mean_loss.backward()
        clip_grad_norm_(intra_rnn_model.parameters(), max_grad_norm)
        pred_optim.step() # update weights


        loss_epoch += float(mean_loss.item())


        if (i_batch + 1) % 200 == 0:
            print('Epoch [{}/{}], Step {}, Left user: {}, Loss: {:.4f}'
                  .format(epoch+1, epochs, i_batch+1, train_dataset.train_size - ((i_batch+1) * batch_size),
                          mean_loss.item()))


    #Validation
    intra_rnn_model.eval()

    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every validation batch.
    with torch.no_grad():
        #Loop for batches
        for v_batch, (xx_pad, yy_pad, tt_pad, batch_user, seq_lens) in enumerate(valloader):

            #Divide into inputs and targets
            x_pad, x_pad_target, y_pad, y_pad_target, t_pad, t_pad_target = process_train(xx_pad, yy_pad, tt_pad)

            #RNN
            vpredicted, _, _ = intra_rnn_model(x_pad, y_pad, t_pad, batch_user, seq_lens)

            #Loss
            vpredicted_loss = masked_squared_error_loss(vpredicted.view(-1, 2), \
                                                       torch.cat([x_pad_target.reshape(-1,1), \
                                                                  y_pad_target.reshape(-1,1)],dim=1))

            vmean_loss = vpredicted_loss.sum(0) / (seq_lens - 1).sum().float()
            vloss_epoch += float(vmean_loss.item())

            #Last staypoint loss
            # Index of each session's final target inside the shifted tensors.
            idxs = seq_lens - 2

            target_last_idxs = idxs.view(-1, 1)
            x_pad_target_last = torch.gather(x_pad_target, 1, target_last_idxs)
            y_pad_target_last = torch.gather(y_pad_target, 1, target_last_idxs)
            target_last = torch.cat([x_pad_target_last, y_pad_target_last], dim=1)

            # Same index repeated for both output coordinates, shaped
            # (batch, 1, 2) for gather along the sequence axis.
            predicted_last_idxs = idxs.view(-1, 1)\
            .expand(vpredicted.size()[0],vpredicted.size()[2])\
            .reshape(vpredicted.size()[0], 1, vpredicted.size()[2])
            vpredicted_last = torch.gather(vpredicted, 1, predicted_last_idxs)\
            .reshape(vpredicted.size()[0], vpredicted.size()[2])

            last_vpredicted_loss = masked_squared_error_loss(vpredicted_last, target_last)
            last_vmean_loss = last_vpredicted_loss.sum(0) / len(seq_lens)
            last_vloss_epoch += float(last_vmean_loss.item())

            #target second last (i.e. last observable stay-point)
            x_second_last = torch.gather(x_pad, 1, target_last_idxs)
            y_second_last = torch.gather(y_pad, 1, target_last_idxs)
            second_last = torch.cat([x_second_last, y_second_last], dim=1)


            #Save if last epoch
            if epoch == (epochs - 1):
                list_last_predict.append(vpredicted_last.detach())
                list_last_target.append(target_last.detach())
                list_second_last.append(second_last.detach())

    #Print epoch results
    print("Epoch {} finished, average train loss of this epoch is {:.4f}, \
    average val loss of this epoch is {:.4f}, average last staypoint loss of this epoch is {:.4f}"
          .format(epoch + 1, loss_epoch / (i_batch+1), vloss_epoch / (v_batch+1), last_vloss_epoch/(v_batch+1)))  
    
    # #Uncomment to create saving checkpoints for network
    # if (epoch + 1) % (10) == 0:
    #     torch.save({"intra_rnn": intra_rnn_model.state_dict(),
    #                 "pred_optim": pred_optim.state_dict()}, "/content/drive/My Drive/DeepSP/saved" + '/model_epoch{}'.format(epoch + 1))
    #     print("Epoch {} model saved".format(epoch + 1))

Epoch [1/10], Step 200, Left user: 7811, Loss: 5.9655
Epoch 1 finished, average train loss of this epoch is 8.6839,     average val loss of this epoch is 2.1329, average last staypoint loss of this epoch is 5.2492
Epoch [2/10], Step 200, Left user: 7811, Loss: 4.9720
Epoch 2 finished, average train loss of this epoch is 8.3440,     average val loss of this epoch is 11.8856, average last staypoint loss of this epoch is 19.4549
Epoch [3/10], Step 200, Left user: 7811, Loss: 4.3069
Epoch 3 finished, average train loss of this epoch is 5.4102,     average val loss of this epoch is 2.3166, average last staypoint loss of this epoch is 5.4532
Epoch [4/10], Step 200, Left user: 7811, Loss: 7.7807
Epoch 4 finished, average train loss of this epoch is 11.0985,     average val loss of this epoch is 11.6289, average last staypoint loss of this epoch is 13.5865
Epoch [5/10], Step 200, Left user: 7811, Loss: 5.6344
Epoch 5 finished, average train loss of this epoch is 7.6922,     average val loss of

## Runs log - Medium Sample

### Bidirectional Simple Model

Epoch 50, average test loss is 20.5

Epoch 100, average test loss is 4.7683

Epoch >100, average test loss is 1.6483

#### Parameters
<ul>
<li>epochs = >100</li>
<li>batch_size = 64 #32, 64 </li>
<li>hidden_size = 120 #150</li>
<li>learning_rate = 0.0005 #0.0005, 0.0001</li>
<li>num_layers = 2 #2</li>
<li>max_grad_norm = 1.0</li>
<li>dropout=0.15    #0.15</li>
<li>bidirectional = True</li>
</ul>
