## Train Model

In [19]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import requests
import datetime
import torch
import torch.utils.data
import os
import importlib
import sys
import re
import pickle
from mpl_toolkits import mplot3d
from io import BytesIO
from math import log, exp, tan, atan, ceil
from PIL import Image

from utils import dataset_utils
from utils import createAISdata
#from utils import protobufDecoder
#from utils import plotting
from models import VRNN
from Config import config

# To measure the training time
from time import time

In [2]:
datasets_path = "C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//Pickle//"

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f">> Using device: {device}")
if device=="cuda:0":
    torch.no_grad()
    torch.cuda.empty_cache()

>> Using device: cpu


In [4]:
shiptypes = config.SHIPTYPE_CARGO + config.SHIPTYPE_TANKER
shipFileName = 'test'
binedges = (config.LAT_EDGES, config.LON_EDGES, config.SOG_EDGES, config.COG_EDGES)
batch_size = 4

In [5]:
class PadSequence:
    def __call__(self, batch):
                
        # each element in "batch" is a tuple ( mmsis,  shiptypes,  lengths, inputs, targets)
        # Get each sequence and pad it
        mmsis = [x[0] for x in batch] # Maritime Mobile Service Identity numbers
        shiptypes = [x[1] for x in batch] # tank, cargo, etc.
        lengths = [x[2] for x in batch] # used as measure of size
        inputs = [x[3] for x in batch] # they are normalized 
        targets = [x[4] for x in batch] # seems to contain the real path of the vessel
                                        # lat, lon, speed, course (NOT NORMALIZED)
                
        inputs_padded = torch.nn.utils.rnn.pad_sequence(inputs, batch_first=True)
        targets_padded = torch.nn.utils.rnn.pad_sequence(targets, batch_first=True)

        return  torch.tensor(mmsis),  torch.tensor(shiptypes),  torch.tensor(lengths, dtype=torch.float), inputs_padded, targets_padded

In [6]:
startTime = time.time()

In [7]:
startTime = time.time()

trainset = dataset_utils.AISDataset(dataPath = datasets_path, fileName = "CargTank.pkl")
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, pin_memory=True, num_workers = 0, collate_fn=PadSequence())

endTime = time.time()

totalTimeInSec = endTime - startTime
totalTimeInminutes = totalTimeInSec // 60

print('Total Time in seconds: ',totalTimeInSec, ' total time in min: ', totalTimeInminutes)

self.Infopath  C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//Pickle//CargTank.pkl
self.datapath:  C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//Pickle//CargTank_idxs.pkl


dataPath: C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//Pickle//
fileName: CargTank.pkl
self.params[dataFileName]: CargTank_idxs.pkl


self.datapath 12 C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//Pickle//CargTank_idxs.pkl
index:  25193565   total_updates:  621159
Total Time in seconds:  41.36721992492676  total time in min:  0.0


In [8]:
testset = dataset_utils.AISDataset(dataPath = datasets_path, fileName = "CargTank.pkl", train_mean = trainset.mean)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, pin_memory=True, num_workers = 0, collate_fn=PadSequence())

self.Infopath  C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//Pickle//CargTank.pkl
self.datapath:  C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//Pickle//CargTank_idxs.pkl


dataPath: C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//Pickle//
fileName: CargTank.pkl
self.params[dataFileName]: CargTank_idxs.pkl


In [9]:
train_n = len(trainset)
test_n = len(testset)
num_batches = len(train_loader)
num_epochs = 1

In [10]:
model = VRNN.VRNN(input_shape=trainset.datadim, latent_shape=config.LATENT_SIZE, generative_bias=trainset.mean, device=device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003)

load_model = False
if load_model:
    modelName = 'model_' + shipFileName + '_150'
    model.load_state_dict(torch.load('models/saved_models/' + modelName +'.pth', map_location=device))

model.to(device)

VRNN(
  (phi_x): Sequential(
    (0): Linear(in_features=752, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=100, bias=True)
  )
  (phi_z): Sequential(
    (0): Linear(in_features=100, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=100, bias=True)
  )
  (prior): Sequential(
    (0): Linear(in_features=100, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=200, bias=True)
  )
  (encoder): Sequential(
    (0): Linear(in_features=200, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=200, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=200, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=752, bias=True)
  )
  (rnn): LSTM(200, 100)
)

In [11]:
def computeLoss(log_px, log_pz, log_qz, lengths, beta=1):
    
    max_len = len(log_px)
    curmask = torch.arange(max_len, device=device)[:, None] < lengths[None, :] #max_seq_len X Batch
    
    log_px = torch.stack(log_px, dim=0) * curmask
    log_px = log_px.sum(dim=0) #Sum over time
   
    log_pz = torch.stack(log_pz, dim=0) * curmask
    log_qz = torch.stack(log_qz, dim=0) * curmask
    kl = log_qz.sum(dim=0) - log_pz.sum(dim=0) #Sum over time
    
    loss = log_px - beta * kl #recon loss - beta_kl
    loss = torch.mean(loss/lengths) #mean over batch
    
    return -loss, log_px, kl

In [12]:
loss_tot = []
kl_tot = []
recon_tot = []
val_loss_tot = []
val_kl_tot = []
val_recon_tot = []

In [13]:
zmus12 = torch.zeros(testset.maxLength, test_n, config.LATENT_SIZE, device = 'cpu')

In [14]:
zmus12.shape

torch.Size([144, 2431, 100])

In [16]:
for epoch in range(1, num_epochs+1): #num_epochs+1
    #Begin training loop
    tic = time.time()

    loss_epoch = 0
    kl_epoch = 0
    recon_epoch = 0
    model.train()
    zmus = torch.zeros(testset.maxLength, test_n, config.LATENT_SIZE, device = 'cpu')
    
    fullLogits = torch.zeros(testset.maxLength, test_n, testset.datadim, device = 'cpu')
    
    for i, (mmsi, label, lengths, inputs, targets) in enumerate(test_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)
        lengths = lengths.to(device)
        
        batch_size, seq_len, datadim = inputs.shape
        
        #logits = torch.zeros(seq_len, len(label), trainset.datadim , device = 'cpu')
        
        #Get the maximum length of the current batch
        max_len = int(torch.max(lengths).item())
        
        log_px, log_pz, log_qz, logits, _, z_mus = model(inputs,targets,label,logits=None)
        
        #Calculate endIndex which is used to store current batch in zmus
        endIndex = (batch_size*(i+1)) if (batch_size*(i+1)) <= test_n else test_n
   
        #Store current batch means in zmus
        zmus[:max_len,(batch_size*i):endIndex,:] = z_mus.detach().cpu()
        
        fullLogits[:max_len,(batch_size*i):endIndex,:] = logits.detach().cpu()
        
        loss, log_px, kl = computeLoss(log_px, log_pz, log_qz, lengths)
                
        #val_loss += loss.item()*len(lengths)
        #val_kl += torch.sum(kl/lengths).item()
        #val_recon += torch.sum(log_px/lengths).item()
        
        
        print('logits.shape: ', logits.shape)
        print('seq_len: ',seq_len)
        print('trainset.datadim: ',trainset.datadim)
        print('z_mus.shape ', z_mus.shape)
        print('--------------------------------')
        break;
        
        
        #z_means = {
        #    'Epoch': epoch,
        #    'z_means': z_mus,
        #    'labels': label,
        #    'log_px': log_px,
        #    'log_pz': log_pz,
        #    'log_qz': log_qz
        #    }
        
        #with open('models/saved_models/latentSpace_train.pkl', "wb") as f:
            #
        
        #    pickle.dump(z_means, f)
        
    

logits.shape:  torch.Size([70, 4, 752])
seq_len:  70
trainset.datadim:  752
z_mus.shape  torch.Size([70, 4, 100])
--------------------------------


In [18]:
fullLogits.shape

torch.Size([144, 2431, 752])

In [None]:
for epoch in range(1, num_epochs+1): #num_epochs+1
    #Begin training loop
    tic = time()

    loss_epoch = 0
    kl_epoch = 0
    recon_epoch = 0
    model.train()
    for i, (mmsi, label, lengths, inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)
        lengths = lengths.to(device)
        
        log_px, log_pz, log_qz, _, _, z_mus = model(inputs,targets,label,logits=None)
        
        loss, log_px, kl = computeLoss(log_px, log_pz, log_qz, lengths)
        
        model.zero_grad()
        loss.backward()
        optimizer.step()
        
        loss_epoch += loss.item()*len(lengths)
        kl_epoch += torch.sum(kl/lengths).item()
        recon_epoch += torch.sum(log_px/lengths).item()
        
        #z_means = {
        #    'Epoch': epoch,
        #    'z_means': z_mus,
        #    'labels': label,
        #    'log_px': log_px,
        #    'log_pz': log_pz,
        #    'log_qz': log_qz
        #    }
        
        #with open('models/saved_models/latentSpace_train.pkl', "wb") as f:
            #
        
        #    pickle.dump(z_means, f)
    
    loss_tot.append(loss_epoch/train_n)
    kl_tot.append(kl_epoch/train_n)
    recon_tot.append(recon_epoch/train_n)
    
    #Begin validation loop
    zmus = torch.zeros(testset.maxLength, test_n, config.LATENT_SIZE, device = 'cpu')
    
    fullLogits = torch.zeros(testset.maxLength, test_n, testset.datadim, device = 'cpu')
    
    toc = time()
    
    trainTimeInSec = (toc-tic)
    
    print('Total time taken for training {} epoch is {} seconds and {} min'.format(i, trainTimeInSec, (trainTimeInSec//60)))
    
    
    val_loss = 0
    val_kl = 0
    val_recon = 0
    model.eval()
    for i, (mmsi, label, lengths, inputs, targets) in enumerate(test_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)
        lengths = lengths.to(device)
        
        batch_size, seq_len, datadim = inputs.shape
        
        #logits = torch.zeros(seq_len, len(label), trainset.datadim , device = 'cpu')
        
        #Get the maximum length of the current batch
        max_len = int(torch.max(lengths).item())
        
        log_px, log_pz, log_qz, logits, _, z_mus = model(inputs,targets,label,logits=None)
        
        #Calculate endIndex which is used to store current batch in zmus
        endIndex = (batch_size*(i+1)) if (batch_size*(i+1)) <= test_n else test_n
   
        #Store current batch means in zmus
        zmus[:max_len,(batch_size*i):endIndex,:] = z_mus.detach().cpu()
        
        fullLogits[:max_len,(batch_size*i):endIndex,:] = logits.detach().cpu()
        
        loss, log_px, kl = computeLoss(log_px, log_pz, log_qz, lengths)
                
        val_loss += loss.item()*len(lengths)
        val_kl += torch.sum(kl/lengths).item()
        val_recon += torch.sum(log_px/lengths).item()
    
    testToc = time()
    testTimeInSec = (testToc-tic) - trainTimeInSec
    
    print('Total time taken for test {} epoch is {} seconds and {} min'.format(i, testTimeInSec, (testTimeInSec//60)))
        
    with open('models/saved_models/latentSpace_test.pkl', "wb") as f:
        
        ###
        z_means = {
            'Epoch': epoch,
            'z_means': zmus,
            'Logits' : fullLogits

         }
        
        pickle.dump(z_means, f)
        
    saveToc = time()
    saveTimeInSec = (saveToc-tic) - testToc
    
    print('Total time taken for test {} epoch is {} seconds and {} min'.format(i, saveTimeInSec, (saveTimeInSec//60)))
    
    
    val_loss_tot.append(val_loss/test_n)
    val_kl_tot.append(val_kl/test_n)
    val_recon_tot.append(val_recon/test_n)
    
    datapoints = np.random.choice(test_n, size = 3, replace=False)
    #plotting.make_vae_plots((loss_tot, kl_tot, recon_tot, val_loss_tot, val_kl_tot, val_recon_tot), model, datapoints, testset, binedges, device)
    
    #print('Epoch {} of {} finished. Trainingloss = {}. Validationloss = {}'.format(epoch, num_epochs, loss_epoch/train_n, val_loss/test_n))
    dataset_utils.eprint('Epoch {} of {} finished. Trainingloss = {}. Validationloss = {}'.format(epoch, num_epochs, loss_epoch/train_n, val_loss/test_n))

    toc = time()
    dataset_utils.eprint("Time taken to train and test in {} epoch is {}".format(epoch, (toc-tic)))
    
    trainingCurves = {
        'loss_tot': loss_tot,
        'kl_tot': kl_tot,
        'recon_tot': recon_tot,
        'val_loss_tot': val_loss_tot,
        'val_kl_tot': val_kl_tot,
        'val_recon_tot': val_recon_tot
    }
    with open('models/saved_models/trainingCurves_' + shipFileName + '.pkl', "wb") as f:
        pickle.dump(trainingCurves, f)
    
    if (epoch%5==0):
        torch.save(model.state_dict(), 'models/saved_models/model_' + shipFileName + '_' + str(epoch) + '.pth')
        

trainingCurves = {
    'loss_tot': loss_tot,
    'kl_tot': kl_tot,
    'recon_tot': recon_tot,
    'val_loss_tot': val_loss_tot,
    'val_kl_tot': val_kl_tot,
    'val_recon_tot': val_recon_tot
}

print('loss_tot: {}'.format(loss_tot))
print('kl_tot: {}'.format(kl_tot))
print('recon_tot: {}'.format(recon_tot))
print('val_loss_tot: {}'.format(val_loss_tot))
print('val_kl_tot: {}'.format(val_kl_tot))
print('val_recon_tot: {}'.format(val_recon_tot))

torch.save(model.state_dict(), 'models/model_' + shipFileName + '.pth')
with open('models/trainingCurves_' + shipFileName + '.pkl', "wb") as f:
        pickle.dump(trainingCurves, f)

Total time taken for training 2429 epoch is 971.4165139198303 seconds and 16.0 min
