## Loading Training data

In [2]:
# importing packages 
import pandas as pd 

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import requests
import datetime
import torch
import torch.utils.data
import os
import importlib
import sys
import re
import pickle
from mpl_toolkits import mplot3d
from io import BytesIO
from math import log, exp, tan, atan, ceil
from PIL import Image

from utils import dataset_utils
from utils import createAISdata
#from utils import protobufDecoder
#from utils import plotting
from models import VRNN
from Config import config

# To measure the training time
from time import time

In [3]:
# Pick the first CUDA device when PyTorch can see one, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f">> Using device: {device}")
# Test `device.type` rather than comparing the torch.device object with the
# string "cuda:0", which is not a reliable way to detect the CUDA branch.
if device.type == "cuda":
    # A bare `torch.no_grad()` call used to sit here. It only constructs a
    # context manager without entering it, so it had no effect and was dropped.
    torch.cuda.empty_cache()
#timestamp = datetime.datetime.fromtimestamp(update.t_epoch_sec).strftime('%d/%m/%Y %H:%M:%S')

# Number of sequences per mini-batch handed to the DataLoaders.
batch_size = 4
shipFileName = 'test'

# Cargo and tanker ship-type values from the project config, combined with `+`.
shiptypes = config.SHIPTYPE_CARGO + config.SHIPTYPE_TANKER

# Edge definitions from the config, kept in (lat, lon, sog, cog) order.
# NOTE(review): presumably the bin edges used to discretise each feature -- confirm in Config.
binedges = (
    config.LAT_EDGES,
    config.LON_EDGES,
    config.SOG_EDGES,
    config.COG_EDGES,
)

>> Using device: cpu


In [4]:
#path = "C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//Pickle//CargTank_1911.pkl"
#path_index = "C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//Pickle//CargTank_1911_idxs.pkl"

# Folder that holds the pickled datasets (machine-specific absolute location).
datasets_path = "C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//"

# Individual files inside that folder, built from the folder string above.
path = datasets_path + "test.pkl"
path_index = datasets_path + "CargTank_1911_idxs.pkl"
#datasets_path = "C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//Pickle//"

In [5]:
class PadSequence:
    """Collate callable that merges variable-length samples into one zero-padded batch."""

    @staticmethod
    def _field(batch, position):
        """Gather element `position` of every sample tuple into a list."""
        return [sample[position] for sample in batch]

    def __call__(self, batch):
        """Collate a list of samples.

        Each sample is a tuple (mmsi, shiptype, length, inputs, targets).

        Returns a tuple of
            mmsis          -- tensor built from the per-sample MMSI values
            shiptypes      -- tensor built from the per-sample ship types
            lengths        -- float tensor built from the per-sample lengths
            inputs_padded  -- inputs padded with `pad_sequence(batch_first=True)`
            targets_padded -- targets padded the same way
        """
        mmsis = self._field(batch, 0)      # Maritime Mobile Service Identity numbers
        shiptypes = self._field(batch, 1)  # tank, cargo, etc.
        lengths = self._field(batch, 2)    # per-sample sequence length
        inputs = self._field(batch, 3)     # normalized, per the original author's note
        targets = self._field(batch, 4)    # author's note: seems to hold the real vessel path
                                           # (lat, lon, speed, course; NOT normalized)

        # Pad the two sequence fields first, then convert the scalar fields.
        padded_inputs = torch.nn.utils.rnn.pad_sequence(inputs, batch_first=True)
        padded_targets = torch.nn.utils.rnn.pad_sequence(targets, batch_first=True)

        mmsi_tensor = torch.tensor(mmsis)
        shiptype_tensor = torch.tensor(shiptypes)
        length_tensor = torch.tensor(lengths, dtype=torch.float)

        return mmsi_tensor, shiptype_tensor, length_tensor, padded_inputs, padded_targets

In [7]:
# different lengths (use max/min for dimensions)
# Training dataset read from `datasets_path`; the index file name comes from the config.
trainset = dataset_utils.AISDataset(
    dataPath=datasets_path,
    fileName="CargTank_1911.pkl",
    indexFileName=config.index_fileName,
)

# Shuffled mini-batches; PadSequence pads every batch to its longest sequence.
train_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    pin_memory=True,
    num_workers=0,
    collate_fn=PadSequence(),
)

dataPath: C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//
fileName: CargTank_1911.pkl
self.params[dataFileName]: CargTank_1911_idxs.pkl


self.datapath 12 C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//CargTank_1911_idxs.pkl
index:  2777757   total_updates:  238708


In [8]:
# Test dataset; it receives the training set's mean through `train_mean`.
# NOTE(review): this uses the same file name as the training set -- presumably
# AISDataset picks the split based on `train_mean`; confirm in dataset_utils.
testset = dataset_utils.AISDataset(
    dataPath=datasets_path,
    fileName="CargTank_1911.pkl",
    indexFileName=config.index_fileName,
    train_mean=trainset.mean,
)

# Fixed-order mini-batches for evaluation, padded by PadSequence.
test_loader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    pin_memory=True,
    num_workers=0,
    collate_fn=PadSequence(),
)

dataPath: C://Users//asm//OneDrive - Netcompany/University//Master Thesis//Data//
fileName: CargTank_1911.pkl
self.params[dataFileName]: CargTank_1911_idxs.pkl


In [16]:
# Number of sequences in each split; used to average the per-epoch metrics.
train_n, test_n = len(trainset), len(testset)
# Mini-batches per pass over the training loader.
num_batches = len(train_loader)
# How many passes over the training data runModel performs.
num_epochs = 1

In [10]:
# Report how many sequences the training and test datasets contain.
print('Training set size is: {} and Test set size is {} '.format(train_n,test_n))

Training set size is: 3604 and Test set size is 902 


In [11]:
# VRNN sized from the training data: input width is `trainset.datadim`, latent
# width comes from the config, and the training mean is passed as the generative bias.
model = VRNN.VRNN(
    input_shape=trainset.datadim,
    latent_shape=config.LATENT_SIZE,
    generative_bias=trainset.mean,
    device=device,
)

# Adam over all model parameters with a fixed learning rate.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.0003,
)

In [12]:
# Show the feature dimension reported by the training set (passed to the VRNN as `input_shape`).
trainset.datadim

152

In [14]:
def computeLoss(log_px, log_pz, log_qz, lengths, beta=1):
    """Length-masked, length-normalised negative ELBO for a padded batch.

    Parameters
    ----------
    log_px, log_pz, log_qz : sequence of tensors
        One tensor per time step (the sequence length is the padded length),
        stacked along a new leading time axis.
        Assumes each tensor holds one value per batch element -- TODO confirm
        against the model's outputs.
    lengths : tensor
        True length of every sequence in the batch; time steps at or beyond a
        sequence's length are masked out.
    beta : number, default 1
        Weight applied to the KL term.

    Returns
    -------
    tuple
        (-mean_over_batch((log_px - beta * kl) / lengths),
         log_px summed over the unmasked time steps,
         kl = sum(log_qz) - sum(log_pz) over the unmasked time steps)
    """
    max_len = len(log_px)

    # Build the mask on the same device as `lengths` instead of reading the
    # notebook-level `device` global, so the function also works when that
    # global is absent or points at a different device than the inputs.
    steps = torch.arange(max_len, device=lengths.device)
    curmask = steps[:, None] < lengths[None, :] #max_seq_len X Batch

    log_px = torch.stack(log_px, dim=0) * curmask
    log_px = log_px.sum(dim=0) #Sum over time

    log_pz = torch.stack(log_pz, dim=0) * curmask
    log_qz = torch.stack(log_qz, dim=0) * curmask
    kl = log_qz.sum(dim=0) - log_pz.sum(dim=0) #Sum over time

    loss = log_px - beta * kl #reconstruction term minus beta-weighted KL
    loss = torch.mean(loss/lengths) #mean over batch of per-step values

    # Negate so that minimising the returned value maximises the objective.
    return -loss, log_px, kl

In [15]:
# Per-epoch training history: total loss, KL term and reconstruction term.
loss_tot, kl_tot, recon_tot = [], [], []
# The same three metrics measured on the validation/test loader.
val_loss_tot, val_kl_tot, val_recon_tot = [], [], []

In [25]:
## Examining the training set

#for i, (_, _, lengths, inputs, targets) in enumerate(train_loader):
    #print(lengths)
#    print('####################')
#    print(inputs.shape)
#    print('####################')
#    print(targets.shape)
    #targets = targets.to(device)
    #lengths = lengths.to(device)
#    print('--------------------------------------------------------')

In [18]:
def runModel():
    """Train `model` for `num_epochs` epochs, validating after every epoch.

    Works on notebook-level globals: `model`, `optimizer`, `train_loader`,
    `test_loader`, `device`, `num_epochs`, `train_n`, `test_n` and
    `computeLoss`.

    After each epoch the per-sequence averages of the loss, KL term and
    reconstruction term are appended to `loss_tot`, `kl_tot`, `recon_tot`
    (training) and `val_loss_tot`, `val_kl_tot`, `val_recon_tot` (validation),
    and a one-line summary is printed. Returns None.
    """
    for epoch in range(1, num_epochs+1):
        # ---- Training pass ----
        loss_epoch = 0
        kl_epoch = 0
        recon_epoch = 0
        model.train()
        for _, _, lengths, inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            lengths = lengths.to(device)

            log_px, log_pz, log_qz, _, _ = model(inputs,targets,logits=None)

            loss, log_px, kl = computeLoss(log_px, log_pz, log_qz, lengths)

            model.zero_grad()
            loss.backward()
            optimizer.step()

            # `loss` is a batch mean; scale by the batch size so the epoch
            # total can be divided by the dataset size afterwards.
            loss_epoch += loss.item()*len(lengths)
            kl_epoch += torch.sum(kl/lengths).item()
            recon_epoch += torch.sum(log_px/lengths).item()

        loss_tot.append(loss_epoch/train_n)
        kl_tot.append(kl_epoch/train_n)
        recon_tot.append(recon_epoch/train_n)

        # ---- Validation pass ----
        val_loss = 0
        val_kl = 0
        val_recon = 0
        model.eval()
        # No parameter update happens here, so disable autograd: this avoids
        # building a graph for every validation batch.
        with torch.no_grad():
            for _, _, lengths, inputs, targets in test_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                lengths = lengths.to(device)

                log_px, log_pz, log_qz, _, _ = model(inputs,targets,logits=None)

                loss, log_px, kl = computeLoss(log_px, log_pz, log_qz, lengths)

                val_loss += loss.item()*len(lengths)
                val_kl += torch.sum(kl/lengths).item()
                val_recon += torch.sum(log_px/lengths).item()

        val_loss_tot.append(val_loss/test_n)
        val_kl_tot.append(val_kl/test_n)
        val_recon_tot.append(val_recon/test_n)

        # Plotting is currently disabled; the random sample of test indices it
        # needs is kept here, commented out, next to the call that used it.
        #datapoints = np.random.choice(test_n, size = 3, replace=False)
        #plotting.make_vae_plots((loss_tot, kl_tot, recon_tot, val_loss_tot, val_kl_tot, val_recon_tot), model, datapoints, testset, binedges, device)

        print('Epoch {} of {} finished. Trainingloss = {}. Validationloss = {}'.format(epoch, num_epochs, loss_epoch/train_n, val_loss/test_n))

In [20]:
#index:  2777757   total_updates:  238708  totalRows:  238708
#index:  9727785   total_updates:  190279  totalRows:  190279