In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
%matplotlib inline
import sys
import os
import math
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Circle
import seaborn as sns; 
from IPython.display import HTML

In [None]:
#config parser
import configparser

sys.path.insert(0, '../Common/')
from AISDataManager import AISDataManager
import Constants as c
import HMUtils as hMUtil
import TimeUtils as timeUtils
import GeoCompute as gC

#MyConfig.INI stores all the run time constants
config = configparser.ConfigParser()
config.read('../MyConfig.INI')

from joblib import Parallel, delayed
import multiprocessing
aISDM = AISDataManager()

In [None]:
#every run time constant of this notebook lives in one section of MyConfig.INI
generalCfg = config['TRAJ_PRED_LSTM_GENERAL']

#bounding box used to min-max normalise LON and LAT
lonMin = float(generalCfg['LON_MIN'])
lonMax = float(generalCfg['LON_MAX'])

latMin = float(generalCfg['LAT_MIN'])
latMax = float(generalCfg['LAT_MAX'])

print(lonMin,latMin)
print(lonMax,latMax)

increStep = float(generalCfg['INCR_STEP'])
incrRes = int(generalCfg['INCR_RES'])

#one source directory, train/test split, type and year per vessel source
sourceDir1 = generalCfg['SOURCE_DIR_1']
sourceDir2 = generalCfg['SOURCE_DIR_2']

trainTrajNum1 = int(generalCfg['TRAIN_DATA_1'])
trainTrajNum2 = int(generalCfg['TRAIN_DATA_2'])

testEndTrajNum1 = int(generalCfg['TEST_END_1'])
testEndTrajNum2 = int(generalCfg['TEST_END_2'])

type1 = int(generalCfg['TYPE_1'])
type2 = int(generalCfg['TYPE_2'])

year1 = int(generalCfg['YEAR_1'])
year2 = int(generalCfg['YEAR_2'])

dataDir = generalCfg['DATA_DIR']
#feature names are stored as one '_' separated string per usage
featuresAsTS = generalCfg['TS_FEATURE'].split('_')
featuresAsFusion = generalCfg['FUSION_FEATURE'].split('_')

prevTS = int(generalCfg['PREVIOUS_TIME_STAMP'])
numDestination = int(generalCfg['NUM_DEST'])
maxTypes = int(generalCfg['MAX_TYPES'])

#echo the parsed values, one per line, for a quick sanity check
print(sourceDir1, sourceDir2, sep='\n')

print(trainTrajNum1, testEndTrajNum1)
print(trainTrajNum2, testEndTrajNum2)

print(type1, type2, sep='\n')
print(year1, year2, sep='\n')

print(dataDir)

print(featuresAsTS, featuresAsFusion, sep='\n')

print(prevTS, numDestination, maxTypes, sep='\n')

In [None]:
#parsing block for features

#how a feature can be fed to the network
usefeature = {
    'DONT_USE': 0,
    'USE_AS_TS': 1,
    'USE_AS_FUSION': 2,
}

#short names under which the features appear in the config lists
featureName = {
    'Length': 'Len',
    'Type': 'Type',
    'Destination': 'Dest',
}

def _feature_usage(shortName):
    """
    Return the usefeature code for one feature short name.

    The time series list wins over the fusion list; a name found in
    neither list is not used at all.
    """
    if shortName in featuresAsTS:
        return usefeature['USE_AS_TS']
    if shortName in featuresAsFusion:
        return usefeature['USE_AS_FUSION']
    return usefeature['DONT_USE']

useLen = _feature_usage(featureName['Length'])
useType = _feature_usage(featureName['Type'])
useDest = _feature_usage(featureName['Destination'])

print(useLen, useType, useDest, sep='\n')

In [None]:
class VesselTypeSource:
    """
    Description of one vessel data source.

    Attributes:
        srcDir: directory prefix the trajectory CSV files are read from.
        trainNum: trajectories 0 .. trainNum-1 are used for training.
        testNumEnd: exclusive end of the trajectory numbers of this source.
        type: vessel type value attached to every trajectory of the source.
        year: year value taken from the configuration.
    """
    def __init__(self, srcDir, trainNum, testNumEnd, typeVes, year):
        (self.srcDir
         , self.trainNum
         , self.testNumEnd
         , self.type
         , self.year) = (srcDir, trainNum, testNumEnd, typeVes, year)

    def __str__(self):
        #class followed by the attribute dictionary, handy for print debugging
        return "%s: %s" % (self.__class__, self.__dict__)

#one VesselTypeSource per configured source, in config order
vesselSource1 = VesselTypeSource(sourceDir1, trainTrajNum1, testEndTrajNum1, type1, year1)
vesselSource2 = VesselTypeSource(sourceDir2, trainTrajNum2, testEndTrajNum2, type2, year2)

vesselDataSources = [vesselSource1, vesselSource2]
print(*vesselDataSources, sep='\n')

In [None]:
def get_traj_lon_lat_data(sourceDir, num):
    """
    Load the positions of one trajectory.

    The file read is sourceDir + str(num) + '.csv' (plain string
    concatenation, so sourceDir has to end with a path separator).

    Parameters:
        sourceDir (str): directory prefix of the trajectory files.
        num (int): trajectory number.
    Returns:
        numpy array with the 'LON' and 'LAT' columns, in that order,
        one row per record of the file.
    """
    trajFile = ''.join((sourceDir, str(num), '.csv'))
    #the second value returned by the loader is not needed here
    trajFrame, _unused = aISDM.load_data_from_csv(trajFile)
    return trajFrame[['LON','LAT']].to_numpy()

# get_traj_lon_lat_data(vesselDataSources[0].srcDir, 0)

In [None]:
def get_traj_lon_lat_data_with_len(sourceDir, num):
    """
    Load the positions and the vessel length of one trajectory.

    The file read is sourceDir + str(num) + '.csv'.

    Parameters:
        sourceDir (str): directory prefix of the trajectory files.
        num (int): trajectory number.
    Returns:
        tuple of
          - numpy array with the 'LON' and 'LAT' columns, in that order
          - the 'Length' value of the row labelled 0
    """
    trajFile = ''.join((sourceDir, str(num), '.csv'))
    trajFrame, _unused = aISDM.load_data_from_csv(trajFile)
    positions = trajFrame[['LON','LAT']].to_numpy()
    #the length is read from the row labelled 0 only
    vesselLen = trajFrame.loc[0,'Length']
    return positions, vesselLen

# get_traj_lon_lat_data_with_len(vesselDataSources[0].srcDir, 0)

In [None]:
#gather the training trajectories of every vessel source
#every entry of trajSeqList is a (LON/LAT array, vessel length) tuple
#typeList holds the vessel type of the same entry
trajSeqList = []
typeList = []

lenInUse = (useLen != 0)
if lenInUse:
    lenArr = []

for vesselDataSource in vesselDataSources:
    print("Taking data from:%s"%vesselDataSource.srcDir)
    for trajNum in range(vesselDataSource.trainNum):
        if lenInUse:
            seqData,lenData = get_traj_lon_lat_data_with_len(vesselDataSource.srcDir,trajNum)
            trajSeqList.append((seqData,lenData))
            lenArr.append(lenData)
        else:
            #length is not a feature: store a placeholder so every entry stays a 2-tuple
            seqData = get_traj_lon_lat_data(vesselDataSource.srcDir,trajNum)
            trajSeqList.append((seqData, 0.1))

        typeList.append(vesselDataSource.type)

print(trajSeqList[0])
print(typeList[0])

if lenInUse:
    #range of the vessel lengths, used later to min-max normalise the length feature
    lenArrNP = np.array(lenArr)
    lenMin = lenArrNP.min()
    lenMax = lenArrNP.max()
    print(lenMin, lenMax)

In [None]:
#last reported LON/LAT of every trajectory (training and testing) of every source
#rows are laid out source by source, testNumEnd rows per source, in trajectory order
lastPosRows = []

for vesselDataSource in vesselDataSources:
    print("Taking data from:%s"%vesselDataSource.srcDir)
    for trajNum in range(0,vesselDataSource.testNumEnd):
        ret = get_traj_lon_lat_data(vesselDataSource.srcDir,trajNum)
        lastPosRows.append(np.reshape(ret[-1,:],(1,2)))

#stack once at the end; stacking inside the loop would re-copy the whole
#array for every trajectory. The empty (0,2) head keeps the shape and the
#float dtype even when no trajectory was read.
lastPosArr = np.vstack([np.zeros((0,2))] + lastPosRows)

In [None]:
print(lastPosArr)
print(lastPosArr.shape)

In [None]:
from sklearn.cluster import KMeans
#cluster the last positions into numDestination destinations
#random_state pins the centroid initialisation so that the destination
#features are the same on every run (and therefore match a model that was
#trained earlier and is loaded back from disk)
Kmean = KMeans(n_clusters=numDestination, random_state=0)
Kmean.fit(lastPosArr)

In [None]:
clusterIds = Kmean.labels_
print(clusterIds)
#destination feature of a trajectory = centre of the cluster its last position belongs to
#row i of destFeature belongs to row i of lastPosArr
destFeature = Kmean.cluster_centers_[clusterIds]
print(destFeature.shape)

In [None]:
plt.scatter(lastPosArr[:,0],lastPosArr[:,1])
plt.scatter(Kmean.cluster_centers_[:,0],Kmean.cluster_centers_[:,1])

In [None]:
def convert_seq_to_x_y_lon_lat(seq, typeVal = -1, lenVal = -1, destVal = None, prevTimeStamp = prevTS):
    """
    Turn one trajectory into sliding window samples.

    Every sample takes prevTimeStamp consecutive rows of seq as input and
    the row right after them as label. Which of the extra features (type,
    length, destination) are added, and whether they go to the time series
    or to the fusion input, is decided by the notebook level
    useType / useLen / useDest settings.

    Parameters:
        seq (numpy array): trajectory, one row per time stamp.
        typeVal (int): column of the one hot vessel type vector that is set to 1.
        lenVal (float): vessel length, repeated on every sample.
        destVal (sequence of 2 values): destination feature, repeated on
                                        every sample. May be left out only
                                        when the destination is not used.
        prevTimeStamp (int): number of previous time stamps per sample.
    Returns:
        xDataTS (numpy array): per sample, for each of the prevTimeStamp
                               time stamps: the row of seq followed by the
                               enabled time series features (type, length,
                               destination, in that order).
        xDataFusion (numpy array): per sample the enabled fusion features
                                   (type, length, destination, in that order);
                                   zero columns when none is enabled.
        outputLabel (numpy array): per sample the row of seq that follows
                                   its input window.
    Raises:
        TypeError: destVal is None although the destination feature is enabled.
    """
    asTS = usefeature['USE_AS_TS']
    asFusion = usefeature['USE_AS_FUSION']

    #the last prevTimeStamp rows cannot start a window that still has a label.
    #This has to follow the prevTimeStamp argument (not the global prevTS),
    #otherwise the feature blocks below get a different row count than the
    #LON/LAT windows and hstack fails.
    xNumRows = seq[:-(prevTimeStamp),:].shape[0]

    lenData = np.zeros((xNumRows, 1))
    lenData[:,:] = lenVal

    typeData = np.zeros((xNumRows, maxTypes))
    #one hot encoding
    typeData[:,typeVal] = 1

    destArr = np.zeros((xNumRows,2))
    if(destVal is not None):
        destArr[:,0] = destVal[0]
        destArr[:,1] = destVal[1]
    elif(useDest in (asTS, asFusion)):
        #only complain when the missing value would actually end up in the data
        raise TypeError("destVal is required when the destination feature is in use")

    #time series input: window position after window position
    tsParts = [np.zeros((xNumRows,0))]
    for start in range(prevTimeStamp):
        #rows start, start+1, ... : the start-th element of every window
        tsParts.append(seq[start:(-prevTimeStamp+start),:])
        if(useType == asTS):
            tsParts.append(typeData)
        if(useLen == asTS):
            tsParts.append(lenData)
        if(useDest == asTS):
            tsParts.append(destArr)
    xDataTS = np.hstack(tsParts)

    #fusion input: every enabled feature exactly once per sample
    fusionParts = [np.zeros((xNumRows,0))]
    if(useType == asFusion):
        fusionParts.append(typeData)
    if(useLen == asFusion):
        fusionParts.append(lenData)
    if(useDest == asFusion):
        fusionParts.append(destArr)
    xDataFusion = np.hstack(fusionParts)

    #label of a window is the row right after it
    outputLabel = seq[prevTimeStamp:,:].copy()

    return xDataTS, xDataFusion, outputLabel

#smoke test on the first training trajectory (199.94 is just a sample length)
#the window size follows the configured prevTS instead of a hard-coded 2
convert_seq_to_x_y_lon_lat(trajSeqList[0][0],typeList[0],199.94,list(destFeature[0]),prevTS)

In [None]:
#now iterate through trajSeqList
#and stack the samples of every trajectory vertically
#to make giant input and output matrix

#features per time stamp: LON, LAT plus the enabled time series extras
tSCol = 2
if(useType == usefeature['USE_AS_TS']):
    tSCol = tSCol + maxTypes
if(useLen == usefeature['USE_AS_TS']):
    tSCol = tSCol + 1
if(useDest == usefeature['USE_AS_TS']):
    tSCol = tSCol + 2

numTSFeature = tSCol

#flattened width: every one of the prevTS time stamps adds numTSFeature columns
tSCol = tSCol * prevTS
xDataTS = np.zeros((0,tSCol))

fusionCol = 0
if(useType == usefeature['USE_AS_FUSION']):
    fusionCol = fusionCol + maxTypes
if(useLen == usefeature['USE_AS_FUSION']):
    fusionCol = fusionCol + 1
if(useDest == usefeature['USE_AS_FUSION']):
    fusionCol = fusionCol + 2

xDataFusion = np.zeros((0,fusionCol))
yData = np.zeros((0,2))
print(xDataTS.shape)
print(xDataFusion.shape)
print(yData.shape)

#destFeature has one row per clustered trajectory, laid out source by source
#with testNumEnd rows each, while trajSeqList only holds the first trainNum
#trajectories of each source. The trajSeqList position is therefore NOT the
#destFeature row for any source after the first one; map it explicitly.
destRowOfTraj = []
destRowOffset = 0
for vesselDataSource in vesselDataSources:
    destRowOfTraj.extend(range(destRowOffset, destRowOffset + vesselDataSource.trainNum))
    destRowOffset = destRowOffset + vesselDataSource.testNumEnd
assert len(destRowOfTraj) == len(trajSeqList), "trajSeqList is out of sync with vesselDataSources"

#collect the per trajectory pieces and stack once at the end;
#stacking inside the loop would re-copy the whole matrix every time
xTSChunks = []
xFusionChunks = []
yChunks = []
for trajNum in range(len(trajSeqList)):
    trajSeq, trajLen = trajSeqList[trajNum]
    #a trajectory needs more than prevTS rows to yield at least one sample
    if(trajSeq.shape[0] > prevTS):
        convArgs = dict(typeVal = typeList[trajNum]
                        ,destVal = list(destFeature[destRowOfTraj[trajNum]])
                        ,prevTimeStamp = prevTS)
        #the stored length is only a placeholder when the length is not used
        if(useLen != usefeature['DONT_USE']):
            convArgs['lenVal'] = trajLen
        xTSTemp, xFusionTemp, yTemp = convert_seq_to_x_y_lon_lat(trajSeq, **convArgs)
        xTSChunks.append(xTSTemp)
        xFusionChunks.append(xFusionTemp)
        yChunks.append(yTemp)

#the empty heads keep the expected widths (and catch a width mismatch)
xDataTS = np.vstack([xDataTS] + xTSChunks)
xDataFusion = np.vstack([xDataFusion] + xFusionChunks)
yData = np.vstack([yData] + yChunks)
        
# print(xDataTS)
# print(xDataFusion)
# print(yData)

In [None]:
print(xDataTS.shape)
print(xDataFusion.shape)
print(yData.shape)

In [None]:
xTSToStore = dataDir + "XDataTS.npy"
xFToStore = dataDir + "XDataF.npy"
yToStore = dataDir + "YData.npy"
np.save(xTSToStore, xDataTS)
np.save(xFToStore, xDataFusion)
np.save(yToStore, yData)

In [None]:
xTSToStore = dataDir + "XDataTS.npy"
xFToStore = dataDir + "XDataF.npy"
yToStore = dataDir + "YData.npy"

xDataTS = np.load(xTSToStore)
xDataFusion = np.load(xFToStore)
yData = np.load(yToStore)

In [None]:
print(xDataTS.shape)
print(xDataFusion.shape)
print(yData.shape)

In [None]:
def _min_max_cols(dst, src, firstCol, bounds):
    """
    Min-max scale consecutive columns of src into dst.

    bounds holds one (low, high) pair per column, starting at firstCol.
    Returns the index of the first column after the scaled ones.
    """
    for offset, (low, high) in enumerate(bounds):
        dst[:,firstCol + offset] = (src[:,firstCol + offset] - low)/(high - low)
    return firstCol + len(bounds)

#LON column first, LAT column second, wherever a position is stored
lonLatBounds = ((lonMin, lonMax), (latMin, latMax))

#time series matrix: walk over the columns time stamp by time stamp
xDataTSNorm = xDataTS.copy()
colAccess = 0
for prevTime in range(prevTS):
    colAccess = _min_max_cols(xDataTSNorm, xDataTS, colAccess, lonLatBounds)
    if(useType == usefeature['USE_AS_TS']):
        #one hot columns are already in [0, 1], just step over them
        colAccess = colAccess + maxTypes
    if(useLen == usefeature['USE_AS_TS']):
        colAccess = _min_max_cols(xDataTSNorm, xDataTS, colAccess, ((lenMin, lenMax),))
    if(useDest == usefeature['USE_AS_TS']):
        colAccess = _min_max_cols(xDataTSNorm, xDataTS, colAccess, lonLatBounds)

#fusion matrix: same feature order, but only once per sample
colAccess = 0
xDataFusionNorm = xDataFusion.copy()
if(useType == usefeature['USE_AS_FUSION']):
    colAccess = colAccess + maxTypes
if(useLen == usefeature['USE_AS_FUSION']):
    colAccess = _min_max_cols(xDataFusionNorm, xDataFusion, colAccess, ((lenMin, lenMax),))
if(useDest == usefeature['USE_AS_FUSION']):
    colAccess = _min_max_cols(xDataFusionNorm, xDataFusion, colAccess, lonLatBounds)

#the LSTM expects (samples, time stamps, features per time stamp)
xDataTSNorm = xDataTSNorm.reshape((xDataTSNorm.shape[0], prevTS, numTSFeature))

In [None]:
print(xDataTSNorm.shape)
print(xDataFusionNorm.shape)

In [None]:
#min-max scale the labels with the same bounding box as the inputs
#column 0 is LON, column 1 is LAT; both are kept as (samples, 1) columns
yLonLatData_0 = ((yData[:,0] - lonMin)/(lonMax - lonMin)).reshape((-1,1))
yLonLatData_1 = ((yData[:,1] - latMin)/(latMax - latMin)).reshape((-1,1))
yLonLatDataNorm = np.concatenate((yLonLatData_0,yLonLatData_1), axis = 1)

In [None]:
print(yLonLatDataNorm.shape)

In [None]:
print(xDataTSNorm[0,:])
print(xDataFusionNorm[0,:])
print(yLonLatDataNorm[0,:])

In [None]:
from keras import Sequential
from keras.layers import Dense, LSTM
from keras.models import Model
from keras.layers import Input
from keras.layers import concatenate

In [None]:
from keras import backend as K
# Presumably reports the GPUs the Keras TensorFlow backend can see; the value
# is only displayed, nothing below reads it.
# NOTE(review): _get_available_gpus is a private (underscore) helper reached
# through K.tensorflow_backend - it is not part of the public API, so it may
# not exist in the installed Keras/TensorFlow version. Confirm before relying
# on this cell in a Restart & Run All.
K.tensorflow_backend._get_available_gpus()

In [None]:
#time series branch: two stacked LSTM layers over (prevTS, numTSFeature)
lonLatTS = Input(shape=(prevTS,numTSFeature))
hidden1 = LSTM(50, return_sequences= True)(lonLatTS)
hidden2 = LSTM(50)(hidden1)

hasFusion = xDataFusionNorm.shape[1] > 0
if hasFusion:
    #fusion branch: two dense layers, merged with the LSTM output
    fusionIp = Input(shape=(xDataFusionNorm.shape[1],))
    fusionIp1 = Dense(50, activation='relu')(fusionIp)
    fusionIp2 = Dense(50, activation='relu')(fusionIp1)
    x = concatenate([hidden2,fusionIp2])
    headInput, modelInputs = x, [lonLatTS, fusionIp]
else:
    #no fusion feature configured: the head sits directly on the LSTM output
    headInput, modelInputs = hidden2, lonLatTS

#shared head: one hidden dense layer and a linear (LON, LAT) output
lonLatDense = Dense(150, activation='relu')(headInput)
lonLatOp = Dense(2, activation='linear')(lonLatDense)
model = Model(inputs=modelInputs, outputs=lonLatOp)

In [None]:
model.summary()

In [None]:
model.compile(optimizer='adam', loss='mse')

In [None]:
#the model only has a second input when fusion features are configured
fitInputs = xDataTSNorm
if xDataFusionNorm.shape[1] > 0:
    fitInputs = [xDataTSNorm, xDataFusionNorm]

fitHistory = model.fit(fitInputs, yLonLatDataNorm, epochs=1000, batch_size = 512 , verbose = 2)

In [None]:
modelDir = dataDir + "Model_1000_MSE.h5"

In [None]:
model.save(modelDir)

In [None]:
from keras.models import load_model
model = load_model(modelDir)

In [None]:
def normalize_lon_lat(arr):
    """
    Min-max scale positions with the configured bounding box.

    Parameters:
        arr (numpy array): column 0 is LON, column 1 is LAT.
    Returns:
        numpy array with two columns: (LON - lonMin)/(lonMax - lonMin)
        and (LAT - latMin)/(latMax - latMin), one row per input row.
    """
    lonNorm = (arr[:,0] - lonMin)/(lonMax - lonMin)
    latNorm = (arr[:,1] - latMin)/(latMax - latMin)
    #put the two 1-D results side by side as columns
    return np.column_stack((lonNorm, latNorm))

In [None]:
#single step prediction:
#normalise the previous positions and the configured extra features,
#run the model once and de normalise its output
def compute_30_min_pred(prevTraj, typeVal, lenVal, destVal):
    """
    Compute prediction for 30 minutes.

    Builds the model input in the same feature order that is used for
    training (position, then type, length, destination), predicts one
    step and maps the result back to LON and LAT.

    Parameters:
        prevTraj (numpy array): LON and LAT of the previous time stamps,
                                one row per time stamp.
        typeVal (int): column of the one hot vessel type vector to set.
        lenVal (float): vessel length, not normalised.
        destVal (sequence of 2 values): destination LON and LAT, not normalised.
    Returns:
        (predLon, predLat): de normalised prediction.
    """
    asTS = usefeature['USE_AS_TS']
    asFusion = usefeature['USE_AS_FUSION']

    prevTimeStamp = prevTraj.shape[0]
    prevTrajNorm = normalize_lon_lat(prevTraj)

    def _num_rows(usage):
        #a time series feature is repeated on every time stamp,
        #a fusion feature is fed once per sample
        return prevTimeStamp if usage == asTS else 1

    #enabled extra features as (usage, data), in training column order
    extras = []
    if(useType in (asTS, asFusion)):
        typeData = np.zeros((_num_rows(useType), maxTypes))
        #one hot encoding
        typeData[:,typeVal] = 1
        extras.append((useType, typeData))

    if(useLen in (asTS, asFusion)):
        lenData = np.zeros((_num_rows(useLen), 1))
        lenData[:,:] = lenVal
        lenData = (lenData - lenMin)/(lenMax - lenMin)
        extras.append((useLen, lenData))

    if(useDest in (asTS, asFusion)):
        destArr = np.zeros((_num_rows(useDest), 2))
        destArr[:,0] = destVal[0]
        destArr[:,1] = destVal[1]
        #the destination is a position, so it shares the LON/LAT scaling
        destArr = normalize_lon_lat(destArr)
        extras.append((useDest, destArr))

    tsBlocks = [np.zeros((prevTimeStamp,0)), prevTrajNorm]
    fusionBlocks = [np.zeros((1,0))]
    for usage, featureData in extras:
        if(usage == asTS):
            tsBlocks.append(featureData)
        else:
            fusionBlocks.append(featureData)

    #one sample: (1, time stamps, features per time stamp)
    xTSNorm = np.hstack(tsBlocks)[np.newaxis,:,:]
    xFNorm = np.hstack(fusionBlocks)

    #the model has a second input only when fusion features exist
    modelInput = [xTSNorm, xFNorm] if xFNorm.shape[1] > 0 else xTSNorm
    predLatLon = model.predict(modelInput)

    #after prediction de normalise it
    predLonScaled = (predLatLon[0,0] * (lonMax - lonMin)) + lonMin
    predLatScaled = (predLatLon[0,1] * (latMax - latMin)) + latMin
    return predLonScaled, predLatScaled

In [None]:
def compute_n_30_min_pred(prevTraj, typeVal, lenVal, destVal, n = 1):
    """
    Compute prediction for n*30 minutes.

    Calls compute_30_min_pred n times. After every call the oldest
    position of the input window is dropped and the fresh prediction is
    appended, so later predictions are based on earlier ones.

    Parameters:
        prevTraj (numpy array): LON and LAT of the previous time stamps;
                                it is copied, not modified.
        typeVal, lenVal, destVal: passed on to compute_30_min_pred.
        n (int): number of consecutive predictions, default 1.
    Returns:
        numpy array of n x 2: predicted LON and LAT, one row per step.
    """
    window = prevTraj.copy()
    predicted = []

    for _step in range(n):
        predLon, predLat = compute_30_min_pred(window, typeVal, lenVal, destVal)
        predicted.append([predLon, predLat])

        #slide the window: forget the oldest row, add the prediction
        window = np.vstack((window[1:,:], np.array([[predLon,predLat]])))

    #reshape keeps the n x 2 layout even when nothing was predicted
    return np.array(predicted, dtype = float).reshape((-1,2))

In [None]:
def get_error_for_traj(srcDir, num, typeVessel, destVal, numPred = 8):
    """
    Compute the prediction errors for one trajectory.

    The first prevTS positions of the trajectory seed the model; the
    following positions are predicted one after the other and compared
    with the recorded ones. With the default numPred = 8 this covers
    predictions of up to 4 hours, i.e. 240 minutes.

    Parameters:
        srcDir (str): directory prefix the trajectory file is read from.
        num (int): number of vessel trajectory
                   whose prediction errors to be computed.
        typeVessel (int): vessel type passed to the predictor.
        destVal (sequence of 2 values): destination feature of the trajectory.
        numPred (int): maximum number of consecutive 30 min predictions.
    Returns:
        errorVal (list of floats): one gC.compute_distance value per
                                   30 min prediction (plotted as KM in this
                                   notebook). Shorter than numPred when the
                                   trajectory has fewer recorded positions,
                                   empty when it has none to compare with.
    """
    errorVal = []
    #0 is usefeature['DONT_USE']: the length column is not read at all then
    if(useLen == 0):
        vesselTraj = get_traj_lon_lat_data(srcDir, num)
        lenData = -1
    else:
        vesselTraj,lenData = get_traj_lon_lat_data_with_len(srcDir, num)

    #number of predictions that have a recorded position to compare with
    predRange = min(vesselTraj.shape[0] - prevTS, numPred)
    if(predRange <= 0):
        return errorVal

    #only predict as far as the ground truth reaches; every step is a
    #model call and the first predRange steps do not depend on later ones
    seedTraj = np.reshape(vesselTraj[0:prevTS,:], (prevTS,2))
    predVesselTraj = compute_n_30_min_pred(seedTraj, typeVessel, lenData, destVal, n = predRange)

    for pred in range(predRange):
        trueLon, trueLat = vesselTraj[(prevTS + pred),0], vesselTraj[(prevTS + pred),1]
        predLon, predLat = predVesselTraj[pred,0], predVesselTraj[pred,1]
        errorVal.append(gC.compute_distance(trueLon, trueLat, predLon, predLat))
    return errorVal

In [None]:
help(get_error_for_traj)

In [None]:
#prediction errors of every training trajectory of the first vessel source
#(entry i is the error list of trajectory i)
firstSource = vesselDataSources[0]
trainDataWholeErrors_1004 = [
    get_error_for_traj(firstSource.srcDir, traj, firstSource.type, destFeature[traj])
    for traj in range(firstSource.trainNum)
]

In [None]:
#regroup the per trajectory error lists by prediction horizon:
#slot 0 collects every 30 min error, slot 1 every 60 min error, ...
#slot 7 every 240 min error
trainWholeErr_1004_n_30 = [[] for _ in range(8)]

#the individual names refer to the very same list objects as the slots
(trainWholeErr_1004_30
 , trainWholeErr_1004_60
 , trainWholeErr_1004_90
 , trainWholeErr_1004_120
 , trainWholeErr_1004_150
 , trainWholeErr_1004_180
 , trainWholeErr_1004_210
 , trainWholeErr_1004_240) = trainWholeErr_1004_n_30

for trajErr in trainDataWholeErrors_1004:
    #a short trajectory only contributes to the horizons it reaches
    for n_30, errVal in enumerate(trajErr[:8]):
        trainWholeErr_1004_n_30[n_30].append(errVal)

In [None]:
def format_func(value, tick_number):
    """
    Tick formatter: turn a horizon index into minutes.

    Index 0 is the 30 minute prediction, every further index adds
    30 minutes. tick_number is required by the formatter signature
    and not used. Returns the minutes as integer text.
    """
    return "%d" % ((value*30) + 30)

In [None]:
#freeze the per horizon training error lists into numpy arrays
(trainWholeErr_1004_30NP
 , trainWholeErr_1004_60NP
 , trainWholeErr_1004_90NP
 , trainWholeErr_1004_120NP
 , trainWholeErr_1004_150NP
 , trainWholeErr_1004_180NP
 , trainWholeErr_1004_210NP
 , trainWholeErr_1004_240NP) = [np.array(errList) for errList in (
    trainWholeErr_1004_30
    , trainWholeErr_1004_60
    , trainWholeErr_1004_90
    , trainWholeErr_1004_120
    , trainWholeErr_1004_150
    , trainWholeErr_1004_180
    , trainWholeErr_1004_210
    , trainWholeErr_1004_240
)]

In [None]:
#mean training error per horizon, 30 min first and 240 min last
trainWholeErrMean_1004 = [np.mean(errArr) for errArr in (
    trainWholeErr_1004_30NP
    , trainWholeErr_1004_60NP
    , trainWholeErr_1004_90NP
    , trainWholeErr_1004_120NP
    , trainWholeErr_1004_150NP
    , trainWholeErr_1004_180NP
    , trainWholeErr_1004_210NP
    , trainWholeErr_1004_240NP
)]

In [None]:
fig = plt.figure()
ax = fig.add_subplot()
ax.set_title("Average Error Value On Training 1004")
ax.set_ylabel("Distance in KM")
ax.set_xlabel("Time in Minutes")
ax.plot(trainWholeErrMean_1004,label = "LSTM")
ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
ax.legend()

In [None]:
#prediction errors of every testing trajectory of the first vessel source
#testing trajectories are the numbers trainNum .. testNumEnd-1
firstSource = vesselDataSources[0]
testDataWholeErrors_1004 = [
    get_error_for_traj(firstSource.srcDir, traj, firstSource.type, destFeature[traj])
    for traj in range(firstSource.trainNum, firstSource.testNumEnd)
]

In [None]:
#regroup the per trajectory error lists by prediction horizon:
#slot 0 collects every 30 min error, slot 1 every 60 min error, ...
#slot 7 every 240 min error
testWholeErr_1004_n_30 = [[] for _ in range(8)]

#the individual names refer to the very same list objects as the slots
(testWholeErr_1004_30
 , testWholeErr_1004_60
 , testWholeErr_1004_90
 , testWholeErr_1004_120
 , testWholeErr_1004_150
 , testWholeErr_1004_180
 , testWholeErr_1004_210
 , testWholeErr_1004_240) = testWholeErr_1004_n_30

for trajErr in testDataWholeErrors_1004:
    #a short trajectory only contributes to the horizons it reaches
    for n_30, errVal in enumerate(trajErr[:8]):
        testWholeErr_1004_n_30[n_30].append(errVal)

In [None]:
#freeze the per horizon testing error lists into numpy arrays
(testWholeErr_1004_30NP
 , testWholeErr_1004_60NP
 , testWholeErr_1004_90NP
 , testWholeErr_1004_120NP
 , testWholeErr_1004_150NP
 , testWholeErr_1004_180NP
 , testWholeErr_1004_210NP
 , testWholeErr_1004_240NP) = [np.array(errList) for errList in (
    testWholeErr_1004_30
    , testWholeErr_1004_60
    , testWholeErr_1004_90
    , testWholeErr_1004_120
    , testWholeErr_1004_150
    , testWholeErr_1004_180
    , testWholeErr_1004_210
    , testWholeErr_1004_240
)]

In [None]:
#mean testing error per horizon, 30 min first and 240 min last
testWholeErrMean_1004 = [np.mean(errArr) for errArr in (
    testWholeErr_1004_30NP
    , testWholeErr_1004_60NP
    , testWholeErr_1004_90NP
    , testWholeErr_1004_120NP
    , testWholeErr_1004_150NP
    , testWholeErr_1004_180NP
    , testWholeErr_1004_210NP
    , testWholeErr_1004_240NP
)]

In [None]:
fig = plt.figure()
ax = fig.add_subplot()
ax.set_title("Average Error Value On Testing 1004")
ax.set_ylabel("Distance in KM")
ax.set_xlabel("Time in Minutes")
ax.plot(testWholeErrMean_1004,label = "LSTM")
ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
ax.legend()

In [None]:
#write every per horizon training error array to its own .npy file in dataDir
for fileName, errArr in (
    ("trainWholeErr_1004_30NP.npy", trainWholeErr_1004_30NP)
    , ("trainWholeErr_1004_60NP.npy", trainWholeErr_1004_60NP)
    , ("trainWholeErr_1004_90NP.npy", trainWholeErr_1004_90NP)
    , ("trainWholeErr_1004_120NP.npy", trainWholeErr_1004_120NP)
    , ("trainWholeErr_1004_150NP.npy", trainWholeErr_1004_150NP)
    , ("trainWholeErr_1004_180NP.npy", trainWholeErr_1004_180NP)
    , ("trainWholeErr_1004_210NP.npy", trainWholeErr_1004_210NP)
    , ("trainWholeErr_1004_240NP.npy", trainWholeErr_1004_240NP)
):
    dataToStore = dataDir + fileName
    np.save(dataToStore, errArr)

In [None]:
#write every per horizon testing error array to its own .npy file in dataDir
for fileName, errArr in (
    ("testWholeErr_1004_30NP.npy", testWholeErr_1004_30NP)
    , ("testWholeErr_1004_60NP.npy", testWholeErr_1004_60NP)
    , ("testWholeErr_1004_90NP.npy", testWholeErr_1004_90NP)
    , ("testWholeErr_1004_120NP.npy", testWholeErr_1004_120NP)
    , ("testWholeErr_1004_150NP.npy", testWholeErr_1004_150NP)
    , ("testWholeErr_1004_180NP.npy", testWholeErr_1004_180NP)
    , ("testWholeErr_1004_210NP.npy", testWholeErr_1004_210NP)
    , ("testWholeErr_1004_240NP.npy", testWholeErr_1004_240NP)
):
    dataToStore = dataDir + fileName
    np.save(dataToStore, errArr)