In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
%matplotlib inline
import sys
import os
import math
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Circle
import seaborn as sns; 
from IPython.display import HTML

In [None]:
#config parser
import configparser

sys.path.insert(0, '../Common/')
from AISDataManager import AISDataManager
import Constants as c
import HMUtils as hMUtil
import TimeUtils as timeUtils
import GeoCompute as gC

#MyConfig.INI stores all the run time constants
config = configparser.ConfigParser()
# NOTE: ConfigParser.read() silently skips files it cannot open, so a wrong
# path only surfaces later as a KeyError on the first section lookup.
config.read('../MyConfig.INI')

from joblib import Parallel, delayed
import multiprocessing
# Shared loader instance; the trajectory helpers below call
# aISDM.load_data_from_csv() on it.
aISDM = AISDataManager()

In [None]:
# every run-time constant of this experiment lives in one INI section
lstmCfg = config['TRAJ_PRED_LSTM_GENERAL_MORE']

# bounding box used for grid generation and for min-max normalisation
lonMin = float(lstmCfg['LON_MIN'])
lonMax = float(lstmCfg['LON_MAX'])

latMin = float(lstmCfg['LAT_MIN'])
latMax = float(lstmCfg['LAT_MAX'])

print(lonMin,latMin)
print(lonMax,latMax)

# grid step / resolution handed to hMUtil.generate_grid
increStep = float(lstmCfg['INCR_STEP'])
incrRes = int(lstmCfg['INCR_RES'])

# trajectory CSVs are named <sourceDir><number>.csv;
# [0, trainTrajNum) is the training split, [trainTrajNum, testEndTrajNum) the test split
sourceDir = lstmCfg['SOURCE_DIR']
trainTrajNum = int(lstmCfg['TRAIN_DATA'])
testEndTrajNum = int(lstmCfg['TEST_END'])

# output directory is pinned here instead of being read from DATA_DIR in the config
dataDir = "../Data/M122_00_M119_50_34_00_36_00/General/15_16_17_3_TS_MON_DEST_TYPE_SOG_COG_GRID_TS/"

# pre-computed per-grid-cell statistics
sogMeanFile = "../Data/M122_00_M119_50_34_00_36_00/Output/SOG_15_16_17_1004.npy"
sogVarFile = "../Data/M122_00_M119_50_34_00_36_00/Output/SOG_15_16_17_1004_Var.npy"

cogMeanFile = "../Data/M122_00_M119_50_34_00_36_00/Output/COG_15_16_17_1004_Mean.npy"
cogVarFile = "../Data/M122_00_M119_50_34_00_36_00/Output/COG_15_16_17_1004_Var.npy"
cogMinFile = "../Data/M122_00_M119_50_34_00_36_00/Output/COG_15_16_17_1004_Min.npy"
cogMaxFile = "../Data/M122_00_M119_50_34_00_36_00/Output/COG_15_16_17_1004_Max.npy"
cogMedianFile = "../Data/M122_00_M119_50_34_00_36_00/Output/COG_15_16_17_1004_Median.npy"
print(sourceDir)
print(dataDir)

# number of past time stamps fed to the model for one prediction
prevTS = 3

In [None]:
# generate_grid returns a sequence; elements 0 and 1 are the two axes and
# element 2 is the per-cell boundary table searched by the index helpers below
heatMapGrid = hMUtil.generate_grid(lonMin, lonMax, latMin, latMax, increStep, incrRes)
horizontalAxis, verticalAxis, boundaryArray = heatMapGrid[0], heatMapGrid[1], heatMapGrid[2]

# one state per grid cell
totalStates = horizontalAxis.shape[0] * verticalAxis.shape[0]
print(totalStates)

In [None]:
def get_traj_lon_lat_data(sourceDir, num):
    """
    Load trajectory number `num` and return its positions.

    The file read is sourceDir + str(num) + '.csv'.
    Returns a numpy array with one row per record and the columns LON, LAT.
    """
    trajDF,_ = aISDM.load_data_from_csv(sourceDir + str(num) + '.csv')
    return trajDF[['LON','LAT']].to_numpy()

def get_index_from_lon_lat_cord(lon, lat):
    """
    Return the grid index of the cell that contains (lon, lat).

    Every row of the global boundaryArray is read as
    [lonLow, lonHigh, latLow, latHigh, index]; the lower bounds are inclusive
    and the upper bounds exclusive. Returns -1 when no cell contains the point.
    """
    for cell in boundaryArray:
        if not (cell[0] <= lon < cell[1]):
            continue
        if not (cell[2] <= lat < cell[3]):
            continue
        #first matching cell wins
        return cell[4]
    return -1

def get_index_from_lon_lat(latLonRow):
    """
    Return the grid index of the cell that contains one trajectory record.

    Parameters:
        latLonRow: row-like object that supports ['LON'] and ['LAT'] lookups
                   (it is used with DataFrame.apply(axis=1) in this notebook).
    Returns:
        the index stored in the matching row of boundaryArray,
        or -1 when the position is outside every cell.
    """
    #the search itself lives in get_index_from_lon_lat_cord;
    #delegating keeps the two entry points from drifting apart
    return get_index_from_lon_lat_cord(latLonRow['LON'], latLonRow['LAT'])

def convert_traj_df_to_state_sequence(sourceDF):
    """
    Map every row of a trajectory dataframe to its grid-cell index.

    Rows are looked up one by one with get_index_from_lon_lat, so positions
    outside the grid come back as -1. Returns the indices as a numpy array.
    """
    return sourceDF.apply(get_index_from_lon_lat, axis=1).to_numpy()

In [None]:
#per-cell SOG statistics; the standard deviation is derived from the variance
#and any NaN it produces is replaced by 0
sOGMean = np.load(sogMeanFile)
sOGVar = np.load(sogVarFile)
sOGSD = np.nan_to_num(np.sqrt(sOGVar))
for gridStat in (sOGMean, sOGVar, sOGSD):
    print(gridStat.shape)

In [None]:
#per-cell COG statistics; NaN standard deviations are replaced by 0
cOGMean = np.load(cogMeanFile)
cOGMedian = np.load(cogMedianFile)
cOGVar = np.load(cogVarFile)
cOGSD = np.nan_to_num(np.sqrt(cOGVar))

In [None]:
#value range of the COG mean grid
cOGMin, cOGMax = cOGMean.min(), cOGMean.max()
print(cOGMin, cOGMax, sep="\n")

#value range of the COG standard deviation grid
cOGSDMin, cOGSDMax = cOGSD.min(), cOGSD.max()
print(cOGSDMin, cOGSDMax, sep="\n")

In [None]:
#value range of the COG median grid; used to min-max normalise the COG feature
cOGMedMin, cOGMedMax = cOGMedian.min(), cOGMedian.max()
print(cOGMedMin, cOGMedMax, sep="\n")

In [None]:
def get_traj_lon_lat_data_with_len(sourceDir, num, vType = 0):
    """
    Load one trajectory together with the features used by the model.

    Reads sourceDir + str(num) + '.csv' and returns a 10-tuple:
        0 - (N, 2) array of LON, LAT
        1 - 'Length' of the first row
        2 - year  (int) parsed from the first row's 'DateTime'
        3 - month (int) parsed from the first row's 'DateTime'
        4 - 'DEST_LON' of the first row
        5 - 'DEST_LAT' of the first row
        6 - vType, passed through unchanged
        7 - (N, 1) SOG mean of the grid cell under every position
        8 - (N, 1) SOG standard deviation of that cell
        9 - (N, 1) COG median of that cell

    'DateTime' must split into exactly three parts on '-'.
    NOTE(review): positions outside the grid map to index -1, which numpy
    resolves to the last grid cell - confirm that this is intended.
    """
    trajDF,_ = aISDM.load_data_from_csv(sourceDir + str(num) + '.csv')
    year, month, _ = trajDF.loc[0,'DateTime'].split('-')

    #grid cell index of every record
    cellIdx = convert_traj_df_to_state_sequence(trajDF)

    def as_column(gridStat):
        #pick the statistic of every visited cell and shape it as (N, 1)
        picked = gridStat[cellIdx].copy()
        return np.reshape(picked, (picked.shape[0],1))

    return (
        trajDF.loc[:,['LON','LAT']].to_numpy(),
        trajDF.loc[0,'Length'],
        int(year),
        int(month),
        trajDF.loc[0,'DEST_LON'],
        trajDF.loc[0,'DEST_LAT'],
        vType,
        as_column(sOGMean),
        as_column(sOGSD),
        as_column(cOGMedian),
    )

# Smoke test: load trajectory 0 and display the returned tuple.
get_traj_lon_lat_data_with_len(sourceDir, 0)

In [None]:
#training data: one feature tuple per trajectory 0 .. trainTrajNum-1,
#kept in trajectory order
trajSeqList = []

for trajNum in range(trainTrajNum):
    trajSeqList.append(get_traj_lon_lat_data_with_len(sourceDir, trajNum))
    #progress marker every 100 trajectories
    if trajNum % 100 == 0:
        print("Done",trajNum)

In [None]:
# Inspect the feature tuple of the first training trajectory.
print(trajSeqList[0])

In [None]:
def convert_seq_to_x_y_lon_lat(seq, lenVal, yearF, monF, destLon, destLat, vType, sOGVal, sOGSDVal, cOGVal, prevTimeStamp):
    """
    Turn one trajectory into sliding-window model inputs and labels.

    Every output row holds prevTimeStamp consecutive time steps; the label of
    the row is the position that follows the window.

    Parameters:
        seq (N x 2 array): LON, LAT of the trajectory.
        lenVal, yearF: accepted for interface compatibility, not used.
        monF (int): month 1..12; one-hot encoded at column (12 - monF).
        destLon, destLat: destination, repeated on every row.
        vType (int): 0 or 1; one-hot encoded over two columns.
        sOGVal, sOGSDVal, cOGVal (N x 1 arrays): per-position grid statistics.
        prevTimeStamp (int): number of time steps per window.
    Returns:
        xDataTS (N - prevTimeStamp, 21 * prevTimeStamp): per time step
            [lon, lat, sog, sogSD, cog, type(2), month(12), destLon, destLat].
        xDataFusion (N - prevTimeStamp, 0): empty placeholder for fusion features.
        outputLabel (N - prevTimeStamp, 2): LON, LAT that follows each window.
    """
    #the last prevTimeStamp points have no complete window/label pair.
    #the window length must come from the argument, not from the notebook
    #global prevTS, otherwise the row count disagrees with the slices below
    xNumRows = seq[:-prevTimeStamp,:].shape[0]

    #features that are constant over the whole trajectory
    monData = np.zeros((xNumRows, 12))
    monData[:,(12-monF)] = 1

    destArr = np.zeros((xNumRows,2))
    destArr[:,0] = destLon
    destArr[:,1] = destLat

    typeArr = np.zeros((xNumRows,2))
    typeArr[:,vType] = 1

    xDataTS = np.zeros((xNumRows,0))
    for start in range(prevTimeStamp):
        #rows start .. N - prevTimeStamp + start - 1 form time step `start`
        #of every window; stop is always negative
        stop = start - prevTimeStamp
        xDataTS = np.hstack((xDataTS
                             ,seq[start:stop,:]
                             ,sOGVal[start:stop,:]
                             ,sOGSDVal[start:stop,:]
                             ,cOGVal[start:stop,:]
                             ,typeArr
                             ,monData
                             ,destArr))

    #no fusion features are produced at the moment
    xDataFusion = np.zeros((xNumRows,0))
    outputLabel = seq[prevTimeStamp:,:].copy()

    return xDataTS, xDataFusion, outputLabel

#smoke test on the first trajectory: its 10-element tuple supplies the first
#ten arguments, the window length is 3
convert_seq_to_x_y_lon_lat(*trajSeqList[0], 3)

In [None]:
#now iterate through trajSeqList,
#convert every trajectory to window rows
#and stack them into one big input and one big output matrix

#features per time step: lon, lat, SOG mean, SOG SD, COG median (5)
#+ vessel type one-hot (2) + month one-hot (12) + destination lon/lat (2)
tSCol = 21
numTSFeature = tSCol

tSCol = tSCol * prevTS
xDataTS = np.zeros((0,tSCol))

fusionCol = 0
xDataFusion = np.zeros((0,fusionCol))
yData = np.zeros((0,2))
print(xDataTS.shape)
print(xDataFusion.shape)
print(yData.shape)

#collect the per-trajectory pieces and stack once at the end;
#calling np.vstack inside the loop re-copies everything gathered so far on
#every iteration. the empty seed arrays keep shape and dtype well defined
#even when no trajectory qualifies
xTSChunks = [xDataTS]
xFusionChunks = [xDataFusion]
yChunks = [yData]

for trajNum, trajData in enumerate(trajSeqList):
    #a trajectory needs at least prevTS + 1 points to give one (window, label) pair
    if trajData[0].shape[0] > prevTS:
        xTSTemp, xFusionTemp, yTemp = convert_seq_to_x_y_lon_lat(*trajData, prevTS)
        xTSChunks.append(xTSTemp)
        xFusionChunks.append(xFusionTemp)
        yChunks.append(yTemp)
    else:
        print("Not enough trajectory")
    if(trajNum%100)==0:
        print(trajNum)

xDataTS = np.vstack(xTSChunks)
xDataFusion = np.vstack(xFusionChunks)
yData = np.vstack(yChunks)

In [None]:
# Shapes of the stacked input, fusion and label matrices.
print(xDataTS.shape)
print(xDataFusion.shape)
print(yData.shape)

In [None]:
#persist the stacked matrices so the stacking step can be skipped on a later run
xTSToStore = dataDir + "XDataTS.npy"
xFToStore = dataDir + "XDataF.npy"
yToStore = dataDir + "YData.npy"
for targetPath, matrix in ((xTSToStore, xDataTS), (xFToStore, xDataFusion), (yToStore, yData)):
    np.save(targetPath, matrix)

In [None]:
#reload the stacked matrices from disk
xTSToStore, xFToStore, yToStore = [dataDir + fileName
                                   for fileName in ("XDataTS.npy", "XDataF.npy", "YData.npy")]

xDataTS, xDataFusion, yData = [np.load(storedPath)
                               for storedPath in (xTSToStore, xFToStore, yToStore)]

In [None]:
# Shapes of the matrices after reloading them from disk.
print(xDataTS.shape)
print(xDataFusion.shape)
print(yData.shape)

In [None]:
#value range of the SOG mean grid; used to min-max normalise the SOG feature
sOGMin, sOGMax = sOGMean.min(), sOGMean.max()
print(sOGMin, sOGMax, sep="\n")

#value range of the SOG standard deviation grid
sOGSDMin, sOGSDMax = sOGSD.min(), sOGSD.max()
print(sOGSDMin, sOGSDMax, sep="\n")

In [None]:
#min-max normalise the continuous columns of every time step block.
#block layout (numTSFeature columns per time step):
#  0 lon, 1 lat, 2 SOG mean, 3 SOG SD, 4 COG median,
#  then the vessel type and month one-hot columns (left untouched),
#  and the destination lon/lat in the last two columns
xDataTSNorm = xDataTS.copy()
colAccess = 0
for prevTime in range(prevTS):
    xDataTSNorm[:,colAccess + 0] = (xDataTS[:,colAccess + 0] - lonMin)/(lonMax - lonMin)
    xDataTSNorm[:,colAccess + 1] = (xDataTS[:,colAccess + 1] - latMin)/(latMax - latMin)
    xDataTSNorm[:,colAccess + 2] = (xDataTS[:,colAccess + 2] - sOGMin)/(sOGMax - sOGMin)
    xDataTSNorm[:,colAccess + 3] = (xDataTS[:,colAccess + 3] - sOGSDMin)/(sOGSDMax - sOGSDMin)
    xDataTSNorm[:,colAccess + 4] = (xDataTS[:,colAccess + 4] - cOGMedMin)/(cOGMedMax - cOGMedMin)
    #destination columns are addressed relative to the block width so this
    #loop stays in step with numTSFeature, which the reshape below also uses
    xDataTSNorm[:,colAccess + numTSFeature - 2] = (xDataTS[:,colAccess + numTSFeature - 2] - lonMin)/(lonMax - lonMin)
    xDataTSNorm[:,colAccess + numTSFeature - 1] = (xDataTS[:,colAccess + numTSFeature - 1] - latMin)/(latMax - latMin)
    colAccess = colAccess + numTSFeature

#there are no fusion features at the moment, so nothing to normalise here
xDataFusionNorm = xDataFusion.copy()

#(samples, prevTS * numTSFeature) -> (samples, prevTS, numTSFeature) for the LSTM
xDataTSNorm = np.reshape(xDataTSNorm,(xDataTSNorm.shape[0], prevTS, numTSFeature))

In [None]:
# Shapes of the normalised model inputs.
print(xDataTSNorm.shape)
print(xDataFusionNorm.shape)

In [None]:
#normalise the output data as well
yLonLatData_0 = (yData[:,0] - lonMin)/(lonMax - lonMin)
yLonLatData_0 = np.reshape(yLonLatData_0,(yLonLatData_0.shape[0],1))
yLonLatData_1 = (yData[:,1] - latMin)/(latMax - latMin)
yLonLatData_1 = np.reshape(yLonLatData_1,(yLonLatData_1.shape[0],1))
yLonLatDataNorm = np.hstack((yLonLatData_0,yLonLatData_1))

In [None]:
# Shape of the normalised label matrix.
print(yLonLatDataNorm.shape)

In [None]:
# Raw (un-normalised) labels, for comparison with the normalised ones.
print(yData)

In [None]:
#eyeball the first two samples: time series input, fusion input, label
for sampleIdx in (0, 1):
    print(xDataTSNorm[sampleIdx,:,:])
    print(xDataFusionNorm[sampleIdx,:])
    print(yLonLatDataNorm[sampleIdx,:])

In [None]:
from keras import Sequential
from keras.layers import Dense, LSTM
from keras.models import Model
from keras.layers import Input
from keras.layers import concatenate

In [None]:
from keras import backend as K
# Presumably reports the GPUs visible to the TensorFlow backend.
# NOTE(review): `_get_available_gpus` is a private helper reached through
# `K.tensorflow_backend`; it looks tied to older standalone Keras releases -
# confirm it exists in the installed version.
K.tensorflow_backend._get_available_gpus()

In [None]:
#two stacked LSTM layers read the (prevTS, numTSFeature) window
lonLatTS = Input(shape=(prevTS,numTSFeature))
hidden1 = LSTM(50, return_sequences= True)(lonLatTS)
hidden2 = LSTM(50)(hidden1)

#optional fusion branch: only built when fusion features exist
if(xDataFusionNorm.shape[1] > 0):
    fusionIp = Input(shape=(xDataFusionNorm.shape[1],))
    fusionIp1 = Dense(50, activation='relu')(fusionIp)
    fusionIp2 = Dense(50, activation='relu')(fusionIp1)
    x = concatenate([hidden2,fusionIp2])
    headInput, modelInputs = x, [lonLatTS, fusionIp]
else:
    headInput, modelInputs = hidden2, lonLatTS

#shared regression head: one hidden layer, then the (lon, lat) output
lonLatDense = Dense(150, activation='relu')(headInput)
lonLatOp = Dense(2, activation='linear')(lonLatDense)
model = Model(inputs=modelInputs, outputs=lonLatOp)

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Mean squared error on the normalised (lon, lat) output, optimised with Adam.
model.compile(optimizer='adam', loss='mse')

In [None]:
#train the model; the return value of fit() is kept in both branches so that
#modelHist is defined no matter which model variant was built
#NOTE(review): the two branches use very different epoch counts and batch
#sizes (5 / 512 vs 1000 / 1024) - confirm this is intended
if xDataFusionNorm.shape[1] > 0:
    modelHist = model.fit([xDataTSNorm, xDataFusionNorm], yLonLatDataNorm, epochs=5, batch_size = 512 , verbose = 2)
else:
    modelHist = model.fit(xDataTSNorm, yLonLatDataNorm, epochs=1000, batch_size = 1024 , verbose = 2)

In [None]:
# Path of the saved model file (despite the name, this is a file path, not a directory).
modelDir = dataDir + "Model_1000_MSE.h5"

In [None]:
# Write the trained model to modelDir.
model.save(modelDir)

In [None]:
from keras.models import load_model
# Replace the in-memory model with the one stored at modelDir; the prediction
# helpers below read this global `model`.
model = load_model(modelDir)

In [None]:
def normalize_lon_lat(arr):
    """
    Min-max normalise an array of positions.

    Column 0 (LON) is scaled with lonMin/lonMax and column 1 (LAT) with
    latMin/latMax, i.e. (value - min) / (max - min).
    Returns a new (N, 2) array; arr itself is not modified.
    """
    lonNorm = (arr[:,0] - lonMin)/(lonMax - lonMin)
    latNorm = (arr[:,1] - latMin)/(latMax - latMin)
    #turn both vectors into (N, 1) columns and put them side by side
    return np.hstack((np.reshape(lonNorm, (lonNorm.shape[0],1)),
                      np.reshape(latNorm, (latNorm.shape[0],1))))

In [None]:
#single step prediction:
#takes the previous time stamps of one trajectory together with their
#features, normalises them the same way as the training data,
#runs the model once and returns the de-normalised position
def compute_30_min_pred(prevTraj, lenVal, yearVal, monVal, destLon, destLat, vType, sOGVal, sOGSDVal, cOGVal):
    """
    Predict the next position from the previous time stamps.

    Parameters:
        prevTraj (T x 2 array): LON, LAT of the previous T time stamps.
        lenVal, yearVal: accepted for interface compatibility, not used.
        monVal (int): month 1..12; one-hot encoded at column (12 - monVal).
        destLon, destLat: destination of the trajectory.
        vType (int): 0 or 1; one-hot encoded over two columns.
        sOGVal, sOGSDVal, cOGVal (T x 1 arrays): grid statistics per time stamp.
    Returns:
        (lon, lat) of the predicted position, de-normalised.
    """
    numSteps = prevTraj.shape[0]

    #categorical features, repeated on every time stamp
    monthOneHot = np.zeros((numSteps, 12))
    monthOneHot[:,(12-monVal)] = 1

    typeOneHot = np.zeros((numSteps,2))
    typeOneHot[:,vType] = 1

    #destination, scaled with the same bounding box as the positions
    destArr = np.zeros((numSteps,2))
    destArr[:,0] = destLon
    destArr[:,1] = destLat
    destArr[:,0] = (destArr[:,0] - lonMin)/(lonMax - lonMin)
    destArr[:,1] = (destArr[:,1] - latMin)/(latMax - latMin)

    #column order must match the one used to build the training matrix
    stepFeatures = np.hstack((
        np.zeros((numSteps,0)),
        normalize_lon_lat(prevTraj),
        (sOGVal[:,:] - sOGMin)/(sOGMax - sOGMin),
        (sOGSDVal[:,:] - sOGSDMin)/(sOGSDMax - sOGSDMin),
        (cOGVal[:,:] - cOGMedMin)/(cOGMedMax - cOGMedMin),
        typeOneHot,
        monthOneHot,
        destArr,
    ))

    #batch of one sample: (1, time stamps, features)
    xTSNorm = np.reshape(stepFeatures,(1,stepFeatures.shape[0],stepFeatures.shape[1]))

    #fusion input stays empty, so the single-input call is the one that runs
    xFNorm = np.zeros((1,0))
    if(xFNorm.shape[1] > 0):
        predLatLon = model.predict([xTSNorm, xFNorm])
    else:
        predLatLon = model.predict(xTSNorm)

    #undo the min-max scaling of the output
    predLonScaled = (predLatLon[0,0] * (lonMax - lonMin)) + lonMin
    predLatScaled = (predLatLon[0,1] * (latMax - latMin)) + latMin
    return predLonScaled, predLatScaled

In [None]:
def compute_n_30_min_pred(prevTraj, lenVal, yearVal, monVal, destLon, destLat, vType, sOGVal ,sOGSDVal, cOGVal, n = 1):
    """
    Compute prediction for n*30 minutes.

    Calls compute_30_min_pred n times. After every call the oldest time
    stamp is dropped from the window and the predicted position, together
    with the grid statistics of the cell it falls in, is appended.

    Parameters:
        prevTraj (T x 2 array): LON, LAT of the time stamps seeding the window.
        lenVal, yearVal, monVal, destLon, destLat, vType:
            forwarded unchanged to compute_30_min_pred.
        sOGVal, sOGSDVal, cOGVal (T x 1 arrays): grid statistics of the seed window.
        n (int): number of consecutive predictions, default 1.
    Returns:
        (n x 2) float array of predicted LON, LAT, one row per step.
    """
    #work on copies so the caller's arrays are never modified
    firstLoc = prevTraj.copy()
    firstSOG = sOGVal.copy()
    firstSOGSD = sOGSDVal.copy()
    firstCOG = cOGVal.copy()

    predictions = []

    for _ in range(n):
        predLon, predLat = compute_30_min_pred(firstLoc, lenVal, yearVal, monVal, destLon, destLat, vType, firstSOG, firstSOGSD, firstCOG)
        predictions.append([predLon, predLat])

        #slide the window: drop the oldest row, append the prediction
        firstLoc = np.vstack((firstLoc[1:,:], np.array([[predLon,predLat]])))

        #NOTE(review): a prediction outside the grid gives index -1, which
        #numpy resolves to the last grid cell - confirm that this is intended
        trajState = get_index_from_lon_lat_cord(predLon,predLat)
        firstSOG = np.vstack((firstSOG[1:,:], np.array([[sOGMean[trajState]]])))
        firstSOGSD = np.vstack((firstSOGSD[1:,:], np.array([[sOGSD[trajState]]])))
        firstCOG = np.vstack((firstCOG[1:,:], np.array([[cOGMedian[trajState]]])))

    #reshape keeps the (0, 2) shape when n is 0
    return np.array(predictions, dtype=float).reshape(-1, 2)

In [None]:
def get_error_for_traj(srcDir, num, nPred = 8):
    """
    Compute the prediction errors along one trajectory.

    The first prevTS time stamps seed the model, which is then rolled
    forward one step at a time; every predicted position is compared with
    the recorded one. With the default nPred = 8 this covers predictions
    of up to 4 hours i.e. 240 minutes.

    Parameters:
        srcDir (str): prefix of the trajectory files (srcDir + str(num) + '.csv').
        num (int): number of vessel trajectory
                   whose prediction errors to be computed.
        nPred (int): maximum number of consecutive predictions, default 8.
    Returns:
        errorVal (list of floats): one gC.compute_distance value per
                                   prediction step (plotted as KM in this
                                   notebook); empty when the trajectory has
                                   fewer than prevTS + 1 points.
    """
    #the loader returns a 10-tuple; all of it is needed to build the model input
    vesselTraj,lenData,yearData,monData,destLon,destLat,vType,sOGVal,sOGSDVal,cOGVal = \
        get_traj_lon_lat_data_with_len(srcDir, num)

    #need the seed window plus at least one point to compare against
    if(vesselTraj.shape[0] < (prevTS + 1)):
        return []

    #never predict further than the recorded trajectory reaches
    predRange = min(vesselTraj.shape[0] - prevTS, nPred)

    predVesselTraj = compute_n_30_min_pred(vesselTraj[0:prevTS,:]
                                           ,lenData, yearData, monData, destLon, destLat, vType
                                           ,sOGVal[0:prevTS,:]
                                           ,sOGSDVal[0:prevTS,:]
                                           ,cOGVal[0:prevTS,:]
                                           ,n = predRange)

    errorVal = []
    for pred in range(predRange):
        trueLon, trueLat = vesselTraj[(prevTS + pred),0], vesselTraj[(prevTS + pred),1]
        predLon, predLat = predVesselTraj[pred,0], predVesselTraj[pred,1]
        errorVal.append(gC.compute_distance(trueLon, trueLat, predLon, predLat))
    return errorVal

In [None]:
# Show the docstring of get_error_for_traj.
help(get_error_for_traj)

In [None]:
#store errors for all training trajectories
trainDataWholeErrors_1004 = []
for traj in range(trainTrajNum):
    trainDataWholeErrors_1004.append(get_error_for_traj(sourceDir,traj))

In [None]:
#segregate those list of errors into
#list of 30 minErr, 60 minErr, ...
trainWholeErr_1004_30 = []
trainWholeErr_1004_60 = []
trainWholeErr_1004_90 = []
trainWholeErr_1004_120 = []
trainWholeErr_1004_150 = []
trainWholeErr_1004_180 = []
trainWholeErr_1004_210 = []
trainWholeErr_1004_240 = []

trainWholeErr_1004_n_30 = [trainWholeErr_1004_30 \
, trainWholeErr_1004_60 \
, trainWholeErr_1004_90 \
, trainWholeErr_1004_120 \
, trainWholeErr_1004_150 \
, trainWholeErr_1004_180 \
, trainWholeErr_1004_210 \
, trainWholeErr_1004_240 \
]

for trajErr in trainDataWholeErrors_1004:
    #take the list
    for n_30 in range(8):
        if(len(trajErr) > n_30):
            trainWholeErr_1004_n_30[n_30].append(trajErr[n_30])

In [None]:
def format_func(value, tick_number):
    """
    Tick formatter: turn a zero-based step index into minutes.

    Index 0 is labelled "30", index 1 "60" and so on; the result is
    formatted with %d, so fractional minutes are truncated.
    tick_number is required by the formatter interface and not used.
    """
    return "%d"%((value*30) + 30)

In [None]:
#numpy copies of the per-horizon error lists
(trainWholeErr_1004_30NP
, trainWholeErr_1004_60NP
, trainWholeErr_1004_90NP
, trainWholeErr_1004_120NP
, trainWholeErr_1004_150NP
, trainWholeErr_1004_180NP
, trainWholeErr_1004_210NP
, trainWholeErr_1004_240NP
) = [np.array(errList) for errList in (trainWholeErr_1004_30
                                       , trainWholeErr_1004_60
                                       , trainWholeErr_1004_90
                                       , trainWholeErr_1004_120
                                       , trainWholeErr_1004_150
                                       , trainWholeErr_1004_180
                                       , trainWholeErr_1004_210
                                       , trainWholeErr_1004_240)]

In [None]:
#mean training error per prediction horizon, 30 min first
trainWholeErrMean_1004 = [np.mean(errArr) for errArr in (trainWholeErr_1004_30NP
                                                         , trainWholeErr_1004_60NP
                                                         , trainWholeErr_1004_90NP
                                                         , trainWholeErr_1004_120NP
                                                         , trainWholeErr_1004_150NP
                                                         , trainWholeErr_1004_180NP
                                                         , trainWholeErr_1004_210NP
                                                         , trainWholeErr_1004_240NP)]

In [None]:
#mean training error against prediction horizon;
#format_func relabels the x ticks from step index to minutes
fig = plt.figure()
ax = fig.add_subplot()
ax.plot(trainWholeErrMean_1004,label = "LSTM")
ax.set(title="Average Error Value On Training 1004",
       xlabel="Time in Minutes",
       ylabel="Distance in KM")
ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
ax.legend()

In [None]:
#error list of every test trajectory (trainTrajNum .. testEndTrajNum-1), in trajectory order
testDataWholeErrors_1004 = [get_error_for_traj(sourceDir, trajIdx)
                            for trajIdx in range(trainTrajNum, testEndTrajNum)]

In [None]:
#split the per-trajectory error lists by prediction horizon:
#slot 0 collects every 30 min error, slot 1 every 60 min error,
#... slot 7 every 240 min error
testWholeErr_1004_n_30 = [[] for _ in range(8)]

#named handles for the individual horizons (the very same list objects)
(testWholeErr_1004_30
, testWholeErr_1004_60
, testWholeErr_1004_90
, testWholeErr_1004_120
, testWholeErr_1004_150
, testWholeErr_1004_180
, testWholeErr_1004_210
, testWholeErr_1004_240
) = testWholeErr_1004_n_30

for trajErr in testDataWholeErrors_1004:
    #zip stops at the shorter input, so a short trajectory only
    #contributes to the horizons it actually reached
    for horizonErrs, errVal in zip(testWholeErr_1004_n_30, trajErr):
        horizonErrs.append(errVal)

In [None]:
#numpy copies of the per-horizon error lists
(testWholeErr_1004_30NP
, testWholeErr_1004_60NP
, testWholeErr_1004_90NP
, testWholeErr_1004_120NP
, testWholeErr_1004_150NP
, testWholeErr_1004_180NP
, testWholeErr_1004_210NP
, testWholeErr_1004_240NP
) = [np.array(errList) for errList in (testWholeErr_1004_30
                                       , testWholeErr_1004_60
                                       , testWholeErr_1004_90
                                       , testWholeErr_1004_120
                                       , testWholeErr_1004_150
                                       , testWholeErr_1004_180
                                       , testWholeErr_1004_210
                                       , testWholeErr_1004_240)]

In [None]:
#mean test error per prediction horizon, 30 min first
testWholeErrMean_1004 = [np.mean(errArr) for errArr in (testWholeErr_1004_30NP
                                                        , testWholeErr_1004_60NP
                                                        , testWholeErr_1004_90NP
                                                        , testWholeErr_1004_120NP
                                                        , testWholeErr_1004_150NP
                                                        , testWholeErr_1004_180NP
                                                        , testWholeErr_1004_210NP
                                                        , testWholeErr_1004_240NP)]

In [None]:
#mean test error against prediction horizon;
#format_func relabels the x ticks from step index to minutes
fig = plt.figure()
ax = fig.add_subplot()
ax.plot(testWholeErrMean_1004,label = "LSTM")
ax.set(title="Average Error Value On Testing 1004",
       xlabel="Time in Minutes",
       ylabel="Distance in KM")
ax.xaxis.set_major_formatter(plt.FuncFormatter(format_func))
ax.legend()

In [None]:
#store the per-horizon training errors, one .npy file per horizon
for fileName, errArr in (
    ("trainWholeErr_1004_30NP.npy", trainWholeErr_1004_30NP),
    ("trainWholeErr_1004_60NP.npy", trainWholeErr_1004_60NP),
    ("trainWholeErr_1004_90NP.npy", trainWholeErr_1004_90NP),
    ("trainWholeErr_1004_120NP.npy", trainWholeErr_1004_120NP),
    ("trainWholeErr_1004_150NP.npy", trainWholeErr_1004_150NP),
    ("trainWholeErr_1004_180NP.npy", trainWholeErr_1004_180NP),
    ("trainWholeErr_1004_210NP.npy", trainWholeErr_1004_210NP),
    ("trainWholeErr_1004_240NP.npy", trainWholeErr_1004_240NP),
):
    dataToStore = dataDir + fileName
    np.save(dataToStore, errArr)

In [None]:
#store the per-horizon test errors, one .npy file per horizon
for fileName, errArr in (
    ("testWholeErr_1004_30NP.npy", testWholeErr_1004_30NP),
    ("testWholeErr_1004_60NP.npy", testWholeErr_1004_60NP),
    ("testWholeErr_1004_90NP.npy", testWholeErr_1004_90NP),
    ("testWholeErr_1004_120NP.npy", testWholeErr_1004_120NP),
    ("testWholeErr_1004_150NP.npy", testWholeErr_1004_150NP),
    ("testWholeErr_1004_180NP.npy", testWholeErr_1004_180NP),
    ("testWholeErr_1004_210NP.npy", testWholeErr_1004_210NP),
    ("testWholeErr_1004_240NP.npy", testWholeErr_1004_240NP),
):
    dataToStore = dataDir + fileName
    np.save(dataToStore, errArr)

In [None]:
# Largest 30 minute error over the training trajectories.
np.max(trainWholeErr_1004_30NP)
# np.min(trainWholeErr_1004_30NP)

In [None]:
def get_error_for_traj_30(srcDir, num):
    """
    Compute the error of the first 30 minute prediction of a trajectory.

    The first prevTS time stamps seed the model; a single step is predicted
    and compared with the recorded position at index prevTS.

    Parameters:
        srcDir (str): prefix of the trajectory files (srcDir + str(num) + '.csv').
        num (int): number of vessel trajectory
                   whose prediction error is to be computed.
    Returns:
        errorVal (list of floats): one-element list with the
                                   gC.compute_distance value of the
                                   prediction; empty when the trajectory has
                                   fewer than prevTS + 1 points.
    """
    vesselTraj,lenData,yearData,monData,destLon,destLat,vType,sOGVal,sOGSDVal,cOGVal = \
        get_traj_lon_lat_data_with_len(srcDir, num)

    #need the seed window plus one point to compare against
    if(vesselTraj.shape[0] < (prevTS + 1)):
        return []

    #vType is the value handed back by the loader (its default is 0)
    predVesselTraj = compute_n_30_min_pred(vesselTraj[0:prevTS,:]
                                           ,lenData, yearData, monData, destLon, destLat, vType
                                           ,sOGVal[0:prevTS,:]
                                           ,sOGSDVal[0:prevTS,:]
                                           ,cOGVal[0:prevTS,:]
                                           ,n = 1)

    #the guard above guarantees that index prevTS exists
    trueLon, trueLat = vesselTraj[prevTS,0], vesselTraj[prevTS,1]
    predLon, predLat = predVesselTraj[0,0], predVesselTraj[0,1]
    return [gC.compute_distance(trueLon, trueLat, predLon, predLat)]

In [None]:
#30 minute error of every training trajectory (0 .. trainTrajNum-1);
#this overwrites the multi-horizon list of the same name computed earlier
trainDataWholeErrors_1004 = [get_error_for_traj_30(sourceDir, trajIdx)
                             for trajIdx in range(trainTrajNum)]

In [None]:
#collect the 30 min error of every trajectory that produced one and
#report the trajectories whose error is above 2.0
err30Min = []
maxErrCount = 0
for idx, err in enumerate(trainDataWholeErrors_1004):
    if len(err) == 0:
        continue
    err30Min.append(err[0])
    if err[0] > 2.0:
        print(err,idx)
        maxErrCount += 1
#idx ends up as the number of trajectories visited
idx = len(trainDataWholeErrors_1004)
print(maxErrCount)

In [None]:
# Sanity check: the three numbers are expected to agree.
print(trainTrajNum)
print(len(trainDataWholeErrors_1004))
print(idx)

In [None]:
# Mean and median of the 30 minute training errors.
print(np.mean(np.array(err30Min)))
print(np.median(np.array(err30Min)))

In [None]:
vesselTraj,lenData,yearData,monData,destLon,destLat,_, sOGVal,sOGSDVal = get_traj_lon_lat_data_with_len(sourceDir, 285)

In [None]:
predVesselTraj = compute_n_30_min_pred(np.reshape(vesselTraj[0:prevTS,:], (prevTS,2)), lenData, yearData, monData, destLon, destLat, 0, np.reshape(sOGVal[0:prevTS,:], (prevTS,1)),np.reshape(sOGSDVal[0:prevTS,:], (prevTS,1)), n = 1)

In [None]:
# Predicted (lon, lat) for the trajectory inspected above.
print(predVesselTraj)

In [None]:
# 30 minute prediction error of trajectory 285.
get_error_for_traj_30(sourceDir, 285)