In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%matplotlib inline
import sys
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Circle
import seaborn as sns; 
from IPython.display import HTML

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing
from sklearn import svm
from sklearn.metrics import r2_score

In [3]:
#config parser
import configparser

sys.path.insert(0, '../Common/')
from AISDataManager import AISDataManager
import Constants as c
import HMUtils as hMUtil
import TimeUtils as timeUtils

# MyConfig.INI stores all the run time constants.
# NOTE(review): ConfigParser.read() silently ignores a missing/unreadable file
# and returns the list of files it actually parsed; if the path is wrong the
# first visible failure is a KeyError when a section is looked up later.
config = configparser.ConfigParser()
config.read('../MyConfig.INI')

from joblib import Parallel, delayed
import multiprocessing
# Shared data-manager instance used by the cells below to load CSVs and list MMSIs.
aISDM = AISDataManager()
# Number of CPU cores reported by the OS.
numCores = multiprocessing.cpu_count()

In [4]:
# All run-time constants for this notebook come from the TP_SEC_ORDER section.
tpSection = config['TP_SEC_ORDER']

# Longitude / latitude bounds of the region.
lonMin = float(tpSection['LON_MIN'])
lonMax = float(tpSection['LON_MAX'])
latMin = float(tpSection['LAT_MIN'])
latMax = float(tpSection['LAT_MAX'])

print(lonMin, latMin)
print(lonMax, latMax)

# Grid step and resolution handed to the grid generator.
increStep = float(tpSection['INCR_STEP'])
incrRes = int(tpSection['INCR_RES'])

# Input directory of numbered CSV files and output directory for .npy results.
fileDir = tpSection['SOURCE_DIR']
dirToStore = tpSection['DEST_DIR']

# Range of file numbers to sweep over.
timeStart = int(tpSection['TIME_START'])
timeEnd = int(tpSection['TIME_END'])

print("SOURCE_DIR = %s" % fileDir)
print("DEST_DIR = %s" % dirToStore)

-120.5 33.9
-119.0 34.38
SOURCE_DIR = ../Data/M120_50_M119_00_33_90_34_38/HalfHourlyLE/
DEST_DIR = ../Data/M120_50_M119_00_33_90_34_38/SparseTP/


In [5]:
# Build the grid for the configured lon/lat box. Entries 0, 1 and 2 of the
# result are read in the next cell (horizontal axis, vertical axis, boundaries).
heatMapGrid = hMUtil.generate_grid(lonMin, lonMax, latMin, latMax, increStep, incrRes)

In [6]:
# Split the generated grid into its parts by position.
horizontalAxis, verticalAxis, boundaryArray = heatMapGrid[0], heatMapGrid[1], heatMapGrid[2]

# Number of states = product of the two axis lengths.
totalStates = horizontalAxis.shape[0] * verticalAxis.shape[0]

In [7]:
# Second-order transition counters, all starting at zero.
# neighTPCount[nextState] is a flat vector of length totalStates * totalStates;
# slot (prevState * totalStates + currState) counts prev -> curr -> next moves.
neighTPCount = [np.zeros(totalStates * totalStates) for _ in range(totalStates)]

In [8]:
#get index from lon and lat position
#needs boundary array to get the index
def get_index_from_lon_lat(lon,lat):
    """Map a position to the index of the grid cell that contains it.

    Scans the module-level ``boundaryArray``; each row is read as
    ``[lonLow, lonHigh, latLow, latHigh, index]`` with half-open intervals
    (low inclusive, high exclusive).

    Returns the ``index`` entry of the first matching row, or -1 when no
    row contains the position.
    """
    for cell in boundaryArray:
        # Reject on longitude first so latitude is only compared when needed.
        if not (cell[0] <= lon < cell[1]):
            continue
        if cell[2] <= lat < cell[3]:
            return cell[4]
    return -1

def clear_tm_matrix():
    """Zero, in place, the first ``totalStates`` count vectors of ``neighTPCount``."""
    for stateIdx in range(totalStates):
        stateCounts = neighTPCount[stateIdx]
        stateCounts.fill(0)
        
def compute_transition_prob(firstNum):
    """Accumulate second-order transition counts from three consecutive files.

    Loads ``<fileDir><firstNum>.csv`` and the two following numbered files
    (previous, current and next time window). For every MMSI present in all
    three, the position in each window is mapped to a grid index and
    ``neighTPCount[next][prev * totalStates + curr]`` is incremented.

    A vessel whose position in ANY of the three windows falls outside the
    grid (index -1) is reported and skipped. Using -1 as an index would
    otherwise wrap around and silently credit the last state / wrong slot.
    The remaining vessels of the file are still processed.

    Parameters
    ----------
    firstNum : int
        Number of the first (previous-window) file.

    Returns
    -------
    None
        Counts are accumulated in the module-level ``neighTPCount``.
    """
    fileName1 = fileDir + str(firstNum) + '.csv'
    fileName2 = fileDir + str(firstNum+1) + '.csv'
    fileName3 = fileDir + str(firstNum+2) + '.csv'

    #data for the previous, current and next time window
    firstDF,_ = aISDM.load_data_from_csv(fileName1)
    secondDF,_ = aISDM.load_data_from_csv(fileName2)
    thirdDF,_ = aISDM.load_data_from_csv(fileName3)

    #vessels seen in each of the three windows
    prevVessels = aISDM.get_list_of_unique_mmsi(firstDF)
    currVessels = aISDM.get_list_of_unique_mmsi(secondDF)
    nextVessels = aISDM.get_list_of_unique_mmsi(thirdDF)

    #index by MMSI so a vessel's row can be looked up directly
    #NOTE(review): .loc[vessel, col] yields a scalar only when each MMSI
    #appears once per file - confirm the source files guarantee this
    firstDF = firstDF.set_index('MMSI')
    secondDF = secondDF.set_index('MMSI')
    thirdDF = thirdDF.set_index('MMSI')

    #only vessels observed in all three windows give a full prev->curr->next triple
    vesselsOfInterest = set(prevVessels) & set(currVessels) & set(nextVessels)

    for vessel in vesselsOfInterest:
        vesselPrevIndex = get_index_from_lon_lat(firstDF.loc[vessel,'LON'],firstDF.loc[vessel,'LAT'])
        vesselCurrIndex = get_index_from_lon_lat(secondDF.loc[vessel,'LON'],secondDF.loc[vessel,'LAT'])
        vesselNextIndex = get_index_from_lon_lat(thirdDF.loc[vessel,'LON'],thirdDF.loc[vessel,'LAT'])

        #-1 means "not inside any grid cell"; never use it as an array index
        if (vesselPrevIndex == -1) or (vesselCurrIndex == -1) or (vesselNextIndex == -1):
            print("Something is wrong")
            continue

        #flatten (prev, curr) into one offset of the next-state vector
        vesselPrevCurrIndex = (vesselPrevIndex * totalStates) + vesselCurrIndex
        neighTPCount[vesselNextIndex][vesselPrevCurrIndex] += 1

In [None]:
# Start from zeroed counters, then fold in one file triple per iteration.
# Each call reads files fileCounter, fileCounter+1 and fileCounter+2, so the
# last iteration (timeEnd-1) reads up to file number timeEnd+1.
clear_tm_matrix()
for fileCounter in range(timeStart,timeEnd):
    try:
        compute_transition_prob(fileCounter)
        print("Done Computing %d"%(fileCounter))
    except KeyboardInterrupt:
        # Interrupting the kernel during an iteration stops the sweep;
        # counts accumulated so far stay in neighTPCount.
        break

In [12]:
# Write each per-next-state count vector to <dirToStore><state>.npy
for neighb in range(totalStates):
    stateCounts = neighTPCount[neighb]
    opFile = dirToStore + str(neighb) + '.npy'
    np.save(opFile, stateCounts)

# Element-wise total over all next states: how often each (prev, curr) pair
# was observed in total (these are counts, not probabilities).
sumCount = np.zeros(totalStates * totalStates)
for i in range(totalStates):
    sumCount += neighTPCount[i]

opFile = dirToStore + "SumCount" + '.npy'
np.save(opFile, sumCount)