In [1]:
%matplotlib inline
import os
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
from mpl_toolkits.basemap import Basemap

import pandas as pd
import numpy as np

from joblib import Parallel, delayed
import multiprocessing

from AISDataManager import AISDataManager
import Constants as c

#config parser
import configparser

#MyConfig.INI stores all the run time constants
#the path is relative, so it is resolved against the current working directory
#NOTE: ConfigParser.read() silently skips a file it cannot open, so a missing
#MyConfig.INI only shows up later as a KeyError on the first section lookup
config = configparser.ConfigParser()
config.read('MyConfig.INI')

#make object of AIS data manager; this single instance is reused by the cells below
aISDM = AISDataManager()

In [None]:
#read the sorted csv that holds the records of one particular vessel
oneVesselFile = "./Data/AIS_2017_LA/MMSI/366898250_Sorted.csv"
# oneVesselFile = "./Dummy.csv"
oneVesselData, retVal = aISDM.load_data_from_csv(oneVesselFile)

#handle the failure case first; nothing is raised, the cell only reports it
if retVal != c.errNO['SUCCESS']:
    print("Error Loading CSV file")
else:
    print("Loaded Successfully")
    #comment out the next two lines to keep the static features
    dropList = ['BaseDateTime', 'VesselName', 'IMO', 'CallSign']
    oneVesselData = aISDM.drop_columns(oneVesselData, dropList)

In [None]:
#longitude / latitude bounds of the region, taken from the run time config
lonMin, lonMax = (float(config['REGEION'][key]) for key in ('LON_MIN', 'LON_MAX'))
latMin, latMax = (float(config['REGEION'][key]) for key in ('LAT_MIN', 'LAT_MAX'))

print(lonMin,latMin)
print(lonMax,latMax)

#grid step along both axes and the number of decimals the grid values are rounded to
increStep = 0.01
incrRes = 2

#np.arange with a float step accumulates rounding error, hence the np.around
xGrid = np.around(np.arange(lonMin,lonMax,increStep),incrRes)
yGrid = np.around(np.arange(latMin,latMax,increStep),incrRes)

In [None]:
#NOTE(review): presumably keeps the rows whose 'DateTime' value falls inside the
#intervals listed in HourlyInterval17To18.txt -- confirm in AISDataManager
retDF = aISDM.get_time_stamp_data(oneVesselData, 'DateTime', 'HourlyInterval17To18.txt')

In [None]:
#preview the first rows of retDF
print(retDF.head())

In [None]:
#size of retDF
print(retDF.shape)

In [None]:
#NOTE(review): presumably derives a numeric seconds column from 'DateTime'
#(slice_df_for_time_series below expects a column named 'Seconds') -- confirm in AISDataManager
secDF = aISDM.append_seconds_column(retDF, 'DateTime')

In [None]:
#preview the first rows of secDF
print(secDF.head())

In [28]:
#small synthetic frame for exercising the time series helpers:
#the five motion columns all count down from 10 to -2, and 'Seconds' advances
#in steps of 100 except for two larger jumps (400 -> 700 and 1200 -> 1600)
testDF = pd.DataFrame({
    "MMSI": list(range(1, 14)),
    **{name: list(range(10, -3, -1)) for name in ("LON", "LAT", "SOG", "COG", "Heading")},
    "Seconds": [100, 200, 300, 400, 700, 800, 900, 1000, 1100, 1200, 1600, 1700, 1800],
})

In [29]:
#show the whole synthetic frame
print(testDF)

    MMSI  LON  LAT  SOG  COG  Heading  Seconds
0      1   10   10   10   10       10      100
1      2    9    9    9    9        9      200
2      3    8    8    8    8        8      300
3      4    7    7    7    7        7      400
4      5    6    6    6    6        6      700
5      6    5    5    5    5        5      800
6      7    4    4    4    4        4      900
7      8    3    3    3    3        3     1000
8      9    2    2    2    2        2     1100
9     10    1    1    1    1        1     1200
10    11    0    0    0    0        0     1600
11    12   -1   -1   -1   -1       -1     1700
12    13   -2   -2   -2   -2       -2     1800


In [40]:
def slice_df_for_time_series(dFObj,prevFeatureToConsider,timeStep):
    """Split a frame into runs of rows that are close together in time.

    A new run starts at every row whose 'Seconds' value exceeds the value of
    the row before it by more than ``timeStep``. Runs that are too short to
    provide ``prevFeatureToConsider`` history rows plus one target row are
    dropped.

    Parameters
    ----------
    dFObj : pandas.DataFrame
        Frame with a 'Seconds' column. Rows are compared in their existing
        order; no sorting is done here.
    prevFeatureToConsider : int
        Number of history rows a sample needs. A run is kept only when it has
        at least one row more than this.
    timeStep : number
        Largest difference between consecutive 'Seconds' values that still
        belongs to the same run (a difference equal to ``timeStep`` does not
        split).

    Returns
    -------
    list of pandas.DataFrame
        Copies of the retained runs, in their original order. The list is
        empty when ``dFObj`` has no rows or when no run is long enough.

    Raises
    ------
    ValueError
        If ``dFObj`` has no column named 'Seconds'.
    """
    #plus one for the prediction purpose
    minimumRows = prevFeatureToConsider + 1

    #positional lookup, so the first match wins if the label is duplicated
    colNum = dFObj.columns.tolist().index('Seconds')

    numRows = dFObj.shape[0]
    #a frame without rows has no runs; reading row 0 would raise IndexError
    if numRows == 0:
        return []

    seconds = np.asarray(dFObj.iloc[:,colNum])

    #positions of the rows that open a new run (gap to the previous row too large)
    breakPoints = np.flatnonzero(np.diff(seconds) > timeStep) + 1
    boundaries = [0] + breakPoints.tolist() + [numRows]

    dFList = []
    for start, stop in zip(boundaries[:-1], boundaries[1:]):
        #check for minimum number of rows
        if (stop - start) >= minimumRows:
            dFList.append(dFObj.iloc[start:stop,:].copy())
    return dFList

In [53]:
#make time series data based on previous data
def make_time_series_data(dFObj, prevLonFeature, prevLatFeature, prevSOGFeature, prevCOGFeature\
                              , prevHeadingFeature, timeStep, labelColName, verbose=True):
    """Build sliding-window feature and label matrices from a track.

    The frame is first cut into runs with ``slice_df_for_time_series`` so that
    no window spans a time gap larger than ``timeStep``. Inside every run a
    window of ``max(prev*Feature)`` consecutive rows slides down one row at a
    time. For each window the last ``prevLonFeature`` 'LON' values, the last
    ``prevLatFeature`` 'LAT' values, and likewise 'SOG', 'COG' and 'Heading',
    are laid out side by side (in that column order, oldest value first), and
    the ``labelColName`` value of the row right after the window is the label.

    Parameters
    ----------
    dFObj : pandas.DataFrame
        Frame with 'LON', 'LAT', 'SOG', 'COG', 'Heading' and 'Seconds'
        columns plus the label column.
    prevLonFeature, prevLatFeature, prevSOGFeature, prevCOGFeature, prevHeadingFeature : int
        How many trailing values of the respective column go into a sample.
    timeStep : number
        Largest allowed difference between consecutive 'Seconds' values
        inside one run.
    labelColName : str
        Column whose next value is the prediction target.
    verbose : bool, optional
        When True (the default) print the window length and the shape of the
        empty feature matrix, as the function has always done. Pass False to
        silence this diagnostic output.

    Returns
    -------
    featureMatrix : numpy.ndarray
        Float array of shape (samples, sum of the five prev*Feature counts).
    labelMatrix : numpy.ndarray
        Array of shape (samples, 1) holding the matching labels.
        Both arrays have zero rows when no run is long enough.

    Raises
    ------
    ValueError
        If a required column is missing from ``dFObj``.
    """
    #(column name, number of trailing values to use), in output column order
    featureCounts = [('LON', prevLonFeature),
                     ('LAT', prevLatFeature),
                     ('SOG', prevSOGFeature),
                     ('COG', prevCOGFeature),
                     ('Heading', prevHeadingFeature)]

    #the longest history decides the window length
    previousFeatures = max(count for _, count in featureCounts)
    totalFeatures = sum(count for _, count in featureCounts)

    if verbose:
        print(previousFeatures)

    dFSlices = slice_df_for_time_series(dFObj,previousFeatures, timeStep)

    #empty seeds pin the output shapes and the float dtype even if no window qualifies
    featureMatrix = np.zeros((0,totalFeatures))
    labelMatrix = np.zeros((0,1))

    if verbose:
        print(featureMatrix.shape)

    if not dFSlices:
        return featureMatrix, labelMatrix

    #every slice has the same columns as dFObj, so resolve the positions once;
    #a shorter history is right-aligned inside the window through its offset
    colList = dFObj.columns.tolist()
    featureSpecs = [(colList.index(name), count, previousFeatures - count)
                    for name, count in featureCounts]
    labelColNum = colList.index(labelColName)

    #collect one block per slice and stack once at the end; stacking inside
    #the loop would copy the whole matrix again for every single sample
    featureBlocks = [featureMatrix]
    labelBlocks = [labelMatrix]

    #iterate through sub data frames
    for dF in dFSlices:
        numWindows = dF.shape[0] - previousFeatures
        windowStarts = np.arange(numWindows)[:,np.newaxis]

        columnBlocks = []
        for colNum, count, offset in featureSpecs:
            #row positions of all windows at once, shape (numWindows, count)
            rowIdx = windowStarts + offset + np.arange(count)
            columnBlocks.append(np.asarray(dF.iloc[:,colNum])[rowIdx])

        featureBlocks.append(np.hstack(columnBlocks).astype(float))

        #the label of a window is the row that directly follows it
        labelBlocks.append(np.asarray(dF.iloc[previousFeatures:,labelColNum]).reshape(-1,1))

    return np.vstack(featureBlocks), np.vstack(labelBlocks)
            

In [54]:
#build a supervised data set from the synthetic frame (no model is fitted here)
#each row of featureMat holds the last 3 LON, 2 LAT, 1 SOG, 1 COG and 1 Heading values of a window
#the matching row of labelMat holds the LON value of the row that follows the window
#windows never cross a jump of more than 100 in the 'Seconds' column
featureMat, labelMat = make_time_series_data(testDF, 3, 2, 1, 1, 1, 100, 'LON')
print(featureMat)
print(labelMat)

3
(0, 8)
[[10.  9.  8.  9.  8.  8.  8.  8.]
 [ 6.  5.  4.  5.  4.  4.  4.  4.]
 [ 5.  4.  3.  4.  3.  3.  3.  3.]
 [ 4.  3.  2.  3.  2.  2.  2.  2.]]
[[7.]
 [3.]
 [2.]
 [1.]]
