# Import packages

In [1]:
from ipywidgets import HTML
import ipywidgets as widgets
from ipyleaflet import Map, Polyline, Rectangle, basemaps, basemap_to_tiles, Polygon, FullScreenControl, Popup, WidgetControl
import pandas as pd
import numpy as np
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from ipyleaflet import Map, basemaps, basemap_to_tiles, Circle, FullScreenControl, LayerGroup
from ipywidgets.embed import embed_minimal_html
import sys
import scipy.stats
import matplotlib.pyplot as plt
from sklearn.neighbors import KernelDensity
from IPython.display import clear_output, display
sys.path.append('/datc/saab/notebooks')

# Define functions

In [2]:
def normalize(v):
    """
    Normalize a vector. Turn into a 1-module vector
    """
    norm = np.linalg.norm(v)
    if norm == 0: 
        return v
    return v / norm
def plot_directions_map(avg_directions, save=False, filename='default_filename'):
    """plot the directional map:
        - For every node, plot its main direction.
    """
    m = Map(center = (-25.353548853000003, -43.935133436), zoom =4)
    i = 0
    for coords, direction in avg_directions.items():
        if direction[0] == 0 or direction[1] == 0:
            color_value = 'black'
        elif direction[0] > 0 and direction[1] > 0:
            color_value = 'darkblue'
        elif direction[0] < 0 and direction[1] < 0:
            color_value = 'darkred'
        elif direction[0] < 0 and direction[1] > 0:
            color_value = 'red'
        elif direction[0] > 0 and direction[1] < 0:
            color_value = 'blue'
        line = Polyline(
            locations = [list(coords), list(coords+direction/10000)],
            color = color_value,
            fill_color= "transparent",
            weight = 3,
            opacity = 1)
        m.add_layer(line)
        i+=1
        if i==1000:
            break
    m.add_control(FullScreenControl())
    if save:
        embed_minimal_html(filename, views=[m])
    return m

def Sort(sub_li, fieldnum): 
    """
    Sort a dataframe by a field(fieldnum)
    """
    return(sorted(sub_li, key = lambda x: x[fieldnum]))

# Import the data

In [3]:
# Code to import the data and remove NaN values from it
filename = '/datc/saab/reduced_area_clean.h5'
data = pd.read_hdf(filename, 'df')
#data = data[(50 < data.length )]
data = data.dropna()
data.head()


Unnamed: 0,mmsi,datetime,latitude,longitude,orientation,rateofturn,course,length,breadth,speed,vesseltype
171,56295,2018-11-30 16:00:00.707,-25.520146,-43.769492,98.0,0.0,89.0,24.5,3.099609,0.620117,0
189,0,2018-11-30 16:00:00.707,-25.503689,-43.758524,166.25,0.0,165.0,54.8125,13.703125,3.759766,0
190,0,2018-11-30 16:00:00.707,-25.528367,-43.759105,275.25,0.0,136.375,148.75,19.203125,0.040009,0
191,0,2018-11-30 16:00:00.707,-25.552627,-43.763861,41.8125,0.0,42.40625,57.90625,8.0,4.019531,0
205,93,2018-11-30 16:00:00.707,-25.460982,-43.744079,144.75,-3.400391,167.0,21.296875,2.699219,0.560059,0


In [4]:
data.describe()

Unnamed: 0,mmsi,latitude,longitude,orientation,rateofturn,course,length,breadth,speed,vesseltype
count,1905895.0,1905895.0,1905895.0,1905895.0,1905895.0,1905895.0,1905895.0,1905895.0,1905895.0,1905895.0
mean,14568.96,-25.50158,-43.7063,,,,,,,0.05116756
std,22057.27,0.03140755,0.0431483,,,,,0.0,0.0,0.6377481
min,0.0,-25.56418,-43.77427,0.0,-337.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,-25.52672,-43.74306,108.5,0.0,92.875,17.5,2.5,0.6098633,0.0
50%,0.0,-25.49809,-43.71453,162.5,0.0,170.375,26.90625,4.898438,3.140625,0.0
75%,31191.0,-25.4754,-43.67246,298.25,0.0,281.75,46.1875,10.70312,4.371094,0.0
max,65067.0,-25.44535,-43.60242,360.0,405.75,360.0,500.0,191.75,128.875,8.0


# Get a array containing all ship's sequences --> ships_info

In [5]:
CRAFT_ID_list = data.mmsi.unique()#Get the mmsi unique values into a list:
CRAFT_ID_list = CRAFT_ID_list[CRAFT_ID_list!=0]
ships_info = []
ship_number = 0
for rowid in CRAFT_ID_list:
    #Start with empty lists
    npinfo, infolist = [], []
    #Get a numpy array composed by 'latitude', 'longitude', 'orientation', 'length', 'breadth'
    npinfo = data[data.mmsi == rowid][['latitude', 'longitude', 'length', 'datetime']].values
    
    ships_info.append(npinfo)
    
    ship_number+=1
    if ship_number%100 == 0:
        print(ship_number, '/', len(CRAFT_ID_list))
        clear_output(wait=True)
print('finished')
ships_info = [Sort(row, -1) for row in ships_info]

finished


# Split data into train/tests sets

In [6]:
border = int(0.9*len(ships_info))
train_ships = ships_info[:border]
test_ships = ships_info[border:]

# Get every ship direction

In [7]:
values = []
for ship in train_ships:
    values.append([])
    for i in range(len(ship)-1):
        try:
            #val = normalize(ship[i+1][:2]-ship[i][:2])
            val = ship[i][2]
            #print(val)
            values[-1].append(val)
        except Exception as e:
            print('problem!!!!!!!!!!!!!!', e)
            pass

In [8]:
print(train_ships[0])

[array([-25.520146135999997, -43.769491607000006, 24.5,
       Timestamp('2018-11-30 16:00:00.707000')], dtype=object), array([-25.520143705, -43.769494876, 24.5,
       Timestamp('2018-11-30 16:00:03.707000')], dtype=object), array([-25.520142321999998, -43.769489426999996, 24.5,
       Timestamp('2018-11-30 16:00:06.707000')], dtype=object), array([-25.520136329, -43.769484566, 24.5,
       Timestamp('2018-11-30 16:00:09.707000')], dtype=object), array([-25.520136119, -43.769486494, 24.59375,
       Timestamp('2018-11-30 16:00:12.708000')], dtype=object), array([-25.520136748000002, -43.769489511, 24.59375,
       Timestamp('2018-11-30 16:00:15.707000')], dtype=object), array([-25.520138299, -43.769497977, 24.59375,
       Timestamp('2018-11-30 16:00:18.707000')], dtype=object), array([-25.520139975, -43.769509879, 24.59375,
       Timestamp('2018-11-30 16:00:21.707000')], dtype=object), array([-25.520141861, -43.76951692, 24.59375,
       Timestamp('2018-11-30 16:00:24.707000')], dt

# Get the bounds of the grid according to a number of decimals defined

In [9]:
decimals = 3
radius = 10**(-decimals)

minimum_lat = min(np.round(data[['latitude']].values, decimals))[0]
maximum_lat = max(np.round(data[['latitude']].values, decimals))[0]
minimum_lon = min(np.round(data[['longitude']].values, decimals))[0]
maximum_lon = max(np.round(data[['longitude']].values, decimals))[0]
print(minimum_lat, maximum_lat, minimum_lon, maximum_lon)

-25.564 -25.445 -43.774 -43.602


# Define the possible values of lat-long along the grid

In [10]:
lon_coordinates = np.arange(start=minimum_lon, stop=maximum_lon, step=np.round(10**(-decimals), decimals))
lat_coordinates = np.arange(start=minimum_lat, stop=maximum_lat, step=np.round(10**(-decimals), decimals))

In [11]:
class nan_obj:
    def score_samples(x,y):
         return [np.nan]
def generic_KNN_rounding(lon_coordinates, lat_coordinates, values, train_ships, bw, decimals):
    values_list = {}
    i=0
    for lat in lat_coordinates:
        for lon in lon_coordinates:            
            values_list[(np.round(lat,decimals), np.round(lon,decimals))]=[]
            i+=1

    for i in range(len(train_ships)):
        print('Computing row : ', i+1)
        clear_output(wait=True)
        for j in range(len(train_ships[i])-1):
            try:
                values_list[(np.round(train_ships[i][j][0],decimals), np.round(train_ships[i][j][1],decimals))].append([values[i][j]])
            except:
                values_list[(np.round(train_ships[i][j][0],decimals), np.round(train_ships[i][j][1],decimals))] = [[values[i][j]]]
    print('Computed ', len(lon_coordinates)*len(lat_coordinates), ' nodes.')
    norm_dist_params = {}
    for coords, vals in values_list.items():
        if len(vals) != 0:
            norm_dist_params[coords] = [np.mean(np.array(vals)), np.std(np.array(vals))]
        else:
            norm_dist_params[coords] = [[np.nan, np.nan]]
    KDE_density_func = {}


    for coords, vals in values_list.items():
        if len(vals) > 20:
            #print(vals)
            KDE_density_func[coords] = KernelDensity(kernel='gaussian', bandwidth=bw).fit(vals)
        else:
            KDE_density_func[coords] = nan_obj()
    
    return values_list, KDE_density_func, norm_dist_params

In [36]:
values_list, KDE_density_func, norm_dist_params = generic_KNN_rounding(lon_coordinates, lat_coordinates, values, train_ships, 5, decimals)

Computed  20587  nodes.


In [37]:
keys = [x[0] for x in sorted(values_list.items(), key=lambda x: len(x[1]), reverse=True) if len(x[1]) > 0]
max_steps = len(keys)

# Define the slider
ships_slider = widgets.IntSlider(
    value=3500,
    min=0,
    max=max_steps,
    step=1,
    description='Ships: ',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)

# Plot it

widget_steps = WidgetControl(widget=ships_slider)
previous_value = 0
import seaborn as sns
#Define the update function for the slider
def update_plot(ships_slider):
    coords = keys[ships_slider]
    values = values_list[coords]
    mean0 = norm_dist_params[coords][0]
    std0 = norm_dist_params[coords][1]
    print('mean VALUE: ', mean0,'\nstd  VALUE: ', std0)
    #Define a bound for the plot
    x = np.linspace(mean0 - 10*abs(std0), mean0 + 10*abs(std0),100)
    y = scipy.stats.norm(mean0,std0).pdf(x)

    #Normal distribution aproximation
    plt.plot(x,y,c='blue', alpha=0.5)
    # Actual data
    plt.hist(np.array(values), bins=50, normed=True, color='darkblue', alpha=0.5)
    sns.kdeplot(np.array(values)[:,0], color='red', bw=5)
    plt.legend(['infered distribution',
                'actual distribution',
                'KDE infered distribution'])
    plt.title('Normal Distribution Aproximation')
    plt.xlabel('LAT/LON difference')
    plt.ylabel('Frequency')
    plt.show()
widgets.interactive(update_plot, ships_slider=ships_slider)

interactive(children=(IntSlider(value=3500, continuous_update=False, description='Ships: ', max=10348), Output…

In [38]:
from collections import Counter

class ship_buffer:
    def __init__(self, threshold):
        self.items = []
        self.threshold = threshold

    def isEmpty(self):
        return self.items == []

    def enqueue(self, item):
        self.items.insert(0,item)

    def dequeue(self):
        return self.items.pop()

    def size(self):
        return len(self.items)
    
    def color(self):
        #print(self.items)
        #print(sum(elem[0] < self.threshold for elem in self.items))
        if sum(elem[1] < .4  for elem in self.items) >= 0.5*len(self.items):
            return 'blue'#Anchored
        elif sum(np.isnan(elem[0]) for elem in self.items) >= 0.5*len(self.items):
            return 'orange'#Too many nan values
        elif sum(elem[0] < self.threshold for elem in self.items) >= len(self.items):
            return 'red'#Anomaly
        else:
            return 'green'#OK
    
    def print_queue(self):
        #print(self.items)
        return

In [39]:
th = 10e-20
queue_size = 5

In [40]:
print(test_ships[0])

[array([-25.564086252, -43.62904334700001, 161.875,
       Timestamp('2018-12-01 12:21:58.207000')], dtype=object), array([-25.563962535, -43.62911878499999, 161.875,
       Timestamp('2018-12-01 12:22:01.207000')], dtype=object), array([-25.563834082, -43.629192294, 161.875,
       Timestamp('2018-12-01 12:22:04.206000')], dtype=object), array([-25.563708102, -43.62926898799999, 161.875,
       Timestamp('2018-12-01 12:22:07.207000')], dtype=object), array([-25.563579314000002, -43.629337803999995, 161.875,
       Timestamp('2018-12-01 12:22:10.207000')], dtype=object), array([-25.563451740999998, -43.62941366, 161.875,
       Timestamp('2018-12-01 12:22:13.207000')], dtype=object), array([-25.563325803, -43.62948582799999, 161.875,
       Timestamp('2018-12-01 12:22:16.207000')], dtype=object), array([-25.56319974, -43.629554643999995, 161.875,
       Timestamp('2018-12-01 12:22:19.207000')], dtype=object), array([-25.563073800999998, -43.629625471000004, 161.875,
       Timestamp('2

In [41]:
from scipy import stats
import math
m = Map(center = (-25.353548853000003, -43.935133436), zoom =10)#Define the map object

#To define  the maximum number of steps we will be able to take with the slider
max_steps = len(test_ships)
ships_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=max_steps,
    step=1,
    description='Ships: ',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)

prob_env = ship_buffer(th)
for i in range(queue_size):
    prob_env.enqueue([np.nan, np.nan])

widget_steps = WidgetControl(widget=ships_slider, position='topright')
m.add_control(widget_steps)
m.add_control(FullScreenControl())
dark_matter_layer = basemap_to_tiles(basemaps.CartoDB.DarkMatter)
m.add_layer(dark_matter_layer)
for ship in train_ships:
    line = Polyline(
        locations = [[list(elem[:2]) for elem in ship]],
        color = 'gray',
        fill_color= "transparent",
        weight = 1,
        opacity = 0.1)
    m.add_layer(line)
previous_value = 0


def update_map(ships_slider):
    global previous_value, m
    if previous_value > ships_slider:
        m = Map(center = (-22.884059, 133.714373), zoom =4)#Define the map object
        ini, end = 0, ships_slider
    else:
        ini, end = previous_value, ships_slider
        
    step = 1
    for i in range(ini, end, step):
        color_value = 'green'
        for j in range(0,len(test_ships[i])-step, step):
            slope = test_ships[i][j][2]
            print(slope)
            #print(slope)
            try:
                dist_value = 10**(KDE_density_func[(np.round(test_ships[i][j][0], decimals), np.round(test_ships[i][j][1], decimals))].score_samples([[slope]])[0])
                print(dist_value)
                prob_env.dequeue()
                prob_env.enqueue([dist_value, test_ships[i][j][2], len(values_list[(np.round(test_ships[i][j][0], decimals), np.round(test_ships[i][j][1], decimals))])])
                color_value = prob_env.color()
                line = Polyline(
                    locations = [list(test_ships[i][j][:2]), list(test_ships[i][j+step][:2])],
                    color = color_value,
                    fill_color= "transparent",
                    weight = 2,
                    opacity = 1)
                m.add_layer(line)
            except Exception as e:
                print(e)

    previous_value = ships_slider
display(m)
widgets.interactive(update_map, ships_slider=ships_slider)

Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

interactive(children=(IntSlider(value=0, continuous_update=False, description='Ships: ', max=51), Output()), _…