In [1]:
from pygradu import shortest_path
from pygradu import gridify

import shapely.geometry
import pandas as pd
import numpy as np
import importlib

# Pick up local edits to the gridify module without restarting the kernel.
importlib.reload(gridify)

# Two grids built by gridify.area_to_grid, with 2500 m and 5000 m side lengths.
grid_2500m = gridify.area_to_grid(side_length=2500)
grid_5km = gridify.area_to_grid(side_length=5000)

MODELS_DIR = 'data/models/'
DATASET_DIR = 'data/datasets/'


def _read_graph_edges(csv_name):
    """Read a graph CSV from MODELS_DIR and keep only the three edge columns."""
    edges = pd.read_csv(MODELS_DIR + csv_name)
    return edges[['original', 'connected', 'cost']]


# Edge lists of the "adjacent" and "ship model" graphs at the 2500 m resolution.
graph_df_adj_2500m = _read_graph_edges('complete_graph_adjacent_2500m.csv')
graph_df_ship_2500m = _read_graph_edges('complete_graph_ship_model_2500m.csv')

# The same two graphs at the 5 km resolution.
graph_df_adj_5km = _read_graph_edges('complete_graph_adjacent_5km.csv')
graph_df_ship_5km = _read_graph_edges('complete_graph_ship_model_5km.csv')

graph_df_adj_5km.head()

Original NE: POINT (31.5 66.8)
Original SW: POINT (8.5 53)
Transformed NE: (697681.2863658136, 7416224.9393330095)
Transformed SW: (-735260.4063631196, 6033964.466017238)
col_count 573
row_count 552
grid_size 316296
Original NE: POINT (31.5 66.8)
Original SW: POINT (8.5 53)
Transformed NE: (697681.2863658136, 7416224.9393330095)
Transformed SW: (-735260.4063631196, 6033964.466017238)
col_count 286
row_count 276
grid_size 78936


Unnamed: 0,original,connected,cost
0,350,351,1.0
1,350,625,1.0
2,350,626,1.0
3,350,627,1.0
4,351,350,1.0


In [2]:
# Reload shortest_path so that local edits to the module are picked up.
importlib.reload(shortest_path)

# Convert each edge DataFrame with shortest_path.df_to_graph; the results are the
# graph objects later handed to shortest_path.predict_routes.
# %time prints the CPU and wall time of every conversion separately.
%time graph_adj_2500m = shortest_path.df_to_graph(graph_df_adj_2500m)
%time graph_ship_2500m = shortest_path.df_to_graph(graph_df_ship_2500m)

%time graph_adj_5km = shortest_path.df_to_graph(graph_df_adj_5km)
%time graph_ship_5km = shortest_path.df_to_graph(graph_df_ship_5km)

CPU times: user 1.61 s, sys: 105 ms, total: 1.72 s
Wall time: 1.42 s
CPU times: user 10.1 s, sys: 524 ms, total: 10.6 s
Wall time: 10.4 s
CPU times: user 270 ms, sys: 15.6 ms, total: 285 ms
Wall time: 290 ms
CPU times: user 1.71 s, sys: 81 ms, total: 1.79 s
Wall time: 1.45 s


In [3]:
# Build the speed lookups as nested dicts: one entry per CSV column, with the
# column label cast to int, each holding a dict from the CSV's row labels to
# the stored value (see the displayed entry for key 354 below).

speed_model_2500m = {
    int(column): by_row_label
    for column, by_row_label in pd.read_csv(MODELS_DIR + 'speed_model_2500m.csv', index_col=0).to_dict().items()
}

speed_model_5km = {
    int(column): by_row_label
    for column, by_row_label in pd.read_csv(MODELS_DIR + 'speed_model_5km.csv', index_col=0).to_dict().items()
}
speed_model_5km[354]

{'CONT': nan,
 'GC': 4.114166666666667,
 'PAS': 4.644285714285715,
 'RORO': nan,
 'T': nan}

In [4]:
# Read the summer validation voyages, using the first CSV column as the index
# and parsing timestamp, ata and atd as datetimes.

test_voyages = pd.read_csv(
    DATASET_DIR + 'validation_set_summer.csv',
    index_col=0,
    parse_dates=['timestamp', 'ata', 'atd'],
)
test_voyages.head()

Unnamed: 0,timestamp,mmsi,lat,lon,speed,node,vessel_type,start_port,end_port,end_lat,end_lon,ata,atd,in_port,voyage,end_port_sea_area
2741,2019-06-07 11:27:49,209014000,53.974982,11.340805,4.63,849,GC,2803,19942,56.889288,12.479425,2019-06-08 10:13:53,2019-06-07 11:27:49,False,14,-1
2742,2019-06-07 11:35:43,209014000,53.992868,11.33142,4.63,849,GC,2803,19942,56.889288,12.479425,2019-06-08 10:13:53,2019-06-07 11:27:49,False,14,-1
2743,2019-06-07 11:41:43,209014000,54.00789,11.331562,4.63,849,GC,2803,19942,56.889288,12.479425,2019-06-08 10:13:53,2019-06-07 11:27:49,False,14,-1
2744,2019-06-07 11:47:53,209014000,54.018665,11.345753,4.42,849,GC,2803,19942,56.889288,12.479425,2019-06-08 10:13:53,2019-06-07 11:27:49,False,14,-1
2745,2019-06-07 11:52:59,209014000,54.020092,11.367012,4.53,849,GC,2803,19942,56.889288,12.479425,2019-06-08 10:13:53,2019-06-07 11:27:49,False,14,-1


In [7]:
# Collect the distinct values of the 'original' column of each shallow water
# model into a set, one set per grid resolution.

shallow_graph_2500m = set(
    pd.read_csv(MODELS_DIR + 'shallow_water_model_2500m.csv', index_col=0)['original'].values
)
shallow_graph_5km = set(
    pd.read_csv(MODELS_DIR + 'shallow_water_model_5km.csv', index_col=0)['original'].values
)

In [6]:
# Build the prediction start rows: for every voyage take the SECOND observation
# and attach the course (degrees) from the first observation to the second.
# The second row is used because the course is computed from two consecutive positions.
test_voyages['course'] = -1  # placeholder so that 'course' is part of test_voyages.columns
voyages = test_voyages.groupby('voyage')
first_rows = []
for voyage, observations in voyages:
    # A course cannot be derived from a single position; skip such voyages
    # instead of failing with an IndexError on iloc[1].
    if len(observations) < 2:
        continue
    origin = observations.iloc[0]
    # Copy the row before assigning to it: writing to the object returned by
    # iloc[1] directly is what triggered pandas' SettingWithCopyWarning.
    row = observations.iloc[1].copy()
    course = shortest_path.angleFromCoordinatesInDeg([origin.lat, origin.lon], [row.lat, row.lon])
    row['course'] = course
    first_rows.append(row)

# One row per voyage; the index keeps the original row labels of test_voyages.
validation_set = pd.DataFrame(data=first_rows, columns=test_voyages.columns)
validation_set.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


Unnamed: 0,timestamp,mmsi,lat,lon,speed,node,vessel_type,start_port,end_port,end_lat,end_lon,ata,atd,in_port,voyage,end_port_sea_area,course
2742,2019-06-07 11:35:43,209014000,53.992868,11.33142,4.63,849,GC,2803,19942,56.889288,12.479425,2019-06-08 10:13:53,2019-06-07 11:27:49,False,14,-1,342.857462
11820,2019-05-28 12:26:49,209185000,61.622367,17.239705,6.02,11387,GC,3012222,935,65.234398,21.628273,2019-05-29 12:23:39,2019-05-28 12:21:10,False,67,1,90.670464
15524,2019-06-04 00:07:23,209297000,54.46436,18.705115,5.97,483,GC,1242,15942,56.968345,24.077253,2019-06-05 04:44:42,2019-06-04 00:01:23,False,88,7,41.045639
16933,2019-05-16 14:07:09,209314000,63.262708,18.750272,3.34,13743,GC,1109,3347,53.90601,10.76617,2019-05-19 06:17:29,2019-05-16 14:00:49,False,93,-1,132.546724
17582,2019-05-28 20:40:46,209314000,63.28826,19.256657,5.81,13745,GC,947,24228,58.381102,24.477098,2019-05-30 11:44:19,2019-05-28 20:35:06,False,94,7,125.601933


In [8]:
# Predicting routes using the adjacent model 5km

# NOTE(review): start_time is not referenced by any other visible cell — confirm it is still needed.
start_time = None

# Reload shortest_path so that local edits to the module are picked up.
importlib.reload(shortest_path)
# Search options are set as attributes on the graph object; predict_routes
# echoes them in its output. This run: shallow penalty only.
graph_adj_5km.use_turn_penalty = False
graph_adj_5km.use_shallow_penalty = True
graph_adj_5km.use_dirways = False

# None is passed in the argument position where the winter run passes the dirways frame.
%time routes_and_areas = shortest_path.predict_routes(validation_set, grid_5km, graph_adj_5km, speed_model_5km, None, shallow_graph_5km)
# Element 0 holds the route rows, element 1 the area rows
# (g, h, f are presumably the search cost terms — TODO confirm in shortest_path).
routes_adjacent_5km = pd.DataFrame(data=routes_and_areas[0], columns=['lat', 'lon', 'node', 'speed', 'mmsi', 'voyage', 'start_time', 'number'])
areas_adjacent_5km = pd.DataFrame(data=routes_and_areas[1], columns=['lat', 'lon', 'voyage','g', 'h', 'f'])

use_dirways= False
use_turn_penalty= False
use_shallow_penalty= True
side_length: 5000
Error count= 0
[]
CPU times: user 10min 36s, sys: 3.26 s, total: 10min 40s
Wall time: 10min 44s


In [9]:
# Reload shortest_path so that local edits to the module are picked up.
importlib.reload(shortest_path)
# Run shortest_path.calculate_timestamps on the predicted summer routes; the
# displayed result carries an additional 'timestamp' column per route point.
%time routes_timestamp_5km_adj = shortest_path.calculate_timestamps(routes_adjacent_5km)
routes_timestamp_5km_adj.head()

CPU times: user 7.21 s, sys: 116 ms, total: 7.32 s
Wall time: 7.02 s


Unnamed: 0,lat,lon,node,speed,mmsi,voyage,start_time,number,timestamp
0,53.992868,11.33142,3354.0,4.63,209014000,14,2019-06-07 11:35:43,0,2019-06-07 11:35:43.000000
1,54.046465,11.41265,3631.0,4.649501,209014000,14,2019-06-07 11:35:43,1,2019-06-07 12:04:19.275179
2,54.0998,11.469985,3908.0,4.747096,209014000,14,2019-06-07 11:35:43,2,2019-06-07 12:28:56.346183
3,54.153118,11.527464,4185.0,4.727564,209014000,14,2019-06-07 11:35:43,3,2019-06-07 12:53:39.690829
4,54.206419,11.585086,4462.0,4.818929,209014000,14,2019-06-07 11:35:43,4,2019-06-07 13:17:55.078619


In [11]:
# Reload shortest_path so that local edits to the module are picked up.
importlib.reload(shortest_path)
# Run shortest_path.test_accuracy with the 5 km grid, the timestamped predicted
# routes and the full summer observations; the displayed result pairs actual and
# predicted positions/speeds per observation together with error columns.
%time results_5km_adj = shortest_path.test_accuracy(grid_5km, routes_timestamp_5km_adj, test_voyages)
results_5km_adj.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  pred_route.sort_values(by=['timestamp'], inplace=True)


error count= 0
CPU times: user 6min 40s, sys: 4.31 s, total: 6min 44s
Wall time: 6min 48s


Unnamed: 0,voyage,vessel_type,end_port,end_port_sea_area,start_time,pred_time,mins_to_future,actual_lat,actual_lon,pred_lat,pred_lon,actual_speed,pred_speed,acc_distance_nm,error_rate_lat,error_rate_lon,error_rate_speed,correct_node
0,14,GC,19942,-1,2019-06-07 11:35:43,2019-06-07 11:35:43,0.0,53.992868,11.33142,53.992868,11.33142,4.63,4.639751,0.0,0.0,0.0,0.210595,1
1,14,GC,19942,-1,2019-06-07 11:35:43,2019-06-07 11:41:43,6.0,54.00789,11.331562,54.004092,11.348405,4.63,4.639751,0.636599,0.007033,0.148645,0.210595,1
2,14,GC,19942,-1,2019-06-07 11:35:43,2019-06-07 11:47:53,12.166667,54.018665,11.345753,54.015624,11.365872,4.42,4.639751,0.732831,0.005629,0.177325,4.971731,0
3,14,GC,19942,-1,2019-06-07 11:35:43,2019-06-07 11:52:59,17.266667,54.020092,11.367012,54.02516,11.380325,4.53,4.639751,0.55956,0.009382,0.117123,2.422749,1
4,14,GC,19942,-1,2019-06-07 11:35:43,2019-06-07 12:00:02,24.316667,54.02951,11.390418,54.038339,11.400315,4.53,4.639751,0.634643,0.016341,0.086885,2.422749,1


In [12]:
# Summary statistics of the summer accuracy results (numeric columns only).
results_5km_adj.describe()

Unnamed: 0,voyage,end_port,end_port_sea_area,mins_to_future,actual_lat,actual_lon,pred_lat,pred_lon,actual_speed,pred_speed,acc_distance_nm,error_rate_lat,error_rate_lon,error_rate_speed,correct_node
count,74754.0,74754.0,74754.0,74754.0,74754.0,74754.0,74754.0,74754.0,74752.0,74754.0,74754.0,74754.0,74754.0,74661.0,74754.0
mean,3106.411483,578517.1,4.838698,1010.824679,58.608989,19.258922,58.623045,19.271726,6.14472,6.405815,24.332909,0.4918,2.499131,33.621809,0.058432
std,1763.526938,1852573.0,3.745004,896.329937,2.662714,3.840448,2.619508,3.785522,1.703659,1.437901,24.356328,0.557729,3.387673,382.360018,0.234559
min,14.0,923.0,-1.0,0.0,53.89457,9.954022,53.885251,9.871372,0.0,0.736985,0.0,0.0,0.0,0.000108,0.0
25%,1973.0,999.0,1.0,340.533333,56.257503,17.492565,56.272092,17.419616,5.09,5.571333,6.339408,0.089991,0.483069,4.637311,0.0
50%,2807.0,1073.0,5.0,754.566667,58.690872,19.529342,58.6907,19.484924,5.81,5.891748,15.901843,0.271696,1.381082,9.97638,0.0
75%,3847.0,15942.0,7.0,1420.329167,60.027463,21.488541,60.02947,21.425874,6.89,6.874701,34.772804,0.697539,3.233522,18.875453,0.0
max,6576.0,8616642.0,13.0,5348.783333,65.79093,29.776128,65.753231,29.801895,15.33,11.568559,165.815944,2.956229,35.140593,19091.990535,1.0


In [13]:
# Load dirways

dirways = pd.read_csv(DATASET_DIR + 'dirways_all_2018_2019.csv', parse_dates = ['publishtime', 'deletetime', 'createtime'])
# Load the winter validation set

test_voyages_winter = pd.read_csv(DATASET_DIR + 'validation_set_winter.csv', index_col=0, parse_dates = ['timestamp', 'ata', 'atd'])

# Build the prediction start rows: for every voyage take the SECOND observation
# and attach the course (degrees) from the first observation to the second.
# The second row is used because the course is computed from two consecutive positions.
test_voyages_winter['course'] = -1  # placeholder so that 'course' is part of the columns
voyages = test_voyages_winter.groupby('voyage')
first_rows = []
for voyage, observations in voyages:
    # A course cannot be derived from a single position; skip such voyages
    # instead of failing with an IndexError on iloc[1].
    if len(observations) < 2:
        continue
    origin = observations.iloc[0]
    # Copy the row before assigning to it: writing to the object returned by
    # iloc[1] directly is what triggered pandas' SettingWithCopyWarning.
    row = observations.iloc[1].copy()
    course = shortest_path.angleFromCoordinatesInDeg([origin.lat, origin.lon], [row.lat, row.lon])
    row['course'] = course
    first_rows.append(row)

# One row per voyage; the index keeps the original row labels of test_voyages_winter.
validation_set_winter = pd.DataFrame(data=first_rows, columns=test_voyages_winter.columns)
validation_set_winter.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


Unnamed: 0,timestamp,mmsi,lat,lon,speed,node,vessel_type,iceclass,start_port,end_port,end_lat,end_lon,ata,atd,in_port,voyage,course,end_port_sea_area
1230,2019-03-20 14:44:01,209185000,61.622263,17.241178,5.61,11387,GC,IA,3012222,935,65.234498,21.628285,2019-03-22 00:08:31,2019-03-20 14:38:32,False,5,90.725478,1
3041,2019-02-09 01:52:51,209467000,65.222573,21.647583,2.11,16519,CONT,IA,935,6652,54.365267,10.144437,2019-02-11 15:49:55,2019-02-09 01:47:32,False,15,143.856736,-1
3647,2019-02-23 00:34:33,209467000,65.210282,21.631402,5.86,16519,CONT,IA,935,6652,54.374755,10.17713,2019-02-25 15:30:35,2019-02-23 00:28:32,False,16,141.574254,-1
5762,2019-03-22 21:31:43,209467000,65.209857,21.651402,4.22,16519,CONT,IA,935,30775,59.162025,17.663345,2019-03-24 08:55:07,2019-03-22 21:26:10,False,21,176.249323,6
13242,2019-02-01 21:59:31,212018000,65.219908,21.651478,4.06,16519,CONT,IA,935,6652,54.36545,10.143257,2019-02-04 18:12:41,2019-02-01 21:54:20,False,43,145.443601,-1


In [14]:
# Predicting winter routes using the adjacent model 5km

# NOTE(review): start_time is not referenced by any other visible cell — confirm it is still needed.
start_time = None

# Reload shortest_path so that local edits to the module are picked up.
importlib.reload(shortest_path)
# Search options are set as attributes on the same graph object the summer run
# used; this run enables dirways in addition to the shallow penalty.
graph_adj_5km.use_turn_penalty = False
graph_adj_5km.use_shallow_penalty = True
graph_adj_5km.use_dirways = True

# The dirways frame is passed where the summer run passes None.
%time routes_and_areas_winter = shortest_path.predict_routes(validation_set_winter, grid_5km, graph_adj_5km, speed_model_5km, dirways, shallow_graph_5km)
# Element 0 holds the route rows, element 1 the area rows
# (g, h, f are presumably the search cost terms — TODO confirm in shortest_path).
routes_adjacent_5km_winter = pd.DataFrame(data=routes_and_areas_winter[0], columns=['lat', 'lon', 'node', 'speed', 'mmsi', 'voyage', 'start_time', 'number'])
areas_adjacent_5km_winter = pd.DataFrame(data=routes_and_areas_winter[1], columns=['lat', 'lon', 'voyage','g', 'h', 'f'])

use_dirways= True
use_turn_penalty= False
use_shallow_penalty= True
side_length: 5000


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  dirways.sort_values(by=['id', 'number'], inplace=True)


Error count= 0
[]
CPU times: user 5min 4s, sys: 2.05 s, total: 5min 6s
Wall time: 5min 11s


In [17]:
# Reload shortest_path so that local edits to the module are picked up.
importlib.reload(shortest_path)
# Run shortest_path.calculate_timestamps on the predicted winter routes; the
# displayed result carries an additional 'timestamp' column per route point.
%time routes_timestamp_5km_adj_winter = shortest_path.calculate_timestamps(routes_adjacent_5km_winter)
routes_timestamp_5km_adj_winter.head()

CPU times: user 5.87 s, sys: 346 ms, total: 6.21 s
Wall time: 7.23 s


Unnamed: 0,lat,lon,node,speed,mmsi,voyage,start_time,number,timestamp
0,61.622263,17.241178,45959.0,5.61,209185000,5,2019-03-20 14:44:01,0,2019-03-20 14:44:01.000000
1,61.586999,17.391566,45684.0,5.708226,209185000,5,2019-03-20 14:44:01,1,2019-03-20 15:09:54.294205
2,61.593594,17.484973,45685.0,5.262779,209185000,5,2019-03-20 14:44:01,2,2019-03-20 15:25:43.543365
3,61.644637,17.564763,45962.0,5.427669,209185000,5,2019-03-20 14:44:01,3,2019-03-20 15:47:26.324265
4,61.69564,17.644814,46239.0,5.487834,209185000,5,2019-03-20 14:44:01,4,2019-03-20 16:08:54.891652


In [18]:
# Reload shortest_path so that local edits to the module are picked up.
importlib.reload(shortest_path)
# Run shortest_path.test_accuracy with the 5 km grid, the timestamped predicted
# winter routes and the full winter observations; the displayed result pairs
# actual and predicted positions/speeds per observation together with error columns.
%time results_5km_adj_winter = shortest_path.test_accuracy(grid_5km, routes_timestamp_5km_adj_winter, test_voyages_winter)
results_5km_adj_winter.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  pred_route.sort_values(by=['timestamp'], inplace=True)


error count= 0
CPU times: user 2min 37s, sys: 41.2 s, total: 3min 18s
Wall time: 4min 4s


Unnamed: 0,voyage,vessel_type,end_port,end_port_sea_area,start_time,pred_time,mins_to_future,actual_lat,actual_lon,pred_lat,pred_lon,actual_speed,pred_speed,acc_distance_nm,error_rate_lat,error_rate_lon,error_rate_speed,correct_node
0,5,GC,935,1,2019-03-20 14:44:01,2019-03-20 14:44:01,0.0,61.622263,17.241178,61.622263,17.241178,5.61,5.659113,0.0,0.0,0.0,0.875451,1
1,5,GC,935,1,2019-03-20 14:44:01,2019-03-20 14:49:25,5.4,61.622037,17.275425,61.614984,17.272306,5.45,5.659113,0.432679,0.011444,0.018057,3.836932,1
2,5,GC,935,1,2019-03-20 14:44:01,2019-03-20 14:54:42,10.683333,61.615038,17.299898,61.607856,17.302746,3.96,5.659113,0.438834,0.011657,0.016462,42.906889,1
3,5,GC,935,1,2019-03-20 14:44:01,2019-03-20 14:59:52,15.85,61.60909,17.316947,61.600878,17.332501,3.96,5.659113,0.663569,0.013329,0.089823,42.906889,0
4,5,GC,935,1,2019-03-20 14:44:01,2019-03-20 15:05:02,21.016667,61.602707,17.337982,61.593894,17.362243,4.58,5.659113,0.87178,0.014305,0.13993,23.561415,0


In [19]:
# Summary statistics of the winter accuracy results (numeric columns only).
results_5km_adj_winter.describe()

Unnamed: 0,voyage,end_port,end_port_sea_area,mins_to_future,actual_lat,actual_lon,pred_lat,pred_lon,actual_speed,pred_speed,acc_distance_nm,error_rate_lat,error_rate_lon,error_rate_speed,correct_node
count,39287.0,39287.0,39287.0,39287.0,39287.0,39287.0,39287.0,39287.0,39282.0,39287.0,39287.0,39287.0,39287.0,39245.0,39287.0
mean,314.698857,31121.86,1.334513,1738.344958,60.73306,19.426252,60.666018,19.393196,6.359651,6.23435,45.948526,1.037635,4.026942,42.785651,0.033624
std,180.640385,276553.5,2.801052,1206.548257,3.412544,3.216125,3.28542,2.950573,1.479089,1.001174,48.777317,1.176384,5.848638,380.449862,0.180263
min,5.0,931.0,-1.0,0.0,54.04495,10.1402,54.04495,10.144687,0.0,0.877776,0.0,0.0,0.0,0.000443,0.0
25%,143.0,1027.0,-1.0,705.7,58.145701,18.489867,58.171166,18.513612,5.61,5.73267,11.611163,0.211195,0.767281,5.039359,0.0
50%,341.0,1073.0,1.0,1563.683333,61.022207,19.78089,60.909807,19.670226,6.33,5.900363,29.888738,0.624719,2.074318,11.04617,0.0
75%,461.0,6652.0,1.0,2622.533333,63.752575,21.6195,63.603836,21.35473,7.41,6.503941,57.421364,1.349328,5.064831,20.695534,0.0
max,627.0,3012222.0,9.0,5509.3,65.755,27.169408,65.786941,26.957523,10.65,10.123011,262.712483,5.798519,66.987788,12814.655684,1.0
