In [None]:
'''
This notebook enables a grid search over a range of hyperparameters for maritime traffic network generation
- specify network and test data for evaluation
- specify range of hyperparameters to test
The notebook then runs a grid search over specified hyperparameters, evaluates the network and saves experiment results with neptune
'''

In [None]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import numpy as np
from datetime import timedelta, datetime
import time
from scipy.sparse import coo_matrix
from shapely.geometry import Point, LineString, MultiLineString
from shapely import ops
import networkx as nx
import matplotlib.pyplot as plt
import neptune
import folium
import pickle
import warnings
import sys

# Keep the notebook output readable by silencing library warnings.
warnings.filterwarnings('ignore')

# Report the versions of the geospatial libraries in use for this run.
for template, module in (
    ("Geopandas has version {}", gpd),
    ("Movingpandas has version {}", mpd),
):
    print(template.format(module.__version__))

In [None]:
# Make the project's source folders importable (paths are relative to the
# notebook's working directory).
sys.path.extend([
    '../src/models',
    '../src/visualization',
    '../src/features',
])

# Project-local modules, resolved through the paths registered above.
import visualize
import geometry_utils
from maritime_traffic_network import MaritimeTrafficNetwork

In [None]:
# Specify data for network generation
datasize = 'full'
location = 'tromso'
data_date = '202204'

# Specify data for network evaluation
eval_date = '202205'

# load processed AIS data from file
orig_filename = '../data/processed/'+data_date+'_points_'+location+'_cleaned_meta_'+datasize+'_dualSplit_2.parquet'
gdf = gpd.read_parquet(orig_filename)

# Transform to desired CRS
# The projected CRS has to match the UTM zone of the selected location:
# 32632 for UTM 32N (Stavanger, Oslo); 32634 for UTM 34N (Tromsø).
# It is derived from `location` so the two settings cannot drift apart
# (previously 32632 was hard-coded while location was 'tromso').
# NOTE(review): the keys are assumed to match the location tags used in the
# data file names - extend the mapping when adding a new area.
utm_crs_by_location = {
    'stavanger': 32632,
    'oslo': 32632,
    'tromso': 32634,
}
location_key = location.lower()
if location_key in utm_crs_by_location:
    crs = utm_crs_by_location[location_key]  # Coordinate reference system
else:
    # Unknown area: keep the previous default, but say so explicitly.
    crs = 32632
    print("No UTM zone registered for location '{}', falling back to EPSG:{}".format(location, crs))
gdf.to_crs(crs, inplace=True)  # Transformation

In [None]:
# Read the evaluation AIS points and project them into the CRS chosen for the
# training data, then group them into one trajectory per 'mmsi' value.
eval_file = ''.join([eval_date, '_points_', location, '_cleaned_meta_full_dualSplit_2'])
filename = ''.join(['../data/processed/', eval_file, '.parquet'])
test_gdf = gpd.read_parquet(filename)
test_gdf = test_gdf.to_crs(crs)  # Transformation to CRS
all_test_trajectories = mpd.TrajectoryCollection(
    test_gdf,
    traj_id_col='mmsi',
    obj_id_col='mmsi',
)

# Pick every `selection_step`-th entry as evaluation data.
selection_start, selection_step = 0, 3
selection_end = len(all_test_trajectories)
selection = np.arange(selection_start, selection_end, selection_step)
n_trajectories = selection.size

# The positions in `selection` index into the unique 'mmsi' values of the
# evaluation frame (in order of first appearance).
unique_mmsis = test_gdf['mmsi'].unique()
mmsis = unique_mmsis[selection]
test_trajectories = all_test_trajectories.filter('mmsi', mmsis.tolist())

In [None]:
# Specify hyperparameters for grid search
vals_ms = [4,5,6]  # Clustering min_samples
vals_eps = []      # Clustering eps
vals_DP = []       # Douglas-Peucker tolerance
vals_v34 = []      # Mahalanobis distance sigma_cog
vals_v5 = []       # Mahalanobis distance sigma_sog
vals_max_dist = []   # edge creation max_dist
vals_max_angle = []  # edge creation max_angle
method = 'DBSCAN'      # 'DBSCAN' , 'HDBSCAN', 'OPTICS'
metric = 'euclidean'   # 'euclidean', 'mahalanobis', 'haversine'

# Parameters that stay fixed during this grid search. To search over one of
# them, move its assignment into the loop and iterate over the matching
# vals_* list above.
tolerance = 10                      # Douglas-Peucker tolerance
eps = 75                            # Clustering eps
V = np.diag([1, 1, 0.01, 0.01, 1])  # mahalanobis distance parameter matrix V = np.diag([1, 1, 0.01, 0.01, 1])  seems to be good
max_distance = 20                   # edge creation max_dist
max_angle = 45                      # edge creation max_angle
merge_stops = True                  # merging stop points that overlap, should always be True
merge_stops_speed = 2               # speed threshold for merging of stop points
pruning = 1                         # network pruning parameter (prunes edges with less than specified number of passages)

# Loop through hyperparameters: one Neptune run per min_samples value
for min_samples in vals_ms:
    min_cluster_size = min_samples      # Clustering min_cluster_size follows min_samples

    # initialize neptune experiment
    # NOTE: replace the placeholders with your own values, but do not commit a
    # real token. Per the Neptune docs, init_run can also read them from the
    # NEPTUNE_PROJECT / NEPTUNE_API_TOKEN environment variables.
    run = neptune.init_run(
        project="project_name",
        api_token="token",
    )  # your credentials

    # The run is stopped in the `finally` clause so that a failure in any
    # pipeline step does not leave the Neptune run open.
    try:
        # Initialize maritime traffic network (fresh instance per combination)
        network = MaritimeTrafficNetwork(gdf, crs)
        network.get_trajectories_info()

        # set model name
        model = data_date+'_waypoints_DP' + str(tolerance) + '_' + method + str(min_samples) +'_'+location+'_'+datasize+'_UTM'

        # save hyperparameters
        params = {
            'Data':orig_filename,
            'DP_tolerance':tolerance,
            'clustering_method':method,
            'clustering_metric':metric,
            'clustering_min_samples':min_samples,
            'clustering_min_cluster_size':min_cluster_size,
            'clustering_eps':eps,
            'clustering_metric_V':V,
            'graph_generation_max_distance':max_distance,
            'graph_generation_max_angle':max_angle
        }
        network.set_hyperparameters(params)

        # calculate significant turning points using Douglas Peucker algorithm
        network.calc_significant_points_DP(tolerance)

        # compute waypoints
        network.calc_waypoints_clustering(method=method, min_samples=min_samples, min_cluster_size=min_cluster_size,
                                          eps=eps, metric=metric, V=V)

        # make graph from waypoints (connect waypoints)
        network.make_graph_from_waypoints(max_distance=max_distance, max_angle=max_angle)

        # merge stop points
        if merge_stops:
            network.merge_stop_points(max_speed=merge_stops_speed)

        # prune graph
        network.prune_graph(pruning)

        # evaluate the pruned network against the held-out trajectories
        all_paths, all_evaluation_results, summary, fig = network.evaluate_graph(test_trajectories)

        # save experiment with neptune
        run["model"]=model
        run["algorithm"]='V7.0(SSPD,std)'
        run["n_points"]=len(network.gdf)
        run["n_nodes"]=network.G_pruned.number_of_nodes()
        run["n_edges"]=network.G_pruned.number_of_edges()
        run["n_isolated"]=nx.number_of_isolates(network.G_pruned)
        run["merge_stops"] = merge_stops
        run["merge_stops_speed"] = merge_stops_speed
        run["pruning"] = pruning

        # Log from a shallow copy so the network's own hyperparameters are not
        # altered by the logging-only entries added below.
        logged_params = dict(network.hyperparameters)
        V_logged = logged_params['clustering_metric_V']
        logged_params['clustering_metric_V_coord'] = V_logged[0][0]
        logged_params['clustering_metric_V_cog'] = V_logged[2][2]
        logged_params['clustering_metric_V_speed'] = V_logged[4][4]
        # Log the full matrix as text rather than as a raw array.
        # NOTE(review): recent Neptune clients are understood to reject
        # unsupported value types instead of casting them - confirm against
        # the installed version.
        logged_params['clustering_metric_V'] = str(V_logged)
        run["parameters"] = logged_params

        run["test_data"] = {'eval_file':eval_file,
                            'selection_start':selection_start,
                            'selection_end':selection_end,
                            'selection_step':selection_step,
                            'n_trajectories':n_trajectories}

        run["plot"].upload(fig)
        run["summary"] = summary
    finally:
        run.stop()

    # save network as pickle object
    #with open('../data/interim/'+model+'.obj', 'wb') as fileObj:
    #    pickle.dump(network, fileObj)