In [3]:
import importlib
import pandas as pd
import geopandas as gpd
import numpy as np
import helper
import calendar
import os, sys
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib import style
# Apply matplotlib's 'ggplot' style sheet globally.
style.use('ggplot')
# Default figure size constants: the height is the width divided by 1.618
# (approximately the golden ratio).
# NOTE(review): presumably inches, as matplotlib's figsize expects — confirm at the point of use.
PLT_FIG_WIDTH = 4.487
PLT_FIG_HEIGHT = PLT_FIG_WIDTH / 1.618

import st_visualizer
import express as viz_express
import geom_helper as viz_helper
import bokeh.palettes as bokeh_palettes

# Reload the local helper modules so that edits made to them on disk are picked
# up without restarting the kernel (viz_helper is the alias of geom_helper).
importlib.reload(helper)
importlib.reload(viz_helper)
import psycopg2
import psycopg2.extras
from EvolvingClusters import evolving_clusters
from sklearn.cluster import DBSCAN, OPTICS
from datetime import datetime


In [10]:
%%time

# Read only the first 100,000 rows of the trajectory CSV and pass them straight
# to the project helper, requesting CRS EPSG:4326.
# NOTE(review): the helper presumably builds the `geom` point column shown in
# the previews below — confirm in helper.getGeoDataFrame_v2.
# NOTE(review): sorting by 'timestamp' was left disabled in this cell, so rows
# keep their file order.
df = helper.getGeoDataFrame_v2(
    pd.read_csv("./data/traj_final.csv", nrows=100000),
    crs='epsg:4326',
)

Wall time: 2.23 s


In [3]:
# Preview the first five rows to check the loaded columns.
df.head()

Unnamed: 0,timestamp,type,mmsi,status,lon,lat,heading,turn,speed,course,timestamp_sec,velocity,bearing,acceleration,traj_id,label,trip_id,geom
0,1519855000.0,,636013190,,23.539338,37.885675,44.0,,1.4,15.5,1519855000.0,1.384017,24.892345,-0.019672,0,0,2,POINT (23.53934 37.88567)
1,1519855000.0,,237991700,,23.54615,37.94995,,,0.0,268.0,1519855000.0,0.0,0.0,0.0,0,0,2,POINT (23.54615 37.94995)
2,1519855000.0,,235100119,,23.681892,37.932647,244.0,,0.0,271.2,1519855000.0,0.054364,280.586094,0.000281,0,0,2,POINT (23.68189 37.93265)
3,1519855000.0,,256147000,,23.535167,37.861398,296.0,,0.3,214.5,1519855000.0,0.39477,228.634116,0.001103,0,0,2,POINT (23.53517 37.86140)
4,1519855000.0,,239945400,,23.6049,37.92062,,,13.2,168.0,1519855000.0,12.103464,166.692336,-0.107975,0,0,2,POINT (23.60490 37.92062)


In [4]:
# (rows, columns) of the loaded frame.
df.shape

(100000, 18)

In [5]:
# Quick visual sanity check: put only the first 1,000 rows on an interactive map.
st_viz = st_visualizer.st_visualizer()
st_viz.set_data(df.head(1000))
viz_express.plot_points_on_map(st_viz)

# Render inside the notebook served at the given URL.
st_viz.show_figures(
    notebook=True,
    notebook_url='http://localhost:8888',
)

In [6]:
# Convert the coordinates to radians, latitude first: scikit-learn's haversine
# metric (used by the clustering cells below) expects [latitude, longitude]
# pairs expressed in radians.
X = np.radians(df[['lat', 'lon']])

In [7]:
# Spot-check the first ten rows of the radian coordinates.
X[:10]

Unnamed: 0,lat,lon
0,0.66123,0.410839
1,0.662352,0.410958
2,0.66205,0.413327
3,0.660806,0.410766
4,0.66184,0.411983
5,0.661986,0.413348
6,0.662274,0.412122
7,0.66242,0.411034
8,0.661231,0.410839
9,0.662352,0.410958


In [12]:
%%time
# Density-based clustering on great-circle (haversine) distance.
# - eps is an angular distance in radians; 6371 is presumably the Earth's mean
#   radius in km, which makes the neighbourhood radius roughly 1 km — confirm.
# - min_samples is 1/30 of the number of rows (floor division).
db = DBSCAN(
    eps=1/6371.,
    min_samples=len(df)//30,
    algorithm='ball_tree',
    metric='haversine',
).fit(X)

In [13]:
# Distinct cluster labels assigned by DBSCAN; scikit-learn uses -1 for noise points.
set(db.labels_)

{-1, 0, 1, 2}

In [14]:
# Cluster centres computed by the project helper from the radian coordinates
# and the DBSCAN labels.
# NOTE(review): the recorded output has one row per non-noise label, and the
# values look like [lat, lon] in degrees — confirm in helper.get_clusters_centers.
helper.get_clusters_centers(X, db.labels_)

array([[37.95222141, 23.54558974],
       [37.93042253, 23.68222271],
       [37.93896672, 23.60345632]])

In [15]:
# Work on a copy of the points and attach each point's DBSCAN label as a string
# column (presumably so the colormap treats the labels as categories).
tmp = df.copy()
tmp['cluster_labels'] = pd.Series(db.labels_, index=tmp.index).map(str)

# Build the map: one glyph layer coloured and legend-grouped by cluster label.
points = st_visualizer.st_visualizer()
points.set_data(tmp.copy())
points.create_canvas('Prototype Plot')

cmap = points.add_categorical_colormap('Category20', 'cluster_labels')
points.add_glyph(color=cmap, legend_group='cluster_labels')
points.add_map_tile(provider='CARTODBPOSITRON')
# Optional hover tooltips (currently disabled):
# points.add_hover_tooltips([('mmsi', '@mmsi'), ('traj_id', '@traj_id'), ('timestamp', '@timestamp')])

points.show_figures(
    notebook=True,
    notebook_url='http://localhost:8888',
)

In [18]:
%%time
# OPTICS on the same radian coordinates and haversine metric.
# - max_eps caps the neighbourhood radius at 1/6371 rad (presumably ~1 km with
#   the Earth's mean radius in km — confirm).
# - min_samples is 1/30 of the number of rows (floor division).
optics = OPTICS(
    max_eps=1/6371,
    min_samples=len(df)//30,
    metric='haversine',
).fit(X)

In [19]:
# Distinct cluster labels assigned by OPTICS; scikit-learn uses -1 for noise points.
set(optics.labels_)

{-1, 0, 1, 2, 3, 4, 5, 6}

In [20]:
# Cluster centres computed by the project helper from the radian coordinates
# and the OPTICS labels.
# NOTE(review): the recorded output has one row per non-noise label, and the
# values look like [lat, lon] in degrees — confirm in helper.get_clusters_centers.
helper.get_clusters_centers(X, optics.labels_)

array([[37.95222141, 23.54558974],
       [37.93263833, 23.68192731],
       [37.929536  , 23.68236893],
       [37.9289992 , 23.68306334],
       [37.94471649, 23.61500095],
       [37.93465615, 23.62277641],
       [37.92708383, 23.56494495]])

In [22]:
# Work on a copy of the points and attach each point's OPTICS label as a string
# column (presumably so the colormap treats the labels as categories).
tmp = df.copy()
tmp['cluster_labels'] = pd.Series(optics.labels_, index=tmp.index).map(str)

# Build the map: one glyph layer coloured and legend-grouped by cluster label.
points = st_visualizer.st_visualizer()
points.set_data(tmp.copy())
points.create_canvas('Prototype Plot')

cmap = points.add_categorical_colormap('Category10', 'cluster_labels')
points.add_glyph(color=cmap, legend_group='cluster_labels')
points.add_map_tile(provider='CARTODBPOSITRON')
# Optional hover tooltips (currently disabled):
# points.add_hover_tooltips([('mmsi', '@mmsi'), ('traj_id', '@traj_id'), ('timestamp', '@timestamp')])

points.show_figures(
    notebook=True,
    notebook_url='http://localhost:8888',
)

In [5]:
# Rename 'timestamp' to 'ts'; the evolving_clusters call further down is given
# temporal_name='ts'. Note that this rebinds `df`.
df = df.rename(columns={'timestamp':'ts'})

In [6]:
# Confirm that the time column is now called 'ts'.
df.head()

Unnamed: 0,ts,type,mmsi,status,lon,lat,heading,turn,speed,course,timestamp_sec,velocity,bearing,acceleration,traj_id,label,trip_id,geom
0,1519855000.0,,636013190,,23.539338,37.885675,44.0,,1.4,15.5,1519855000.0,1.384017,24.892345,-0.019672,0,0,2,POINT (23.53934 37.88567)
1,1519855000.0,,237991700,,23.54615,37.94995,,,0.0,268.0,1519855000.0,0.0,0.0,0.0,0,0,2,POINT (23.54615 37.94995)
2,1519855000.0,,235100119,,23.681892,37.932647,244.0,,0.0,271.2,1519855000.0,0.054364,280.586094,0.000281,0,0,2,POINT (23.68189 37.93265)
3,1519855000.0,,256147000,,23.535167,37.861398,296.0,,0.3,214.5,1519855000.0,0.39477,228.634116,0.001103,0,0,2,POINT (23.53517 37.86140)
4,1519855000.0,,239945400,,23.6049,37.92062,,,13.2,168.0,1519855000.0,12.103464,166.692336,-0.107975,0,0,2,POINT (23.60490 37.92062)


In [11]:
# Number of distinct 'mmsi' identifiers in the sample (missing values, if any,
# are counted as one value, exactly like len(unique())).
df['mmsi'].nunique(dropna=False)

187

In [8]:
# Run evolving-cluster discovery over the points, with objects identified by
# 'mmsi' and time taken from 'ts' (declared as seconds).
# NOTE(review): distance_threshold=3704 is presumably metres (3704 m = 2 nautical
# miles); the unit of time_threshold=10 is not visible here — confirm both
# against the EvolvingClusters documentation.
# The recorded run of this cell took about 1h37m, so re-run it deliberately.
res_mcs, res_mc = evolving_clusters(
    df,
    coordinate_names=['lon', 'lat'],
    temporal_name='ts',
    temporal_unit='s',
    o_id_name='mmsi',
    distance_threshold=3704,
    min_cardinality=3,
    time_threshold=10,
    disable_progress_bar=False,
)

100%|████████████████████████████████████████████████████████████████████████| 542096/542096 [1:37:03<00:00, 93.08it/s]


In [9]:
# Display the first result frame; the recorded output shows the columns
# clusters, st and et, with no rows.
res_mcs

Unnamed: 0,clusters,st,et


In [None]:
# Derive a proper datetime column from 'ts'.
# 'ts' holds Unix epoch *seconds* (values around 1.5e9, and evolving_clusters is
# called with temporal_unit='s'), so the unit must be given explicitly: without
# it pandas reads the numbers as nanoseconds and every row lands in January 1970.
df['datetime'] = pd.to_datetime(df['ts'], unit='s')

In [None]:
# Keep only the points of the selected cluster's members that fall inside the
# cluster's [st, et] time window (both ends included).
# NOTE(review): `cluster` is not defined in any visible cell; presumably it is
# one row of the evolving_clusters result (fields clusters / st / et) — define
# it explicitly before this cell so the notebook runs on a fresh kernel.
is_member = df['mmsi'].isin(cluster.clusters)
# Series.between includes both ends by default. The boolean form
# `inclusive=True` is rejected by pandas >= 2.0, so it is simply omitted.
in_window = df['datetime'].between(cluster.st, cluster.et)
tmp = df.loc[is_member & in_window].copy()

# Store the ids as strings. Plain column assignment replaces the column,
# avoiding the incompatible-dtype deprecation that `.loc[:, 'mmsi'] = <strings>`
# triggers on an integer column in recent pandas.
tmp['mmsi'] = tmp['mmsi'].astype(str)

In [None]:
# Map the selected cluster's points, one colour per 'mmsi'.
points = st_visualizer.st_visualizer()
points.set_data(tmp.copy())
points.create_canvas('Prototype Plot')

# Numeric '<=' filter on 'ts' with a step of 60, used instead of the temporal
# filter kept below for reference.
# points.add_temporal_filter(step_ms=60*10**3)
points.add_numerical_filter(
    filter_mode='<=',
    numeric_name='ts',
    step=60,
    callback_policy='value',
)

cmap = points.add_categorical_colormap('Category10', 'mmsi')
points.add_glyph(color=cmap, legend_group='mmsi')
points.add_map_tile(provider='CARTODBPOSITRON')
# Optional hover tooltips (currently disabled):
# points.add_hover_tooltips([('mmsi', '@mmsi'), ('traj_id', '@traj_id'), ('timestamp', '@timestamp')])

points.show_figures(
    notebook=True,
    notebook_url='http://localhost:8888',
)