In [7]:
import pandas as pd
import numpy as np
import folium

from gnact import utils, clust, network
import db_config

import warnings
# NOTE(review): this blanket filter silences every warning raised from here on,
# including pandas deprecation / SettingWithCopy warnings; consider narrowing it
# to the specific categories that are known to be noise.
warnings.filterwarnings('ignore')

from sklearn.cluster import DBSCAN
from sklearn.cluster import OPTICS

# create the engine to the database
# Connection parameters are read from the local db_config module (colone_cargo_params).
# print_verbose=True presumably produces the connection banner shown in the cell
# output -- TODO confirm against utils.connect_engine.
engine = utils.connect_engine(db_config.colone_cargo_params, print_verbose=True)

Creating Engine...
Engine created for ais_cargo
PostgreSQL 12.3 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-39), 64-bit


In [4]:
# make the df from the data in the database for MSC Ashrui
# The uid is passed as a one-element tuple of strings. end_time='2018-01-01' is
# presumably an upper bound on the position timestamps -- TODO confirm whether the
# bound is inclusive in clust.get_uid_posits.
df_posits = clust.get_uid_posits(('636016432',), engine, end_time='2018-01-01')
# preview the first rows of the positions frame
df_posits.head()

Unnamed: 0,id,lat,lon,time
0,15867231,42.28535,-69.12919,2017-01-06 07:04:31
1,15867232,42.28576,-69.14447,2017-01-06 07:06:31
2,15867550,42.28601,-69.15519,2017-01-06 07:07:55
3,15867549,42.28627,-69.16453,2017-01-06 07:09:08
4,15867237,42.28669,-69.17284,2017-01-06 07:10:13


In [33]:
# Enrich the raw positions with clust.traj_enhance_df, then keep only the rows
# whose 'speed_kts' value is below 1.
df_traj_enhanced = clust.traj_enhance_df(df_posits)
slow_mask = df_traj_enhanced['speed_kts'] < 1
df_slow_posits = df_traj_enhanced[slow_mask]

# First-round clustering of the slow positions, followed by a per-cluster summary.
df_clusts = clust.calc_clusts(
    df_slow_posits,
    eps_km=2,
    min_samp=50,
    method='dbscan',
)
df_centers = clust.calc_centers(df_clusts)

# Report how many rows the clustering result holds, then draw the map.
print(len(df_clusts))
clust.plot_clusters(df_posits, df_centers)

11097
Plotted 8 total clusters.


In [23]:
# display the current df_centers summary table (one row per clust_id)
df_centers

Unnamed: 0,clust_id,time_min,time_max,total_clust_count,average_lat,average_lon,average_dist_from_center,time_diff
0,0,2017-01-06 12:23:00,2017-12-27 23:04:00,2082,42.342173,-71.019025,0.018261,355 days 10:41:00
1,1,2017-01-08 11:32:00,2017-12-31 17:41:00,3298,40.682997,-74.149364,0.235371,357 days 06:09:00
2,2,2017-01-10 01:58:00,2017-12-30 04:56:00,3386,39.901104,-75.134575,0.140546,354 days 02:58:00
3,3,2017-01-14 21:35:00,2017-07-06 06:30:00,88,26.527108,-78.766465,0.050596,172 days 08:55:00
4,4,2017-02-27 22:28:00,2017-04-18 15:31:00,287,32.130286,-81.13981,0.262557,49 days 17:03:00
5,5,2017-04-13 21:48:00,2017-04-14 05:39:00,170,40.487357,-73.618305,0.194834,0 days 07:51:00
6,6,2017-06-04 23:56:00,2017-06-06 17:42:00,812,38.508145,-74.642331,0.272978,1 days 17:46:00
7,7,2017-08-24 00:56:00,2017-08-25 04:58:00,974,38.249177,-73.971363,1.87543,1 days 04:02:00


In [24]:

# Second-round clustering: re-cluster each first-round cluster along the time axis
# only, so positions in the same first-round cluster that are far apart in time end
# up with different cluster ids.

# Parameters of the one-dimensional (temporal) DBSCAN pass.
TEMPORAL_EPS_MINUTES = 360   # DBSCAN eps, expressed in minutes
TEMPORAL_MIN_SAMPLES = 50    # DBSCAN min_samples
# The integer view of the 'time' column is divided by this to obtain minutes.
# Assumes nanosecond-resolution datetimes, as the divisor implies -- TODO confirm.
NS_PER_MINUTE = 60 * 10**9

# need new unique clusters across each uid.
clust_count = 0
# One frame per second-round cluster; concatenated once after the loop instead of
# growing a DataFrame with DataFrame.append (quadratic, and removed in pandas 2.0).
second_round_frames = []

# begin iteration.  Look at each cluster in turn from first round results
clusters = df_clusts['clust_id'].unique()
for c in clusters:
    df_c = df_clusts[df_clusts['clust_id'] == c]
    # Timestamps as minutes, shaped (n_samples, 1) for scikit-learn.
    # Explicit int64: a bare 'int' can resolve to a 32-bit type on some platforms,
    # which cannot hold nanosecond timestamps.
    X = (df_c['time'].astype('int64').to_numpy() / NS_PER_MINUTE).reshape(-1, 1)
    x_id = df_c['id'].astype('int64').to_numpy()
    # cluster again using DBSCAN with a temporal epsilon (minutes) in one dimension
    dbscan = DBSCAN(eps=TEMPORAL_EPS_MINUTES, min_samples=TEMPORAL_MIN_SAMPLES,
                    algorithm='kd_tree', metric='euclidean', n_jobs=1)
    dbscan.fit(X)
    # gather the output as a dataframe and discard noise points (label -1)
    df_clusts2 = pd.DataFrame({'id': x_id, 'clust_id': dbscan.labels_})
    df_clusts2 = df_clusts2[df_clusts2['clust_id'] != -1]
    clusters2 = df_clusts2['clust_id'].unique()
    for c2 in clusters2:
        # assign() builds a new frame carrying the globally unique cluster id,
        # rather than writing into a filtered slice of df_clusts2.
        df_c2 = df_clusts2[df_clusts2['clust_id'] == c2].assign(clust_id=clust_count)
        second_round_frames.append(df_c2)
        clust_count += 1

# will hold results of second round temporal clustering
if second_round_frames:
    df_second_round = pd.concat(second_round_frames)
else:
    # No temporal cluster survived: keep an empty, correctly typed frame so the
    # merge below still works and yields an empty result.
    df_second_round = pd.DataFrame({'id': pd.Series(dtype='int64'),
                                    'clust_id': pd.Series(dtype='int64')})
#%%

# Re-attach the remaining first-round columns to every re-labelled position; the
# first-round clust_id is dropped so only the second-round id remains.
df_second_results = pd.merge(df_second_round, df_clusts.drop(columns='clust_id'),
                             how='left', on='id')



In [25]:
# Recompute the per-cluster summary from the second-round results and plot it.
# NOTE(review): this rebinds df_centers, replacing the first-round summary assigned
# in an earlier cell; cells that display df_centers show whichever was assigned last.
df_centers = clust.calc_centers(df_second_results)
clust.plot_clusters(df_posits, df_centers)

Plotted 33 total clusters.


In [28]:
# Show the summary ordered by average latitude, then by earliest timestamp.
# sort_values returns a new frame; df_centers itself is left unsorted.
df_centers.sort_values(['average_lat', 'time_min'])

Unnamed: 0,clust_id,time_min,time_max,total_clust_count,average_lat,average_lon,average_dist_from_center,time_diff
27,27,2017-07-05 15:07:00,2017-07-06 06:30:00,87,26.527215,-78.766346,0.018104,0 days 15:23:00
29,29,2017-04-18 08:57:00,2017-04-18 15:31:00,138,32.128397,-81.137986,0.00932,0 days 06:34:00
28,28,2017-02-27 22:28:00,2017-02-28 05:21:00,149,32.132036,-81.141499,0.022013,0 days 06:53:00
32,32,2017-08-24 00:56:00,2017-08-25 04:58:00,974,38.249177,-73.971363,1.87543,1 days 04:02:00
31,31,2017-06-04 23:56:00,2017-06-06 17:42:00,812,38.508145,-74.642331,0.272978,1 days 17:46:00
16,16,2017-01-10 01:58:00,2017-01-10 14:58:00,315,39.898665,-75.135848,0.006312,0 days 13:00:00
18,18,2017-04-15 19:27:00,2017-04-16 07:27:00,236,39.898712,-75.135822,0.007748,0 days 12:00:00
17,17,2017-02-24 20:11:00,2017-02-25 09:36:00,267,39.89881,-75.135749,0.011491,0 days 13:25:00
19,19,2017-06-07 03:24:00,2017-06-08 01:58:00,461,39.901246,-75.134583,0.006085,0 days 22:34:00
20,20,2017-07-02 00:53:00,2017-07-02 13:45:00,263,39.901487,-75.13434,0.004972,0 days 12:52:00
