## Stay point detection

In [293]:
from sklearn.cluster import DBSCAN
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
from pyproj import Transformer

In [294]:
df = pd.read_csv('taxidata/taxi_156.csv')
# covert timestamp to epoch
df['Timestamp'] = pd.to_datetime(df['Timestamp']).astype(int) / 10**9
# sort by time
df = df.sort_values(by='Timestamp')
# to array on (lon,lat)
trace = df[['Longitude','Latitude','Timestamp']].to_numpy()
# take only ... points
trace = trace[:3000]
# trace holds tuples instead of arrays
trace = [tuple(x) for x in trace]
#time float to int
trace = [(x[0],x[1],int(x[2])) for x in trace]


In [295]:
xy_clusters = []
lonlat_clusters = [[]]

# Italy in UTM 33N 
utm_to_wgs84 = Transformer.from_crs(3065, 4326, always_xy=True)
wgs84_to_utm = Transformer.from_crs(4326, 3065, always_xy=True)


for i in trace:
    x,y = wgs84_to_utm.transform(i[1],i[0]) 
    xy_point = [x,y]
    xy_clusters.append(xy_point) 

clustering = DBSCAN(eps=50, min_samples=28).fit(xy_clusters)

cluster_nr = 0
for i in range(len(clustering.labels_)):  
    new_cluster_nr = clustering.labels_[i]
    if clustering.labels_[i] != -1:
        if cluster_nr != new_cluster_nr:
            cluster_nr = new_cluster_nr
        if new_cluster_nr >= len(lonlat_clusters):
            lonlat_clusters.append([])
        lonlat_clusters[cluster_nr].append(trace[i]) 




In [296]:
fig = go.Figure()
for i in range(len(lonlat_clusters)):
    fig.add_trace(go.Scattermapbox(
        lat=[b for a,b,c in lonlat_clusters[i]],
        lon=[a for a,b,c in lonlat_clusters[i]],
        mode='lines+markers',
        marker=go.scattermapbox.Marker(
            size=9
        ),
        name = f'SP {i}',
        text=[datetime.utcfromtimestamp(c).strftime('%d/%m/%Y - %H:%M') for a,b,c in lonlat_clusters[i]]
    ))

center_of_rome = [41.9028, 12.4964]
fig.update_layout(mapbox_style="open-street-map",mapbox_center_lon=center_of_rome[1], mapbox_center_lat=center_of_rome[0],mapbox_zoom=10)
fig.show()


datetime.datetime.utcfromtimestamp() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.fromtimestamp(timestamp, datetime.UTC).

