# Dublin Buses - Trip Graph

In [1]:
import numpy as np
import pandas as pd
import os
import math
import folium

from h3 import h3
from sklearn.cluster import DBSCAN
from shapely.geometry import Polygon
from shapely.ops import cascaded_union

import ipywidgets as widgets
from ipywidgets import interact, interact_manual
# Notebook (widget) progress bar, bound to the name `tqdm` used by the loops below.
from tqdm.notebook import tqdm

In [2]:
def list_unique(lst):
    """Return the distinct elements of ``lst`` as a list.

    Duplicates are dropped by passing the values through a ``set``, so the
    elements must be hashable and the result follows the set's iteration
    order rather than the input order.
    """
    return list(set(lst))

In [40]:
def get_folium_polygons(shape, tooltip):
    """Convert a shapely (Multi)Polygon into a list of folium polygon layers.

    Only the exterior ring of each polygon is used. The ring's two coordinate
    sequences are swapped when paired (second sequence first) before being
    handed to folium. Any geometry type other than "Polygon" or
    "MultiPolygon" yields an empty list.
    """
    kind = shape.geom_type
    if kind == "Polygon":
        parts = [shape]
    elif kind == "MultiPolygon":
        parts = list(shape.geoms)
    else:
        parts = []

    layers = []
    for part in parts:
        first, second = part.exterior.xy
        locations = list(zip(second, first))
        layers.append(
            folium.vector_layers.Polygon(locations=locations, fill=True, tooltip=tooltip)
        )
    return layers

In [37]:
def get_polygons_from_h3s(h3_hexes, tooltip=None):
    """Merge a collection of H3 cells into one shape and return folium layers for it.

    Parameters
    ----------
    h3_hexes : iterable
        H3 cell indexes whose boundaries are merged.
    tooltip : str, optional
        Tooltip attached to every returned polygon. When omitted, the
        function falls back to "Node <spid>" using the notebook-level
        variable ``spid`` if one exists (the cluster-drawing loop relies on
        this). If no such variable exists the polygons get no tooltip,
        instead of the call failing with a NameError.

    Returns
    -------
    list
        The folium polygon layers produced by ``get_folium_polygons``.
    """
    # Local import so this cell stands alone; unary_union replaces
    # cascaded_union, which Shapely has deprecated.
    from shapely.ops import unary_union

    if tooltip is None and "spid" in globals():
        tooltip = "Node {0}".format(globals()["spid"])

    polygons = []
    for h in h3_hexes:
        pts = h3.h3_to_geo_boundary(h)
        # Swap the two coordinates of every boundary point (presumably
        # (lat, lon) -> (lon, lat) for shapely; get_folium_polygons swaps back).
        vertices = [[pt[1], pt[0]] for pt in pts]
        # Repeat the first vertex to close the ring explicitly.
        vertices.append([pts[0][1], pts[0][0]])
        polygons.append(Polygon(vertices))

    union_poly = unary_union(polygons)
    return get_folium_polygons(union_poly, tooltip)

In [38]:
def h3s_to_map(h3_hexes):
    """Return a new folium map with the merged outline of ``h3_hexes`` drawn on it."""
    hex_map = folium.folium.Map()
    for layer in get_polygons_from_h3s(h3_hexes):
        layer.add_to(hex_map)
    return hex_map

In [4]:
df = pd.read_parquet("data/fixed.parquet")

In [5]:
# Keep only the rows whose `dt` exceeds 200000 (presumably unusually long
# gaps between samples; the unit of `dt` is not shown here -- TODO confirm).
stops_df = df[df.dt > 200000]

In [6]:
stops_df.shape

(1052, 16)

In [7]:
stops_df.head()

Unnamed: 0,Timestamp,LineID,Direction,PatternID,JourneyID,Congestion,Lon,Lat,Delay,BlockID,VehicleID,StopID,AtStop,dt,dx,v
111213,1357539385000000,40,0,,2282,0,-6.277933,53.418201,0,40205,33142,0,0,206672.0,230.147194,0.004009
318862,1359358561000000,51,0,,1935,0,-6.297167,53.347935,0,824006,33604,0,0,214195.0,34.51801,0.00058
373015,1357540916000000,102,0,,400,0,-6.276367,53.416466,0,102005,40030,0,0,210559.0,156.389266,0.002674
476444,1358752648000000,27,0,,3775,0,-6.191617,53.359234,0,27113,33345,0,0,219235.0,4486.110922,0.073665
483421,1359355959000000,130,0,,2033,0,-6.191433,53.358849,0,130005,33345,0,0,207297.0,266.474131,0.004628


In [8]:
# Parameters
# DBSCAN neighbourhood radius in meters; converted to radians before clustering.
eps_in_meters = 50.0
# Passed to DBSCAN as `min_samples`.
num_samples = 10

In [9]:
# Latitude/longitude converted to radians, in (Lat, Lon) column order, as input
# for the haversine-metric clustering below.
pts = np.radians(stops_df[['Lat', 'Lon']])

In [10]:
# Cluster the data
# `pts` is in radians, so the radius must be an angle too:
# eps_in_meters / perimeter * 2*pi is the same as eps_in_meters / earth_radius.
earth_perimeter = 40070000.0  # In meters
eps_in_radians = eps_in_meters / earth_perimeter * (2 * math.pi)

# fit_predict yields one cluster label per row of `pts`; the label -1 is
# DBSCAN's marker for noise points.
stop_clusters = DBSCAN(eps=eps_in_radians, min_samples=num_samples,
                       metric='haversine',
                       algorithm='ball_tree').fit_predict(pts)

In [11]:
df['spid'] = -1

In [12]:
# `stop_clusters` holds one label per row of `stops_df`, which was selected with
# this same `dt > 200000` mask, so the positional assignment lines up row by row.
df.loc[df.dt > 200000, 'spid'] = stop_clusters

In [13]:
np.unique(stop_clusters)

array([-1,  0,  1,  2,  3,  4,  5,  6,  7,  8])

In [14]:
m = folium.folium.Map()

In [15]:
# Draw every cluster (negative labels are skipped) on map `m` as the merged
# outline of the resolution-11 H3 cells covering its points.
# NOTE: the loop variable must stay named `spid` -- get_polygons_from_h3s reads
# that global to build the "Node <id>" tooltip.
for spid in np.unique(stop_clusters):
    if spid < 0:
        continue

    sp_df = df[df.spid == spid]
    locs = sp_df[['Lat', 'Lon']].to_numpy()
    hexes = list_unique([h3.geo_to_h3(lat, lon, 11) for lat, lon in locs])

    folium_polygons = get_polygons_from_h3s(hexes)
    for fp in folium_polygons:
        fp.add_to(m)

In [16]:
#m

In [17]:
# Re-select the same rows so that `stops_df` now carries the `spid` column
# that was added to `df` after the first selection.
stops_df = df[df.dt > 200000]

In [18]:
vehicles = stops_df['VehicleID'].unique()

In [19]:
vehicles.shape

(560,)

In [20]:
# For every vehicle with more than one row in `stops_df`, record the sorted,
# de-duplicated cluster ids it appears in (negative labels excluded).
vehicle_paths = []
for vehicle in vehicles:
    vehicle_df = stops_df[stops_df.VehicleID == vehicle]
    if vehicle_df.shape[0] <= 1:
        continue
    spids = vehicle_df.spid.to_numpy()
    clustered = spids[spids >= 0]
    vehicle_paths.append((vehicle, np.unique(clustered)))

In [22]:
# vehicle_paths

In [23]:
at_stop_df = df[df.AtStop == 1]

In [24]:
at_stop_df.shape

(5425996, 17)

In [27]:
stop_ids = at_stop_df.StopID.unique()

In [28]:
# Collect, for every StopID, the distinct resolution-12 H3 cells that its
# at-stop rows fall into.
# A single groupby pass replaces one full boolean scan of `at_stop_df` per stop.
# sort=False keeps the groups in first-appearance order, the same order that
# `at_stop_df.StopID.unique()` produces, so positions in `stop_hexes` are unchanged.
stop_hexes = []
stop_groups = at_stop_df.groupby('StopID', sort=False)
for stop_id, stop_df in tqdm(stop_groups, total=stop_groups.ngroups):
    locs = stop_df[['Lat', 'Lon']].to_numpy()
    hexes = list_unique([h3.geo_to_h3(l[0], l[1], 12) for l in locs])
    stop_hexes.append((stop_id, hexes))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, max=4639.0), HTML(value='')))




In [36]:
len(stop_hexes[6][1])

10

In [42]:
# h3s_to_map(stop_hexes[6][1])

In [None]:
def get_hour(ts):
    """Return the ``hour`` attribute of the timestamp-like object ``ts``."""
    hour = ts.hour
    return hour

def get_day(ts):
    """Return the ``day`` attribute of the timestamp-like object ``ts``."""
    day = ts.day
    return day

In [None]:
df = pd.read_parquet("data/fixed.parquet")

In [None]:
# Decode the microsecond epoch values in `Timestamp` and derive calendar fields.
# The vectorised Series `.dt` accessor replaces a per-row Python call through
# .apply(); note this is the datetime accessor of the DateTime Series, not the
# frame's own `dt` column.
df['DateTime'] = pd.to_datetime(df['Timestamp'], unit='us')
df['Day'] = df['DateTime'].dt.day
df['Hour'] = df['DateTime'].dt.hour

In [None]:
# Distinct (JourneyID, VehicleID) pairs.
# `u` is kept as the raw 2-D array because later cells index into it.
u = df[['JourneyID', 'VehicleID']].values
# drop_duplicates de-duplicates inside pandas instead of adding every row to a
# set from a Python loop; each remaining row becomes one plain tuple.
s = set(
    df[['JourneyID', 'VehicleID']]
    .drop_duplicates()
    .itertuples(index=False, name=None)
)

In [None]:
len(s)

In [None]:
pairs = list(s)

In [None]:
pairs[3]

In [None]:
df[df.JourneyID == 5591].shape

In [None]:
sample = df[(df.JourneyID == 5591) & (df.VehicleID == 33276)]

In [None]:
# sample.to_csv("data/sample_2931_33609.csv", index=False)

In [None]:
sample.shape

In [None]:
%matplotlib inline
bbox = sample['dt'].plot.box(figsize=(6,12))

In [None]:
u.shape

In [None]:
u[0]

In [None]:
s = set()

In [None]:
s.add((u[0,0], u[0,1]))

In [None]:
s

In [None]:
# Split the frame into one independent DataFrame per JourneyID.
# NOTE(review): the original loop iterated over `journeys`, which no visible
# cell defines; this assumes it meant every JourneyID in `df` -- confirm.
# One groupby pass also avoids a full boolean scan of `df` per journey;
# sort=False keeps first-appearance order of the ids.
journey_groups = df.groupby('JourneyID', sort=False)
trips = {
    j: journey_df.copy()
    for j, journey_df in tqdm(journey_groups, total=journey_groups.ngroups)
}
# Drop the reference to the full frame; a later cell re-binds `df` to one trip.
journey_groups = None
df = None

In [None]:
trips[3493].iloc[0:1]

In [None]:
trips[3493].iloc[-2:-1]

In [None]:
%matplotlib inline
trips[3493][['v', 'dx', 'dt']].plot.box()

In [None]:
trips[3493]['VehicleID'].unique()

In [None]:
trips[3493]['PatternID'].unique()

In [None]:
trips[3493]['LineID'].unique()

In [None]:
df = trips[3493]

In [None]:
df.dt.plot.box()

In [None]:
df[(df.dt > 200) & (df.v < 5)]

In [None]:
df.Day.unique()

In [None]:
df.PatternID.unique()

In [None]:
df[df.Day == 12]['VehicleID'].unique()

In [None]:
df[df.Day == 12]