In [2]:
import warnings
import itertools
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

from itertools import islice, takewhile, chain
from functools import reduce
from typing import Optional
import datetime as dt
from dataclasses import asdict, fields
from importlib import reload

from geopy.distance import distance
from shapely.geometry import Point, LineString
import geopandas as gpd

from typing import List
import ipyleaflet as lf

pd.set_option('display.max_rows', 1000000000)

In [46]:
import busboy.model as m
import busboy.geo as geo
import busboy.database as db
import busboy.prediction as prediction
import busboy.map.map as bmap
import busboy.apis as api
import busboy.util as util
import busboy.util.notebooks as notebook

In [47]:
reload(util)
reload(geo)
reload(m)
reload(db)
reload(prediction)
reload(bmap)
reload(api)
reload(notebook)

<module 'busboy.util.notebooks' from '/Users/Noel/Developer/Projects/Busboy/busboy/util/notebooks.py'>

In [4]:
rbn = db.routes_by_name()
route = rbn["220"].id
entries = db.entries(r=route, d=dt.date(2019, 2, 11))
stops_by_name = db.stops_by_name()
timetables = list(api.timetables("220", stops_by_name))
timetable_variants = {t for timetable in timetables for t in timetable.variants}

In [5]:
entries_by_vehicle = util.dict_collect_list(entries, lambda e: e.vehicle)
vehicles_by_entry_count = [v for (v, es) in sorted(entries_by_vehicle.items(), key = lambda t: len(t[1]))]

In [6]:
pvars = sorted(list(
    prediction.possible_variants(
        prediction.drop_duplicate_positions(entries_by_vehicle[vehicles_by_entry_count[11]]), 
        timetable_variants
    )),
    key = lambda t: t[0].poll_time)

In [7]:
order_pvars = list(prediction.check_variant_order(pvars))

In [84]:
precheck = [(len(t[1]), t[0].latitude, t[0].longitude, t[0].poll_time.time().isoformat(), {(tv.route, i, tv.stops[i].name, tv.stops[i + 1].name) for (tv, i) in t[1]}) for t in islice(pvars, 200, 300)]
precheck

[(6,
  51.87828416666667,
  -8.436893055555556,
  '20:01:27.593951',
  {('220',
    12,
    'Douglas Village East (Shopping Centre)',
    'Douglas Road (South Link Exit Slip Road)'),
   ('220',
    18,
    'Douglas Village East (Shopping Centre)',
    'Douglas Road (South Link Exit Slip Road)'),
   ('220',
    21,
    'Douglas Village East (Shopping Centre)',
    'Douglas Road (South Link Exit Slip Road)'),
   ('220',
    43,
    'Douglas Road (Clermont Ave)',
    'Douglas East Village (Opp Tramway Tce)')}),
 (6,
  51.87699805555555,
  -8.435628888888889,
  '20:01:47.597543',
  {('220',
    11,
    'Maryborough Hill (Paddocks)',
    'Douglas Village East (Shopping Centre)'),
   ('220',
    17,
    'Maryborough Hill (Paddocks)',
    'Douglas Village East (Shopping Centre)'),
   ('220',
    20,
    'Maryborough Hill (Paddocks)',
    'Douglas Village East (Shopping Centre)'),
   ('220',
    43,
    'Douglas Road (Clermont Ave)',
    'Douglas East Village (Opp Tramway Tce)')}),
 (6,
  51.8

In [85]:
postcheck = [(len(t[1]), t[0].latitude, t[0].longitude, t[0].poll_time.time().isoformat(), {(tv.route, i, tv.stops[i].name, tv.stops[i + 1].name) for (tv, i) in t[1]}) for t in islice(order_pvars, 200, 300)]
postcheck

[(3,
  51.87828416666667,
  -8.436893055555556,
  '20:01:27.593951',
  {('220',
    43,
    'Douglas Road (Clermont Ave)',
    'Douglas East Village (Opp Tramway Tce)')}),
 (3,
  51.87699805555555,
  -8.435628888888889,
  '20:01:47.597543',
  {('220',
    43,
    'Douglas Road (Clermont Ave)',
    'Douglas East Village (Opp Tramway Tce)')}),
 (3,
  51.87699805555555,
  -8.435628888888889,
  '20:02:07.608235',
  {('220',
    43,
    'Douglas Road (Clermont Ave)',
    'Douglas East Village (Opp Tramway Tce)')}),
 (3,
  51.87699805555555,
  -8.435628888888889,
  '20:02:27.624700',
  {('220',
    43,
    'Douglas Road (Clermont Ave)',
    'Douglas East Village (Opp Tramway Tce)')}),
 (3,
  51.87699805555555,
  -8.435628888888889,
  '20:02:47.641593',
  {('220',
    43,
    'Douglas Road (Clermont Ave)',
    'Douglas East Village (Opp Tramway Tce)')}),
 (3,
  51.87699805555555,
  -8.435628888888889,
  '20:03:07.658397',
  {('220',
    43,
    'Douglas Road (Clermont Ave)',
    'Douglas East

In [8]:
# show stop times
# collected = [util.dict_collect_set(vs, lambda tpl: tpl[0]) for (e, vs) in order_pvars]
stop_shaped_entries = [(e, {v: {t[1] for t in ts}}) for (e, vs) in order_pvars for (v, ts) in util.dict_collect_set(vs, lambda tpl: tpl[0]).items()]
times = prediction.stop_times(stop_shaped_entries)

In [43]:
calculated_times = set()

for v, ts in islice(times.items(), 0, 10):
    for p, stop_times in islice(ts.items(), 0, 100):
        for t in stop_times:
            calculated_times.add(tuple(chain([v.stops[p].name], (x for x in t))))
            
            
for (name, t1, t2) in sorted(calculated_times, key=lambda tpl: tpl[1]):
    print(f"{name:50} ({t1.time().isoformat()} -> {t2.time().isoformat()})")

Killumney Road (Kilumney Cross)                    (19:09:05.460486 -> 19:09:25.461053)
Ovens (EMC Terminus)                               (19:10:25.494623 -> 19:11:05.527238)
Ovens (EMC Terminus)                               (19:10:45.510832 -> 21:07:10.027195)
Ovens (EMC Terminus)                               (19:10:45.510832 -> 20:49:09.319603)
Ovens (EMC Terminus)                               (19:10:45.510832 -> 20:53:09.410369)
Killumney Road (Kilumney Cross)                    (19:11:05.527238 -> 19:11:25.543803)
Ovens (EMC Terminus)                               (19:12:05.577094 -> 19:12:25.593565)
Ballincollig West (Classes Lake)                   (19:13:45.634494 -> 19:14:25.667575)
Ballincollig West (Opp Aylsbury Estate)            (19:17:05.778574 -> 19:17:25.794942)
Ballincollig West (Opp Old Quarry)                 (19:17:45.811519 -> 19:18:05.819767)
Ballincollig West (Opp Coolroe Heights)            (19:18:05.819767 -> 19:18:25.831256)
Ballincollig West (Op White Hors

In [38]:
themap = bmap.Map()
themap.map

Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

In [39]:
stops_by_name = {s.name: s for s in db.stops()}
timetables220 = api.timetables("220", stops_by_name)
for s in util.unique(s for t in timetables220 for v in t.variants for s in v.stops):
    themap._add_marker(x=s.latitude, y=s.longitude, tooltip=s.name)

In [42]:
notebook.plot_entries(
    themap, 
    (t[0] for t in islice(stop_shaped_entries, 100, 200)), 
    entry_to_layer = lambda e: lf.Marker(
        location=(e.latitude, e.longitude),
        draggable=False,
        title=e.poll_time.time().isoformat()
    ),
    clear=False
)

In [3]:
stops = db.stops_by_route_name(db.default_connection(), "220")

In [4]:
c = db.default_connection()
d = dt.date(2019, 1, 8)
gdf = db.data_gdf(c, r, d)
print(gdf.shape)
gdf.head()

(15980, 16)


Unnamed: 0_level_0,accuracy_level,bearing,category,congestion_level,direction,has_bike_rack,is_accessible,latitude,longitude,pattern_id,route,status,trip,vehicle_id,Entries,Coordinates
last_modified,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2019-01-08 22:36:59.506,3,89,5.0,1.0,1,False,False,51.888829,-8.525547,PatternId(raw='7338650210241173397'),RouteId(raw='7338652709907595333'),5,TripId(raw='7338656568253919758'),VehicleId(raw='7338674957838188752'),DatabaseEntry(last_modified=datetime.datetime(...,POINT (-8.525546944444445 51.88882888888889)
2019-01-08 22:36:59.506,3,89,5.0,1.0,1,False,False,51.888829,-8.525547,PatternId(raw='7338650210241173743'),RouteId(raw='7338652709907595333'),1,TripId(raw='7338656568253919757'),VehicleId(raw='7338674957838188752'),DatabaseEntry(last_modified=datetime.datetime(...,POINT (-8.525546944444445 51.88882888888889)
2019-01-08 08:41:02.976,3,89,5.0,1.0,1,False,False,51.887951,-8.595977,PatternId(raw='7338650210241173586'),RouteId(raw='7338652709907595333'),4,TripId(raw='7338656568301097475'),VehicleId(raw='7338674957838188924'),DatabaseEntry(last_modified=datetime.datetime(...,POINT (-8.595977222222222 51.88795111111111)
2019-01-08 14:28:44.681,3,221,5.0,1.0,2,False,False,51.874689,-8.43229,PatternId(raw='7338650210241173747'),RouteId(raw='7338652709907595333'),1,TripId(raw='7338656568287293959'),VehicleId(raw='7338674957838188763'),DatabaseEntry(last_modified=datetime.datetime(...,POINT (-8.432289722222222 51.87468944444444)
2019-01-08 08:40:52.930,3,20,5.0,1.0,2,False,False,51.892035,-8.506076,PatternId(raw='7338650210241173747'),RouteId(raw='7338652709907595333'),1,TripId(raw='7338656568253919748'),VehicleId(raw='7338674957838188926'),DatabaseEntry(last_modified=datetime.datetime(...,POINT (-8.506076111111112 51.892035)


In [5]:
church_cross_east = Point((51.89217, -8.55789))
trips = gdf["trip"].unique()

In [6]:
gdf = gdf[gdf["latitude"] <= 90]

In [8]:
ds = p.stop_distances(gdf[gdf["trip"] == trips[0]], (church_cross_east.x, church_cross_east.y))

In [44]:
print(ds.index.min())
print(ds.index.max())

2019-01-08 20:49:57.958000
2019-01-08 23:36:00.413000


In [83]:
ds[ds["vehicle_id"] == '7338674957838188926'].index.max()

Timestamp('2019-01-08 23:32:51.512000')

In [10]:
ds[ds["stop_distance"] < 100]

Unnamed: 0_level_0,accuracy_level,bearing,category,congestion_level,direction,has_bike_rack,is_accessible,latitude,longitude,pattern_id,route,status,trip,vehicle_id,Entries,Coordinates,stop_distance
last_modified,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2019-01-08 15:11:29.223,3,265,5.0,1.0,2,False,False,51.892103,-8.557722,PatternId(raw='7338650210241173747'),RouteId(raw='7338652709907595333'),1,TripId(raw='7338656568287293959'),VehicleId(raw='7338674957838188763'),DatabaseEntry(last_modified=datetime.datetime(...,POINT (-8.557721944444445 51.8921025),13.793412


In [9]:
themap = bmap.Map(delete=False)
themap.map

Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …

In [30]:
themap.display_df(gdf[gdf["trip"] == trips[19]])

In [21]:
stop_times = p.stop_times_220(gdf[gdf["trip"] == trips[0]])
stop_times.shape

(128, 19)

In [35]:
stop_times["stop_name"].sort_index()

last_modified
2019-01-08 12:26:11.697                        Ovens (EMC Terminus)
2019-01-08 12:58:11.651            Western Rd (Opp UCC Castlewhite)
2019-01-08 12:58:11.667            Western Rd (Opp UCC Castlewhite)
2019-01-08 13:02:04.111             Sheares Street (Mercy Hospital)
2019-01-08 13:47:34.761                        Carrigaline (Church)
2019-01-08 13:47:34.777                        Carrigaline (Church)
2019-01-08 13:48:08.037    Carrigaline (Town Ctr Bridge Southbound)
2019-01-08 13:53:25.975    Carrigaline (Town Ctr Bridge Southbound)
2019-01-08 13:54:14.243    Carrigaline (Town Ctr Bridge Southbound)
2019-01-08 13:54:31.325    Carrigaline (Town Ctr Bridge Southbound)
2019-01-08 13:54:47.362    Carrigaline (Town Ctr Bridge Southbound)
2019-01-08 13:55:03.400    Carrigaline (Town Ctr Bridge Southbound)
2019-01-08 13:55:19.454    Carrigaline (Town Ctr Bridge Southbound)
2019-01-08 13:56:28.072    Carrigaline (Lr Kilmoney Opp Serv Statio
2019-01-08 13:57:10.790          C

In [18]:
import math
def angle(pt1, pt2):
    x1, y1 = pt1
    x2, y2 = pt2
    inner_product = x1 * x2 + y1 * y2
    len1 = math.hypot(x1, y1)
    len2 = math.hypot(x2, y2)
    return math.acos(inner_product / (len1 * len2))

In [20]:
c1 = (51.887951, -8.595977)
c2 = (51.888829, -8.525547)
c3 = (51.874689, -8.432290)
x = LineString([c1, c2])
y = LineString([c1, c3])

In [22]:
v1 = (c2[0] - c1[0], c2[1] - c1[1])
v2 = (c3[0] - c1[0], c3[1] - c1[1])
angle(v1, v2)

0.09330953061553678

In [None]:
gdf.describe(include="all")

In [2]:
df = pd.read_csv('data/220-buses.csv', 
        parse_dates=['last_modified'],
        true_values=['t'],
        false_values=['f'])

df['longitude'] /= 3600000
df['latitude'] /= 3600000
df.set_index('last_modified', drop=False, inplace=True)

In [3]:
def trips(df):
    return [df[df['trip_id'] == t] for t in df['trip_id'].unique()]

In [8]:
church_east = (51.89217, -8.55789) # lat-lon
church_west = (51.89221, -8.55923)
shopping_centre = (51.88801, -8.59634)

In [5]:
def stop_rows(stop, trip, threshold=100):
    def f(acc, r):
        rs = acc[0]
        stops = acc[1]
        if len(rs) > 3:
            r1, r2, r3 = rs[-2], rs[-1], r
            d1, d2, d3 = [r.stop_distance for r in [r1, r2, r3]]
            if (d2 <= threshold
                    and d1 > d2
                    and d2 > d3):
                stops = stops + [r2]
        return (rs + [r], stops)
    rs, stops = reduce(f, stop_distances(trip, stop).itertuples(), ([], []))    
    return stops        
#     ds = {r: stop_distance(r, stop) for r in trip.itertuples()}
#     return list(takewhile(lambda r: ds[r] < 100, sorted(ds, key = lambda r: ds[r])))

In [37]:
ts = trips(df)
print(f"{len(ts)} trips")
trip_stop_times = [stop_rows(shopping_centre, t, threshold=1000) for t in ts]
all_stop_times = [t for ts in trip_stop_times for t in ts]
print(f"{len(all_stop_times)} stop times")

853 trips
1110 stop times


In [52]:
pd.DataFrame(all_stop_times, index=[s.Index for s in all_stop_times])[['trip_id', 'stop_distance', 'latitude', 'longitude']].sort_index()

Unnamed: 0,trip_id,stop_distance,latitude,longitude
2018-10-22 16:54:27.083,7338656568183682057,749.989648,51.888339,-8.58546
2018-10-22 16:56:27.985,7338656568183682057,893.4458,51.888655,-8.583405
2018-10-22 17:02:42.470,7338656568208385033,982.313079,51.888855,-8.582138
2018-10-22 17:06:05.647,7338656568208385033,321.927786,51.888013,-8.591664
2018-10-22 17:09:03.364,7338656568208385033,792.24319,51.886305,-8.607512
2018-10-23 07:58:09.336,7338656568214361347,781.464461,51.888411,-8.585008
2018-10-23 07:59:47.212,7338656568208372995,883.98937,51.888633,-8.58354
2018-10-23 07:59:48.179,7338656568208372995,883.98937,51.888633,-8.58354
2018-10-23 08:00:26.743,7338656568208372995,536.632448,51.888039,-8.588546
2018-10-23 08:16:18.118,7338656568208372995,136.494516,51.887849,-8.598305


In [67]:
def trip_for_id(tid: int) -> Optional[int]:
    try:
        return ts[[i for i, t in enumerate(ts) if t["trip_id"][0] == tid][0]]
    except IndexError:
        return None

In [68]:
stop_distances(trip_for_id(7338656568183682057).sort_index(), stop=shopping_centre)[['trip_id', 'latitude', 'longitude', 'bearing', 'stop_distance']]

Unnamed: 0_level_0,trip_id,latitude,longitude,bearing,stop_distance
last_modified,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-10-22 16:20:20.783,7338656568183682057,51.888019,-8.588834,264,516.810289
2018-10-22 16:51:29.473,7338656568183682057,51.888008,-8.590768,90,383.624332
2018-10-22 16:51:51.812,7338656568183682057,51.888008,-8.590768,90,383.624332
2018-10-22 16:52:09.035,7338656568183682057,51.888008,-8.590768,90,383.624332
2018-10-22 16:52:52.716,7338656568183682057,51.888008,-8.590768,90,383.624332
2018-10-22 16:53:08.473,7338656568183682057,51.888007,-8.590407,90,408.486626
2018-10-22 16:53:35.321,7338656568183682057,51.888007,-8.590321,90,414.434436
2018-10-22 16:54:04.743,7338656568183682057,51.888059,-8.588273,83,555.448632
2018-10-22 16:54:27.083,7338656568183682057,51.888339,-8.58546,76,749.989648
2018-10-22 16:54:46.412,7338656568183682057,51.888339,-8.58546,76,749.989648
