# Dublin Buses - Missing Data

Prerequisites: `01-clean-data.ipynb`

Before running this notebook, run the previous notebook and all of its own prerequisites.

In [1]:
import numpy as np
import pandas as pd
import osmnx as ox
import json
import requests
import osmread

from pathlib import Path
from db import MapDb
from leuvenmapmatching.matcher.distance import DistanceMatcher
from leuvenmapmatching.map.inmem import InMemMap
from leuvenmapmatching.map.sqlite import SqliteMap

In [2]:
# Load the bus observations; the file is presumably produced by the
# prerequisite notebook `01-clean-data.ipynb` (verify before running).
fixed_parquet = "data/fixed.parquet"
df = pd.read_parquet(fixed_parquet)

In [12]:
# Print the row count, per-column dtypes and memory footprint of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31002957 entries, 0 to 31002956
Data columns (total 16 columns):
Timestamp     int64
LineID        int32
Direction     int64
PatternID     object
JourneyID     int32
Congestion    int8
Lon           float64
Lat           float64
Delay         int8
BlockID       int64
VehicleID     int32
StopID        int32
AtStop        int8
dt            float64
dx            float64
v             float64
dtypes: float64(5), int32(4), int64(3), int8(3), object(1)
memory usage: 2.6+ GB


In [14]:
# Count the missing values in each column.
df.isnull().sum()

Timestamp           0
LineID              0
Direction           0
PatternID     3228174
JourneyID           0
Congestion          0
Lon                 0
Lat                 0
Delay               0
BlockID             0
VehicleID           0
StopID              0
AtStop              0
dt                  0
dx                  0
v                   0
dtype: int64

In [15]:
# Distinct vehicle identifiers present in the data.
vehicles = df["VehicleID"].unique()

In [17]:
# Shape of the unique-ID array, i.e. how many distinct vehicles there are.
vehicles.shape

(869,)

In [3]:
# Geographic extent of all recorded positions: the min/max of the longitude
# and latitude columns.
lon_col, lat_col = df['Lon'], df['Lat']
bbox = dict(
    west=lon_col.min(),
    east=lon_col.max(),
    north=lat_col.max(),
    south=lat_col.min(),
)

# Persist the extent as JSON (despite the .txt suffix) so it can be reloaded
# from disk later on.
with open('data/bbox.txt', 'w') as bbox_file:
    json.dump(bbox, bbox_file)

In [4]:
# Download the drivable street network inside the bounding box of the data.
#
# `graph_from_bbox` is reached through the public top-level `ox` namespace
# rather than the `ox.core` submodule, which newer OSMnx releases no longer
# provide. Keyword arguments make the north/south/east/west order explicit.
# simplify=False asks OSMnx not to simplify the graph topology.
# NOTE(review): OSMnx 2.x expects a single `bbox=` tuple instead of four
# separate arguments — confirm against the installed version.
g = ox.graph_from_bbox(
    north=bbox['north'],
    south=bbox['south'],
    east=bbox['east'],
    west=bbox['west'],
    network_type='drive',
    simplify=False,
)

In [6]:
# Project the lat/lon graph into a planar coordinate system.
# NOTE(review): presumably the local UTM zone chosen by OSMnx — confirm.
graph_proj = ox.project_graph(g)

In [7]:
# Convert the projected graph into two GeoDataFrames: one for nodes, one for edges.
nodes_proj, edges_proj = ox.graph_to_gdfs(graph_proj, nodes=True, edges=True)

In [8]:
# Inspect the projected nodes (projected x/y alongside the original lon/lat).
nodes_proj

Unnamed: 0,y,x,osmid,highway,ref,lon,lat,geometry
255328256,5.921690e+06,685036.101476,255328256,,,-6.215936,53.411774,POINT (685036.101 5921689.507)
255328257,5.921712e+06,685004.109038,255328257,,,-6.216403,53.411988,POINT (685004.109 5921712.071)
255328258,5.921711e+06,684995.562143,255328258,,,-6.216532,53.411978,POINT (684995.562 5921710.590)
255328259,5.921703e+06,684986.551676,255328259,,,-6.216672,53.411915,POINT (684986.552 5921703.255)
255328260,5.921657e+06,685002.807794,255328260,,,-6.216455,53.411494,POINT (685002.808 5921656.931)
...,...,...,...,...,...,...,...,...
255328251,5.921521e+06,684228.169892,255328251,,,-6.228175,53.410541,POINT (684228.170 5921520.631)
255328252,5.921513e+06,684244.881765,255328252,,,-6.227928,53.410464,POINT (684244.882 5921512.726)
255328253,5.921707e+06,685075.625745,255328253,,,-6.215331,53.411918,POINT (685075.626 5921707.034)
255328254,5.921697e+06,685063.521288,255328254,,,-6.215519,53.411836,POINT (685063.521 5921697.472)


In [9]:
# Inspect the projected edges (endpoints u/v, OSM tags, length and geometry).
edges_proj

Unnamed: 0,u,v,key,osmid,oneway,lanes,ref,name,highway,length,geometry,junction,maxspeed,access,bridge,tunnel,width,service,est_width
0,255328256,255328255,0,237518648,True,1,L2051,Clonshaugh Road,tertiary,10.802,"LINESTRING (685036.101 5921689.507, 685046.850...",,,,,,,,
1,255328257,4883731143,0,2712795,True,2,L2051,Clonshaugh Road,tertiary,5.037,"LINESTRING (685004.109 5921712.071, 685009.087...",roundabout,,,,,,,
2,255328258,4883731126,0,2712795,True,2,L2051,Clonshaugh Road,tertiary,3.660,"LINESTRING (684995.562 5921710.590, 684999.074...",roundabout,,,,,,,
3,255328259,4883731150,0,2712795,True,2,L2051,Clonshaugh Road,tertiary,5.859,"LINESTRING (684986.552 5921703.255, 684990.515...",roundabout,,,,,,,
4,255328260,255328261,0,2712795,True,2,L2051,Clonshaugh Road,tertiary,5.687,"LINESTRING (685002.808 5921656.931, 684997.105...",roundabout,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
472225,255328253,255328254,0,242870597,False,2,L2051,Clonshaugh Road,tertiary,15.387,"LINESTRING (685075.626 5921707.034, 685063.521...",,,,,,,,
472226,255328254,255328253,0,242870597,False,2,L2051,Clonshaugh Road,tertiary,15.387,"LINESTRING (685063.521 5921697.472, 685075.626...",,,,,,,,
472227,255328254,3768613521,0,242870597,False,2,L2051,Clonshaugh Road,tertiary,2.824,"LINESTRING (685063.521 5921697.472, 685061.053...",,,,,,,,
472228,2373976063,4159965431,0,732239237,False,3,R110,Crumlin Road,secondary,45.772,"LINESTRING (679738.369 5912312.855, 679780.863...",,50,,,,,,


In [None]:
# Location of the raw OSM extract. It lives under data/, the same location
# that the later redefinition of `xml_file` uses, so the download and parse
# cells read and write one file regardless of which definition ran last.
xml_file = Path("./data") / "osm.xml"

In [None]:
# Overpass "map" request; the bbox corners are passed in the order
# west, south, east, north.
overpass_template = 'http://overpass-api.de/api/map?bbox={0},{1},{2},{3}'
bbox_corners = (bbox['west'], bbox['south'], bbox['east'], bbox['north'])
url = overpass_template.format(*bbox_corners)

In [None]:
# Show the assembled request URL for a visual sanity check.
url

In [None]:
# Destination of the downloaded OSM extract, inside the data directory.
xml_file = Path("./data").joinpath("osm.xml")

In [None]:
# Stream the Overpass response to `xml_file` without holding it all in memory.
# The response is used as a context manager so the connection is released
# even if writing fails.
with requests.get(url, stream=True) as r:
    # Fail loudly on an HTTP error (rate limiting, rejected bbox, ...) instead
    # of silently saving the error page as if it were the OSM extract.
    r.raise_for_status()
    with xml_file.open('wb') as ofile:
        # 1 MiB chunks: far fewer Python-level iterations than 1 KiB chunks.
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            if chunk:  # skip empty keep-alive chunks
                ofile.write(chunk)

In [None]:
# Create the SQLite-backed map named "dublin".
# NOTE(review): presumably stored as a database file inside ./db — confirm
# that the directory exists before running.
map_con = SqliteMap("dublin", dir="./db")

In [None]:
from tqdm import tqdm

In [None]:
# Build the map-matching graph from the OSM extract: every OSM node becomes a
# map node, and every pair of consecutive nodes on a `highway` way becomes an
# edge.
#
# Edge direction follows the OSM `oneway` tag so that two-way streets can be
# traversed in both directions while one-way streets stay one-way:
#   oneway in ONEWAY_FORWARD  -> only a -> b
#   oneway == '-1'            -> only b -> a (way drawn against traffic flow)
#   anything else / missing   -> both directions
# Roundabouts and motorways are treated as one-way when they carry no
# explicit `oneway` tag, following the OSM tagging convention.
ONEWAY_FORWARD = ('yes', 'true', '1')
IMPLIED_ONEWAY = (('junction', 'roundabout'), ('highway', 'motorway'))

for entity in tqdm(osmread.parse_file(str(xml_file))):
    if isinstance(entity, osmread.Way) and 'highway' in entity.tags:
        oneway = entity.tags.get('oneway')
        if oneway is None and any(entity.tags.get(k) == v for k, v in IMPLIED_ONEWAY):
            oneway = 'yes'
        add_forward = oneway != '-1'
        add_backward = oneway not in ONEWAY_FORWARD
        for node_a, node_b in zip(entity.nodes, entity.nodes[1:]):
            if add_forward:
                map_con.add_edge(node_a, node_b)
            if add_backward:
                map_con.add_edge(node_b, node_a)
    elif isinstance(entity, osmread.Node):
        map_con.add_node(entity.id, (entity.lat, entity.lon))
# Called once after all entities have been added, as in the original cell.
map_con.purge()

In [10]:
# Toy map-matching example on a small hand-made graph.
# Each entry maps a node label to (coordinates, [neighbouring node labels]).
# NOTE(review): this rebinds `map_con`, replacing the "dublin" SqliteMap
# created in an earlier cell.
toy_graph = {
    "A": ((1, 1), ["B", "C", "X"]),
    "B": ((1, 3), ["A", "C", "D", "K"]),
    "C": ((2, 2), ["A", "B", "D", "E", "X", "Y"]),
    "D": ((2, 4), ["B", "C", "F", "E", "K", "L"]),
    "E": ((3, 3), ["C", "D", "F", "Y"]),
    "F": ((3, 5), ["D", "E", "L"]),
    "X": ((2, 0), ["A", "C", "Y"]),
    "Y": ((3, 1), ["X", "C", "E"]),
    "K": ((1, 5), ["B", "D", "L"]),
    "L": ((2, 6), ["K", "D", "F"])
}
map_con = InMemMap("mymap", graph=toy_graph, use_latlon=False)

# Observation points to be matched onto the toy graph.
path = [
    (0.8, 0.7), (0.9, 0.7), (1.1, 1.0), (1.2, 1.5), (1.2, 1.6), (1.1, 2.0),
    (1.1, 2.3), (1.3, 2.9), (1.2, 3.1), (1.5, 3.2), (1.8, 3.5), (2.0, 3.7),
    (2.3, 3.5), (2.4, 3.2), (2.6, 3.1), (2.9, 3.1), (3.0, 3.2),
    (3.1, 3.8), (3.0, 4.0), (3.1, 4.3), (3.1, 4.6), (3.0, 4.9),
]

matcher = DistanceMatcher(
    map_con,
    max_dist=2,
    obs_noise=1,
    min_prob_norm=0.5,
    only_edges=False,
)
states, x = matcher.match(path)
nodes = matcher.path_pred_onlynodes

# Report the matched state per observation, then the resulting node sequence.
print("States\n------", states, sep="\n")
print("Nodes\n------", nodes, sep="\n")

print("")
matcher.print_lattice_stats()

  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))
The MatcherDistance method only works on edges as states. Nodes have been disabled.
Searching closeby nodes with linear search, use an index and set max_dist


States
------
[('X', 'A'), ('A', 'B'), ('A', 'B'), ('A', 'B'), ('A', 'B'), ('A', 'B'), ('A', 'B'), ('A', 'B'), ('B', 'D'), ('B', 'D'), ('B', 'D'), ('B', 'D'), ('D', 'E'), ('D', 'E'), ('D', 'E'), ('E', 'F'), ('E', 'F'), ('E', 'F'), ('E', 'F'), ('E', 'F'), ('E', 'F'), ('E', 'F')]
Nodes
------
['X', 'A', 'B', 'D', 'E', 'F']

Stats lattice
-------------
nbr levels               : 22
nbr lattice              : 1002
avg lattice[level]       : 45.54545454545455
min lattice[level]       : 7
max lattice[level]       : 97
avg obs distance         : 0.15514927458475236
last logprob             : -0.5464565099511667
last length              : 22
last norm logprob        : -0.024838932270507576


In [13]:
# Node sequence of the matched path from the toy example.
matcher.path_pred_onlynodes

['X', 'A', 'B', 'D', 'E', 'F']

In [None]:
# Reload the bounding box stored as JSON in data/bbox.txt and show it.
with open('data/bbox.txt', 'r') as bbox_file:
    bbox = json.load(bbox_file)
print(bbox)

In [None]:
# Download the drivable street network inside the stored bounding box.
#
# `graph_from_bbox` is reached through the public top-level `ox` namespace
# rather than the `ox.core` submodule, which newer OSMnx releases no longer
# provide.
# NOTE(review): OSMnx 2.x expects a single `bbox=` tuple instead of four
# separate arguments — confirm against the installed version.
G = ox.graph_from_bbox(
    north=bbox['north'],
    south=bbox['south'],
    east=bbox['east'],
    west=bbox['west'],
    network_type='drive',
)

In [None]:
# Draw the street network with semi-transparent nodes and edges.
# NOTE(review): later OSMnx releases appear to take `figsize` instead of
# `fig_height` — confirm against the installed version.
ox.plot.plot_graph(G, fig_height=12, node_alpha=0.5, edge_alpha=0.5)