In [401]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

import multiprocessing as mp
import datetime as dt
import scipy.linalg
import scipy.stats
import shapely.wkt 
import itertools
import polyline
import pytz
import copy


from datetime import date
from datetime import datetime
from functools import partial
from collections import Counter
from sklearn.neighbors import BallTree
from shapely.geometry import Point,LineString
from tqdm import tqdm 

# Import data

In [402]:
# Import camera to roads dataframe
cameras_to_roads_filenames = "../data/output/misc/cameras_to_roads.csv"
cameras_to_roads = pd.read_csv(cameras_to_roads_filenames)

# Clean data

In [403]:
# Keep only necessary columns
cameras_to_roads = cameras_to_roads[['camera_id','road_id','road_geometry']]
# Convert camera id from float to string
cameras_to_roads['camera_id'] = cameras_to_roads.camera_id.astype('str')
cameras_to_roads['road_id'] = cameras_to_roads.road_id.astype('str')
# Remove duplicate rows (i.e. rows with same camera id and road id)
cameras_to_roads = cameras_to_roads.drop_duplicates(subset=['camera_id','road_id'])
# Convert string to geometry
cameras_to_roads['road_geometry'] = cameras_to_roads['road_geometry'].apply(lambda x: shapely.wkt.loads(x))
# Convert dataframe to geodataframe
cameras_to_roads_gdf = gpd.GeoDataFrame(cameras_to_roads,geometry='road_geometry',crs="EPSG:4326")
# # Extract endpoints from road into tuple
# cameras_to_roads_gdf['Origin'] = cameras_to_roads_gdf.road_geometry.apply(lambda x: x.coords[0])
# cameras_to_roads_gdf['Destination'] = cameras_to_roads_gdf.road_geometry.apply(lambda x: x.coords[-1])
#Extract endpoints from road into columns
cameras_to_roads_gdf[['OLon','OLat']] = cameras_to_roads_gdf.road_geometry.apply(lambda x: x.coords[0]).apply(pd.Series)
cameras_to_roads_gdf[['DLon','DLat']] = cameras_to_roads_gdf.road_geometry.apply(lambda x: x.coords[-1]).apply(pd.Series)
# Create dummy key for outer join
cameras_to_roads_gdf['key'] = 0
# Remove duplicate roads
cameras_to_roads_gdf = cameras_to_roads_gdf.drop_duplicates(subset=['road_id'])
# Create roads closest to cameras geodataframe
roads_closest_to_cameras = cameras_to_roads_gdf[['road_id','road_geometry','camera_id']]
# Group camera ids by road ids
roads_closest_to_cameras = cameras_to_roads_gdf.groupby(cameras_to_roads_gdf.road_id).agg({'camera_id':', '.join,'road_geometry':'first'}).reset_index()
# Convert dataframe to geodataframe
roads_closest_to_cameras = gpd.GeoDataFrame(roads_closest_to_cameras,geometry='road_geometry')

In [404]:
# Decide on how many origins/destinations to group together into one polyline
num_points = 10
encoding_precision = 5

# Encode origin and destination points using Google’s Encoded Polyline Algorithm
cameras_to_roads_gdf['Oencoded'] = cameras_to_roads_gdf.road_geometry.apply(lambda x: ('enc:'+polyline.encode([x.coords[0][::-1]],encoding_precision)+":"))
cameras_to_roads_gdf['Dencoded'] = cameras_to_roads_gdf.road_geometry.apply(lambda x: ('enc:'+polyline.encode([x.coords[-1][::-1]],encoding_precision)+":"))
# Format string in coordinates
cameras_to_roads_gdf['OLat'] = cameras_to_roads_gdf.OLat.apply(lambda x: '{0:.10f}'.format(x))
cameras_to_roads_gdf['OLon'] = cameras_to_roads_gdf.OLon.apply(lambda x: '{0:.10f}'.format(x))
cameras_to_roads_gdf['DLat'] = cameras_to_roads_gdf.DLat.apply(lambda x: '{0:.10f}'.format(x))
cameras_to_roads_gdf['DLon'] = cameras_to_roads_gdf.DLon.apply(lambda x: '{0:.10f}'.format(x))
# Convert coordinates to strings
cameras_to_roads_gdf[['OLat_str','OLon_str']] = cameras_to_roads_gdf[['OLat','OLon']].astype('str')
cameras_to_roads_gdf[['DLat_str','DLon_str']] = cameras_to_roads_gdf[['DLat','DLon']].astype('str')
# Join columns and separate them by commas
cameras_to_roads_gdf['Olatlon'] = cameras_to_roads_gdf[['OLat_str','OLon_str']].apply(lambda x: ','.join(x[x.notnull()]), axis = 1).astype('str')
cameras_to_roads_gdf['Dlatlon'] = cameras_to_roads_gdf[['DLat_str','DLon_str']].apply(lambda x: ','.join(x[x.notnull()]), axis = 1).astype('str')

if num_points > 1:
    # Concatenate every num_points rows/roads into one
    cameras_to_roads_gdf_grouped = cameras_to_roads_gdf.groupby(cameras_to_roads_gdf.index // num_points).agg({
                "Oencoded":'|'.join,
                "Dencoded":'|'.join,
                "camera_id":','.join,
                "road_id":','.join,
                "Olatlon":'|'.join,
                "Dlatlon":'|'.join})
    
    # Create dummy key for outer join
    cameras_to_roads_gdf_grouped['key'] = 0
    
    print(f'Number of road requests from {cameras_to_roads_gdf.shape[0]} reduced to {cameras_to_roads_gdf_grouped.shape[0]}')

Number of road requests from 2331 reduced to 244


# Issues

Camera id 1.03608 is overlooking road ids 70712,214784,5221390839801416325_5221390839801416325 and
road ids 214784 and 5221390839801416325_5221390839801416325 have the same start point but different endpoints.

In [405]:
# Sanity check that there is not even one duplicate tuple of encoded origins and destinations - so these tuples can be used as unique ids
try:
    assert cameras_to_roads_gdf_grouped.duplicated(subset=['Oencoded','Dencoded']).any()==False
except:
    print(cameras_to_roads_gdf_grouped[cameras_to_roads_gdf_grouped.duplicated(subset=['Oencoded','Dencoded'])])

# Decide on timeframe and transport modes

In [406]:
# Decide on start and end dates
start_date = '2021-03-08'
end_date = '2021-03-21'

# Specify start and end times
starttime = dt.time(2,0,0)
endtime = dt.time(17,0,0)

# Specify date and time detla
date_delta = dt.timedelta(days=1)
time_delta = dt.timedelta(hours=1)

# Decide on transport modes
modes = ['driving','bicycling','transit']
transit_modes = ['','','bus']

In [408]:
# Convert strings to dates
startdate = datetime.strptime(start_date, '%Y-%m-%d')
startdate = startdate.date()
enddate = datetime.strptime(end_date, '%Y-%m-%d')
enddate = enddate.date()

years = []
months = []
days = []
hours_utc = []
datetime_utc = []
timestamp_utc = []
# Iterate between start and end dates
while startdate <= enddate:
    startdatetime = datetime.combine(startdate, starttime, tzinfo=pytz.timezone('GMT'))
    enddatetime = datetime.combine(startdate, endtime, tzinfo=pytz.timezone('GMT'))
    # Iterate between start and end times
    while startdatetime <= enddatetime:
#         print(startdatetime)
        startdatetime += time_delta
        # Append results
        years.append(startdatetime.year)
        months.append(startdatetime.month)
        days.append(startdatetime.day)
        hours_utc.append(startdatetime.hour)
        datetime_utc.append(startdatetime)
        timestamp_utc.append(int(startdatetime.timestamp()))
    startdate += date_delta

In [409]:
# Create transport modes dataframe
modes_df = pd.DataFrame({'mode': modes,'transit_mode': transit_modes})
# Create dummy key for outer join
modes_df['key'] = 0

# Create datetime dataframe
datetime_df = pd.DataFrame({'departure_time': timestamp_utc,
                            'departure_datetime': datetime_utc,
                            'year':years,
                            'month':months,
                            'day':days,
                            'hour':hours_utc
                           })
# Create dummy key for outer join
datetime_df['key'] = 0

# Outer join the two dataframes
mode_datetime_df = datetime_df.merge(modes_df, how='outer')

# Outer join last dataframe with camera_road dataframe
road_api_requests = cameras_to_roads_gdf.merge(mode_datetime_df, how='outer')
road_api_requests_grouped = cameras_to_roads_gdf_grouped.merge(mode_datetime_df, how='outer')

# Drop dummy columns
road_api_requests = road_api_requests.drop(columns=['key'])
road_api_requests_grouped = road_api_requests_grouped.drop(columns=['key'])

print('Ungrouped requests:',road_api_requests.shape[0])
print('Grouped requests:',road_api_requests_grouped.shape[0],'i.e.',int(100*(road_api_requests.shape[0]-road_api_requests_grouped.shape[0])/road_api_requests.shape[0]),'% reduction')

Ungrouped requests: 1566432
Grouped requests: 163968 i.e. 89 % reduction


# Export requests

In [398]:
# roads_closest_to_cameras.to_file("../data/output/misc/arup_roads_closest_to_cameras_london.geojson", driver='GeoJSON', crs='EPSG:4326')

In [411]:
road_api_requests.head(1).to_csv('../data/output/misc/camera_road_api_requests_sample.csv',index=False)
road_api_requests_grouped.head(1).to_csv('../data/output/misc/camera_road_api_requests_grouped_sample.csv',index=False)

In [412]:
road_api_requests.to_csv('../data/output/misc/camera_road_api_requests.csv',index=False)
road_api_requests_grouped.to_csv('../data/output/misc/camera_road_api_requests_grouped.csv',index=False)