In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import scipy.linalg
import scipy.stats
import copy
import shapely.wkt 
import itertools
import multiprocessing as mp

from functools import partial
from sklearn.neighbors import BallTree
from shapely.geometry import Point,LineString
from tqdm import tqdm 

# Import data

In [3]:
# Import camera metadata
camera_metadata_filename = "../data/raw/camera_metadata.csv"
camera_metadata = pd.read_csv(camera_metadata_filename)

# Import London road network links.
# The cells below read `london_roads`, so it has to be loaded here for the
# notebook to run top-to-bottom on a fresh kernel.
london_roads_filename = "../data/raw/arup_network_links_london2.geojson"
london_roads = gpd.read_file(london_roads_filename)

# Clean data

In [4]:
# Keep only working cameras (flag == 0) and only the columns needed later.
# Selecting rows and columns in one .loc call and taking an explicit copy means
# the assignment below writes to an independent frame instead of a slice of the
# raw data (the chained-selection pattern that triggers SettingWithCopyWarning).
camera_metadata = camera_metadata.loc[
    camera_metadata.flag == 0,
    ['id', 'camera_id', 'location', 'notes', 'borough_name', 'borough_gss_code'],
].copy()

# Convert the WKT strings in `location` to shapely geometries
camera_metadata['location'] = camera_metadata['location'].apply(shapely.wkt.loads)

# Convert pandas to geopandas, using the parsed locations as the geometry
# NOTE(review): no CRS is set here; confirm the coordinate system of `location`
camera_metadata_geo = gpd.GeoDataFrame(camera_metadata, geometry=camera_metadata.location)

In [16]:
# Drop the columns that are not used, expose the link identifier as `road_id`
# and keep only the links whose `modes` value contains "car"
london_roads = (
    london_roads
    .drop(columns=['s2_from','s2_to','ids','u','v','key'])
    .rename(columns={"id":"road_id"})
    .loc[lambda links: links.modes.str.contains("car")]
)
# Export roads only
# london_roads.to_file("../data/raw/arup_road_links_london2.geojson", driver='GeoJSON', crs='EPSG:4326')

# Prepare camera metadata collection template

In [33]:
# Build an empty annotation template: camera identity columns plus one column
# per annotation field, each pre-filled with its default value.
# NOTE(review): every camera row is repeated 12 times; the reason for 12 is not
# stated in this notebook — confirm what the 12 rows per camera stand for.
camera_metadata_geo_metadata = (
    camera_metadata_geo[['camera_id','geometry','notes']]
    .loc[lambda cams: cams.index.repeat(12)]
    .assign(
        ambiguous_collection=False,
        area_classification='',
        camera_dislocated=False,
        camera_visibility='high',
        visibility_reason='',
        pedestrian_crossing_exists=False,
        cycle_path_exists=False,
        bus_lane_exists=False,
        on_bridge=False,
        road_works_exist=False,
        partially_observed_road=False,
        road_id='',
        road_network_name='',
    )
)

In [35]:
# Write the annotation template to disk (the repeated camera index is written as the first column)
camera_metadata_geo_metadata.to_csv(path_or_buf='../data/input/tfl_camera_detailed_metadata.csv')

# Find the roads closest to each camera

In [30]:
def nearest(p,lines,k):
    """Return the k roads closest to a single camera.

    Parameters
    ----------
    p : sequence
        Camera record: ``p[0]`` is an object exposing ``.distance(other)`` and
        ``p[1]`` is the camera id.
    lines : iterable of (geometry, road_id) pairs
        Candidate roads; each geometry is passed to ``p[0].distance``.
    k : int
        Number of nearest roads to keep.

    Returns
    -------
    dict
        ``camera_id``: the camera id; ``road_ids``: ids of the k nearest roads,
        closest first; ``distance``: the matching distances in the same order.
        Fewer than k entries are returned when ``lines`` has fewer than k roads.
    """
    camera_id = p[1]
    camera_geometry = p[0]

    # Distance from the camera to every road, ordered closest first.
    # The sort is stable, so equally distant roads keep their input order.
    ranked = sorted(
        ((camera_geometry.distance(road), road_id) for road, road_id in lines),
        key=lambda pair: pair[0],
    )
    k_nearest = ranked[0:k]

    return {
        'camera_id': camera_id,
        'road_ids': [road_id for _, road_id in k_nearest],
        'distance': [dist for dist, _ in k_nearest],
    }
    
def nearest_roads(source_gpd,dest_gpd,k=1,processes=4):
    """Find the k nearest roads for every camera, in parallel.

    Parameters
    ----------
    source_gpd : DataFrame
        Cameras; must provide ``geometry`` and ``camera_id`` columns.
    dest_gpd : DataFrame
        Roads; must provide ``geometry`` and ``road_id`` columns.
    k : int, default 1
        Number of nearest roads to keep per camera.
    processes : int, default 4
        Number of worker processes in the pool.

    Returns
    -------
    list of dict
        One ``nearest`` result per camera. Results are collected with
        ``imap_unordered``, so they arrive in completion order rather than in
        the row order of ``source_gpd``; use each entry's ``camera_id`` to
        match it back to a camera.
    """
    roads_nearest_to_camera = []

    # Get list of roads and their ids
    roads = dest_gpd[['geometry','road_id']].values

    # Get list of cameras and their ids
    cameras = source_gpd[['geometry','camera_id']].values

    # Bind the arguments shared by every camera once, outside the loop
    find_nearest = partial(nearest, lines=roads, k=k)

    # Parallelise across `processes` worker processes
    with mp.Pool(processes=processes) as pool:
        with tqdm(total=cameras.shape[0]) as pbar:
            for elem in pool.imap_unordered(find_nearest, cameras):
                roads_nearest_to_camera.append(elem)
                pbar.update()

    return roads_nearest_to_camera

In [32]:
# Three nearest roads per camera; the cells below expect exactly three road ids per camera
roads_nearest_to_cameras = nearest_roads(source_gpd=camera_metadata_geo, dest_gpd=london_roads, k=3)

100%|██████████| 804/804 [1:19:14<00:00,  5.91s/it]


In [33]:
# Create a dataframe from list of dictionaries (one row per camera)
roads_nearest_to_cameras_df = pd.DataFrame(roads_nearest_to_cameras)

# Get road ids
road_ids = roads_nearest_to_cameras_df['road_ids'].values
# Merge all road ids into one flat array
# NOTE(review): `road_ids` is not referenced by any other cell visible here — confirm it is still needed
road_ids = np.array(list(itertools.chain.from_iterable(road_ids)))

# Create a column for each road list element.
# The helper frame reuses the parent index so rows line up explicitly on assignment.
roads_nearest_to_cameras_df[['road_id1','road_id2','road_id3']] = pd.DataFrame(
    roads_nearest_to_cameras_df['road_ids'].to_list(),
    columns=['road_id1','road_id2','road_id3'],
    index=roads_nearest_to_cameras_df.index,
)

# Drop unnecessary columns
roads_nearest_to_cameras_df = roads_nearest_to_cameras_df.drop(columns=['road_ids'])

In [34]:
# Create a mapping between camera and roads:
# one (camera_id, road_id) row per camera and neighbour rank (1 to 3)
camera_road_pairs = [
    roads_nearest_to_cameras_df[['camera_id', f'road_id{rank}']].values
    for rank in range(1, 4)
]
camera_road_pairs = np.array(list(itertools.chain.from_iterable(camera_road_pairs)))
camera_to_road_mapping = pd.DataFrame(camera_road_pairs, columns=['camera_id','road_id'])

# Attach the road ids to the cameras, then the road attributes to each pair
camera_roads = pd.merge(camera_metadata_geo, camera_to_road_mapping, on="camera_id")
camera_roads = pd.merge(camera_roads,london_roads, on="road_id")

# The second merge suffixes the two clashing `geometry` columns with _x / _y;
# give them explicit names and drop the camera columns that are no longer needed
camera_roads = (
    camera_roads
    .rename(columns={"geometry_x":"camera_geometry","geometry_y":"road_geometry"})
    .drop(columns=['id','location'])
)

## Export data

In [35]:
# Write the camera-to-road table to disk
camera_roads.to_csv(path_or_buf='../data/output/misc/cameras_to_roads.csv')