## Exploring folium with co-locating of TIMS and jamcam data as test case
01 July 2019, Jack Hensley

In [1]:
import folium
import boto3
import pandas as pd
import operator
import numpy as np
from scipy.spatial.distance import cdist
import json
% matplotlib inline

import tims data and explore how the detectors look on a node level

In [2]:
# import tims and jam cam locations

# tims
session = boto3.Session(profile_name='dssg')
s3 = session.client('s3')
bucket_name = 'air-pollution-uk'
obj = s3.get_object(Bucket=bucket_name, Key='raw/tims_data/detdata01032019-204523.csv')
tims_df = pd.read_csv(obj['Body']) # 'Body' is a key word

tims_df.head()

Unnamed: 0,TIMESTAMP,NODE,EASTING,NORTHING,FLOW_ACTUAL_15M,SAT_BANDINGS,DETECTOR_NO,TOTAL_DETECTOR_NO,DETECTOR_RATE
0,2019-03-01T20:31:00Z,29/013,513643.42,185553.97,459,0-79%,5,5,1.0
1,2019-03-01T20:31:00Z,29/020,513010.0,187530.0,572,0-79%,3,3,1.0
2,2019-03-01T20:31:00Z,29/028,514272.61,186392.99,456,0-79%,2,2,1.0
3,2019-03-01T20:31:00Z,29/035,517233.36,192368.23,291,0-79%,6,6,1.0
4,2019-03-01T20:31:00Z,29/046,517103.35,192319.26,572,0-79%,2,2,1.0


how many nodes have just one detector?

In [3]:
tims_df.DETECTOR_NO.unique()

array([ 5,  3,  2,  6,  4,  1,  7,  8,  9, 10, 11, 12, 13, 14, 15, 17, 18],
      dtype=int64)

In [4]:
tims_df.groupby('TOTAL_DETECTOR_NO')['NODE'].nunique()

TOTAL_DETECTOR_NO
1      617
2     1070
3      742
4      694
5      386
6      196
7      113
8       70
9       33
10      21
11      12
12       4
13       7
14       5
15       2
17       4
18       1
Name: NODE, dtype: int64

In [5]:
tims_df.shape

(49043, 9)

In [6]:
tims_df[tims_df.TOTAL_DETECTOR_NO == 1].shape

(7535, 9)

the desire is to have one detector nodes within central london that could possibly have a jamcam nearby.

In [7]:
central_london_easting = 530034
central_london_northing = 180381

In [8]:
tims_df.loc[(operator.and_(
    tims_df.EASTING > central_london_easting-5000, tims_df.EASTING < central_london_easting+5000)) &
            (operator.and_(
    tims_df.NORTHING > central_london_northing-5000, tims_df.NORTHING < central_london_northing+5000)) &
            tims_df.TOTAL_DETECTOR_NO == 1
           ].shape

(7507, 9)

In [9]:
tims_eval_df = tims_df.loc[(operator.and_(
    tims_df.EASTING > central_london_easting-5000, tims_df.EASTING < central_london_easting+5000)) &
            (operator.and_(
    tims_df.NORTHING > central_london_northing-5000, tims_df.NORTHING < central_london_northing+5000)) &
            tims_df.TOTAL_DETECTOR_NO == 1
           ]

tims_eval_df.head()

Unnamed: 0,TIMESTAMP,NODE,EASTING,NORTHING,FLOW_ACTUAL_15M,SAT_BANDINGS,DETECTOR_NO,TOTAL_DETECTOR_NO,DETECTOR_RATE
125,2019-03-01T20:30:00Z,04/044,533256.43,182639.73,327,90-99%,1,3,0.33
126,2019-03-01T20:30:00Z,04/045,533490.0,182280.0,354,>=100%,5,5,1.0
130,2019-03-01T20:30:00Z,04/049,534941.38,184946.2,401,0-79%,1,1,1.0
131,2019-03-01T20:30:00Z,04/051,534632.34,185187.43,314,>=100%,7,7,1.0
133,2019-03-01T20:30:00Z,04/056,534943.98,184633.13,337,0-79%,3,3,1.0


df of tims locations in central london with only one detector per node selected. let's see if they're near jam cams.

In [10]:
session = boto3.Session(profile_name='dssg')
s3 = session.client('s3')
bucket_name = 'air-pollution-uk'
obj = s3.get_object(Bucket=bucket_name, Key='processed_data/tims/node_coords.csv')
tims_df_locs = pd.read_csv(obj['Body'])

tims_df_locs.head()

Unnamed: 0.1,Unnamed: 0,NODE,EASTING,NORTHING,LONGITUDE,LATITUDE
0,0,00/002,531633.0,181174.0,-0.104402,51.514163
1,1,00/003,533656.17,180943.61,-0.07535,51.511618
2,2,00/004,531400.0,181550.0,-0.107617,51.517597
3,3,00/005,531389.58,180817.87,-0.10804,51.51102
4,4,00/006,531648.94,180896.39,-0.104276,51.511665


In [11]:
# find tims_df_locs that are same as the ones selected, so that longitude and latitude can be selected out
common = tims_eval_df.merge(tims_df_locs, on=['NODE'])
common.head()

tims_eval_df_locs = tims_df_locs[tims_df_locs.NODE.isin(common.NODE)]
tims_eval_df_locs.shape

(574, 6)

In [12]:
tims_locations = tims_eval_df_locs[['LATITUDE', 'LONGITUDE']]
tims_location_list = tims_locations.values.tolist()
tims_location_list[0:10]

[[51.51161845046011, -0.07535009391243043],
 [51.517596509059146, -0.10761739938442924],
 [51.51101961830904, -0.10804012179092776],
 [51.51732305016336, -0.0887503477742653],
 [51.51476644331376, -0.08972053714003989],
 [51.51335250261293, -0.08895632333614241],
 [51.51263828159642, -0.09256167167025632],
 [51.51085387334042, -0.09308065260619422],
 [51.510840990990474, -0.08641521089281938],
 [51.51342219821471, -0.08408395903449907]]

plot on city of london map

In [13]:
London = [51.506949, -0.122876]
londonmap = folium.Map(
    width=500, height=500,
    location = London,
    zoom_start = 12, 
    tiles = 'stamentoner')

for point in range(0, len(tims_location_list)):
    folium.CircleMarker(tims_location_list[point],
                        radius=1,
                        color='red',
                        fill_color='red',
                        fill_opacity=0.2
                       ).add_to(londonmap)
    
londonmap

find jamcams that are in similar locations

In [14]:
obj = s3.get_object(Bucket=bucket_name, Key='processed_data/jamcams/jamcam_coords.csv')
jc_df = pd.read_csv(obj['Body'])

jc_df.head()

Unnamed: 0,NAME,LATITUDE,LONGITUDE
0,Romford_Rd/Tennyson_Rd,51.5421,0.00524
1,Piccadilly_Circus,51.5096,-0.13484
2,Blackheath_Rd/Greenwich_High_Rd,51.4742,-0.02073
3,Edgware_Way/Broadfields_Ave,51.6216,-0.27384
4,Cromwell_Rd/Earls_Court_Rd,51.4946,-0.1957


In [15]:
jc_files = pd.read_json('C:/Users/joh3146/Downloads/cam_file.json')
jc_df = jc_files.T

list(jc_df.columns.values)

['$type',
 'additionalProperties',
 'children',
 'childrenUrls',
 'commonName',
 'id',
 'lat',
 'lon',
 'placeType',
 'url']

In [16]:
jc_df = jc_df.rename(index=str, columns={"lat": "LATITUDE", "lon": "LONGITUDE"})

In [17]:
jc_locations = jc_df[['LATITUDE', 'LONGITUDE']]
jc_location_list = jc_locations.values.tolist()
jc_location_list[0:10]

[[51.5421, 0.005240000000000001],
 [51.5096, -0.13484000000000002],
 [51.4742, -0.020730000000000002],
 [51.6216, -0.27384000000000003],
 [51.4946, -0.1957],
 [51.514, -0.15409],
 [51.441, -0.10633000000000001],
 [51.5336, -0.02256],
 [51.4949, -0.12891],
 [51.3741, -0.09646]]

find the 20 nearest neighbors, and then report their indices

In [18]:
jc_tims_locs = cdist(jc_location_list, tims_location_list)
jc_tims_locs.shape

(911, 574)

In [19]:
nearest_tims_to_jc_locs = jc_tims_locs.min(axis=1)
nearest_tims_to_jc_locs.shape

(911,)

In [20]:
jc_nearest_tims_idx = np.argsort(nearest_tims_to_jc_locs)[0:20]
jc_nearest_tims_idx

array([200, 908, 434, 360, 113, 376, 154, 372, 205, 589,  61, 504, 903,
       558, 612, 368, 580, 354, 841, 630], dtype=int64)

In [35]:
jc_nearest_tims_locs = np.take(jc_tims_locs, jc_nearest_tims_idx, axis=0)
jc_nearest_tims_locs.shape

(20, 574)

In [29]:
tims_nearest_jc_idx = np.argmin(jc_nearest_tims_locs, axis=1)
tims_nearest_jc_idx

array([382, 267, 327, 544, 101, 471, 515, 489,  77, 223, 150, 149, 455,
        72, 547, 376, 129, 293, 381, 204], dtype=int64)

In [47]:
tims_df_out = tims_eval_df_locs.take(tims_nearest_jc_idx)
tims_df_out = tims_df_out.drop("Unnamed: 0", axis=1)
tims_df_out.head()

Unnamed: 0,NODE,EASTING,NORTHING,LONGITUDE,LATITUDE
1217,08/027,531650.0,180400.0,-0.104445,51.507204
522,02/235,530522.6,181743.9,-0.120184,51.519542
695,04/044,533256.43,182639.73,-0.080466,51.526955
1856,12/017,526880.52,179004.04,-0.173632,51.495749
188,01/161,527128.3,181622.62,-0.169122,51.519226


In [48]:
tims_df_out.to_csv(path_or_buf='C:/Users/joh3146/Downloads/scoot_nodes_for_dssg.csv', sep=',', index=False)

select of jc_df the indices corresponding to the 20 nearest neighbors and export to csv

In [21]:
jc_df_out = jc_df.take(jc_nearest_tims_idx)
jc_df_out.index

Index(['JamCams_00001.03601', 'JamCams_00001.07591', 'JamCams_00001.01252',
       'JamCams_00001.06597', 'JamCams_00001.08853', 'JamCams_00001.06510',
       'JamCams_00001.04280', 'JamCams_00001.04534', 'JamCams_00001.06590',
       'JamCams_00001.07382', 'JamCams_00001.04328', 'JamCams_00001.06514',
       'JamCams_00001.03604', 'JamCams_00001.06501', 'JamCams_00001.05900',
       'JamCams_00001.03490', 'JamCams_00001.08926', 'JamCams_00001.07355',
       'JamCams_00001.04336', 'JamCams_00001.09560'],
      dtype='object')

In [22]:
jc_df_out.to_csv(path_or_buf='C:/Users/joh3146/Downloads/jamcams_near_tims.csv', sep=',', index=False)

plot to evaluate

In [23]:
jc_out_locations = jc_df_out[['LATITUDE', 'LONGITUDE']]
jc_out_location_list = jc_out_locations.values.tolist()
jc_out_location_list

[[51.5072, -0.10443000000000001],
 [51.5196, -0.12019],
 [51.527, -0.08051000000000001],
 [51.4957, -0.17359000000000002],
 [51.5193, -0.16915000000000002],
 [51.4987, -0.11242],
 [51.47, -0.17175],
 [51.4741, -0.11272000000000001],
 [51.5114, -0.11924000000000001],
 [51.5229, -0.12584],
 [51.4897, -0.13145],
 [51.5018, -0.15163000000000001],
 [51.4934, -0.1005],
 [51.5011, -0.12628],
 [51.4842, -0.18431],
 [51.4968, -0.08187000000000001],
 [51.53, -0.16889],
 [51.5314, -0.11401000000000001],
 [51.4739, -0.09045],
 [51.5427, -0.13744]]

In [37]:
tims_out_locations = tims_df_out[['LATITUDE', 'LONGITUDE']]
tims_out_location_list = tims_out_locations.values.tolist()
tims_out_location_list

[[51.5072038583171, -0.10444549013942342],
 [51.51954223047245, -0.12018365469107276],
 [51.52695477101432, -0.08046626301758986],
 [51.49574851538314, -0.1736323547564191],
 [51.519225717542746, -0.16912184700730507],
 [51.49869424890465, -0.11233744956233066],
 [51.470081980794575, -0.17173276524340406],
 [51.474018387680466, -0.11274198169253108],
 [51.51132508408336, -0.11919924479598855],
 [51.52281006358941, -0.12582156280936407],
 [51.48965347151054, -0.1315339950978373],
 [51.501812843348645, -0.15153268215566],
 [51.49330020479784, -0.10048447583534634],
 [51.50103610567439, -0.1261999432376206],
 [51.48409836311331, -0.18428652602625736],
 [51.49669026604021, -0.08188527277899889],
 [51.53008817196279, -0.16896145941764654],
 [51.53135816829704, -0.11411898735528392],
 [51.473938572157444, -0.09056108554190924],
 [51.54281656050744, -0.13745950602551027]]

In [40]:
London = [51.506949, -0.122876]
londonmap = folium.Map(
    width=500, height=500,
    location = London,
    zoom_start = 12, 
    tiles = 'stamentoner')

for point in range(0, len(tims_out_location_list)):
    folium.CircleMarker(tims_out_location_list[point],
                        radius=10,
                        color='red',
                        fill_color='red',
                        fill_opacity=0
                       ).add_to(londonmap)
    
for point in range(0, len(jc_out_location_list)):
    folium.CircleMarker(jc_out_location_list[point],
                        radius=5,
                        color='blue',
                        fill_color='blue',
                        fill_opacity=0
                       ).add_to(londonmap)
    
londonmap.save("C:/Users/joh3146/Documents/dssg/jamcams_tims_selected_for_eval.html")
londonmap