In [2]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
%matplotlib inline
import seaborn as sns; sns.set_theme(color_codes=True)
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors
import warnings
warnings.filterwarnings('ignore')
from scipy import stats
import math

In [24]:
# Load the street network and keep only the edge id and its geometry;
# those are the only two columns read from it in this cell.
streets_path = 'data/streets.json'
streets_columns = ['street_edge_id', 'geometry']
streets = gpd.read_file(streets_path)[streets_columns]
streets


Unnamed: 0,street_edge_id,geometry
0,87,"LINESTRING (-74.03996 40.94794, -74.04089 40.9..."
1,184,"LINESTRING (-74.03088 40.95201, -74.03136 40.9..."
2,477,"LINESTRING (-74.03435 40.95815, -74.03432 40.9..."
3,273,"LINESTRING (-74.03670 40.94918, -74.03755 40.9..."
4,51,"LINESTRING (-74.02456 40.94985, -74.02463 40.9..."
...,...,...
484,465,"LINESTRING (-74.02582 40.94882, -74.02580 40.9..."
485,449,"LINESTRING (-74.03218 40.95121, -74.03216 40.9..."
486,64,"LINESTRING (-74.02885 40.94212, -74.02929 40.9..."
487,55,"LINESTRING (-74.03680 40.95124, -74.03641 40.9..."


In [37]:
# Load the label points and preview the first few rows.
labels_path = 'data/labels.json'
labels = gpd.read_file(labels_path)
labels.head()

Unnamed: 0,attribute_id,label_type,street_edge_id,osm_street_id,neighborhood,avg_image_capture_date,avg_label_date,severity,is_temporary,agree_count,disagree_count,notsure_count,geometry
0,814812,CurbRamp,166,11579039,South Oradell,2018-10-01 00:00:00,2022-07-15 01:51:16.295,2.0,False,1,0,0,POINT (-74.03070 40.94373)
1,814798,CurbRamp,62,11569928,South Oradell,2020-07-31 12:00:00,2022-09-25 07:34:21.320,2.0,False,3,0,0,POINT (-74.02892 40.94461)
2,811223,SurfaceProblem,71,11570206,North Oradell,2018-07-01 00:00:00,2022-03-18 01:17:56.551,1.0,False,2,0,0,POINT (-74.03667 40.96669)
3,814504,SurfaceProblem,385,61313120,West Oradell,2022-03-01 00:00:00,2022-09-12 15:27:26.860,1.0,False,0,0,0,POINT (-74.04522 40.95797)
4,810476,NoSidewalk,375,61300744,North Oradell,2021-10-01 00:00:00,2022-06-18 00:39:17.434,5.0,False,2,0,0,POINT (-74.03003 40.96650)


In [16]:
# Separate curb-ramp labels from every other label type, using one boolean
# mask so the two subsets are guaranteed to be complementary.
is_curb_ramp = labels['label_type'] == 'CurbRamp'
curb_ramp = labels[is_curb_ramp]
others = labels[~is_curb_ramp]

In [17]:
# Turn the 1-5 severity rating into a signed weight.
# Curb ramps count positively (severity 1 -> 1.0 down to severity 5 -> 0.2);
# every other label type counts negatively (severity 1 -> -0.2 down to
# severity 5 -> -1). Values that are not keys of the mapping are left as-is.
dict1 = {
    1.0: 1.0,
    2.0: 0.8,
    3.0: 0.6,
    4.0: 0.4,
    5.0: 0.2,
}
dict2 = {
    1.0: -0.2,
    2.0: -0.4,
    3.0: -0.6,
    4.0: -0.8,
    5.0: -1,
}
curb_ramp = curb_ramp.replace(to_replace={"severity": dict1})
others = others.replace(to_replace={"severity": dict2})

In [18]:
# Recombine the weighted curb-ramp and non-curb-ramp labels, then total the
# weights per street edge.
df = pd.concat([curb_ramp, others])
# numeric_only=True restricts the sum to numeric/bool columns. Without it the
# aggregation also tries to "sum" the geometry, string and date columns:
# older pandas silently dropped them, pandas >= 2.0 no longer does.
# NOTE: of the summed columns only `severity` (and the *_count columns) are
# meaningful totals; the summed id columns are just a by-product.
df_grouped = (
    df.groupby(by=["street_edge_id"])
    .sum(numeric_only=True)
    .reset_index()
)
df_grouped.head()

Unnamed: 0,street_edge_id,attribute_id,osm_street_id,severity,is_temporary,agree_count,disagree_count,notsure_count
0,1,8159119,115622970,-5.6,0,52,5,1
1,2,9788895,138747564,-5.6,0,56,6,0
2,3,15500362,219718413,-9.4,0,92,14,1
3,5,8971030,127209841,-0.4,0,83,10,1
4,6,11423266,161903434,-7.8,0,72,10,1


In [19]:
#apply sigmoid function to the weighted score
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e^-x) of a scalar.

    The result lies in [0, 1]; sigmoid(0) is 0.5.

    The two branches are mathematically identical but keep the argument of
    ``math.exp`` non-positive, so large-magnitude negative inputs return 0.0
    instead of raising ``OverflowError``.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # x < 0 (or NaN, which propagates as NaN): exp(x) cannot overflow here.
    z = math.exp(x)
    return z / (1 + z)
# Squash each street's summed weight through the sigmoid, element by element.
severity_total = df_grouped['severity']
df_grouped['score_sigmoid'] = severity_total.map(sigmoid)

In [25]:
# Attach each street's sigmoid score to its geometry. The inner join keeps
# only streets whose street_edge_id appears in the grouped label scores.
sigmoid_score = df_grouped[['street_edge_id', 'score_sigmoid']]
# This cell overwrites `streets`, so drop any score column left over from a
# previous run first; otherwise re-running the cell would produce
# score_sigmoid_x / score_sigmoid_y instead of a single score_sigmoid column.
streets = (
    streets
    .drop(columns='score_sigmoid', errors='ignore')
    .merge(sigmoid_score, on='street_edge_id', how='inner')
)
streets

Unnamed: 0,street_edge_id,geometry,score_sigmoid
0,87,"LINESTRING (-74.03996 40.94794, -74.04089 40.9...",0.768525
1,184,"LINESTRING (-74.03088 40.95201, -74.03136 40.9...",0.998887
2,477,"LINESTRING (-74.03435 40.95815, -74.03432 40.9...",0.500000
3,273,"LINESTRING (-74.03670 40.94918, -74.03755 40.9...",0.401312
4,51,"LINESTRING (-74.02456 40.94985, -74.02463 40.9...",0.000335
...,...,...,...
458,211,"LINESTRING (-74.02045 40.95287, -74.02178 40.9...",0.006693
459,449,"LINESTRING (-74.03218 40.95121, -74.03216 40.9...",0.401312
460,64,"LINESTRING (-74.02885 40.94212, -74.02929 40.9...",0.802184
461,55,"LINESTRING (-74.03680 40.95124, -74.03641 40.9...",0.119203


In [26]:
# Write the scored streets to disk as GeoJSON.
streets_score_path = "data/streets_score.json"
streets.to_file(streets_score_path, driver="GeoJSON")

In [29]:
# Drop avg_image_capture_date and avg_label_date from labels.
# errors='ignore' makes the cell safe to re-run: once the columns are gone a
# plain drop would raise KeyError and force `labels` to be re-read from disk.
labels = labels.drop(
    columns=['avg_image_capture_date', 'avg_label_date'],
    errors='ignore',
)

In [35]:
# Interactive base map of the label points, coloured by label type.
labels_map = labels.explore(
    tiles='cartodbpositron',
    column='label_type',
)

In [36]:
# Draw the scored streets on top of the existing label map (passed as `m`),
# coloured by score_sigmoid on a red-yellow-green scale.
streets.explore(
    m=labels_map,
    column='score_sigmoid',
    cmap='RdYlGn',
    tiles='cartodbpositron',
)
