In [1]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
%matplotlib inline
import seaborn as sns; sns.set_theme(color_codes=True)
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors
import warnings
warnings.filterwarnings('ignore')
from scipy import stats
import math

In [36]:
# Load every processed label, then split the table into one GeoDataFrame per
# label type, each reprojected to EPSG:2285.
# NOTE(review): EPSG:2285 is a Washington State Plane zone while the file path
# says Oradell -- confirm this projection is the intended one.
labels = gpd.read_file('data/processed-labels/labels_all_oradell/labels_all_oradell.shp')
curb, no_curb, crosswalk = (
    labels.loc[labels['label_type'] == label_type].to_crs("EPSG:2285")
    for label_type in ('CurbRamp', 'NoCurbRamp', 'Crosswalk')
)


In [11]:
# Read the Oradell OSM roads shapefile.
roads = gpd.read_file('data/oradell-osm-roads/oradell-osm-roads-full.shp')

In [12]:
# Expose the OSM `highway` tag under the name `way_type`.
roads = roads.rename(columns={'highway': 'way_type'})

In [13]:
# Display the roads table after the column rename.
roads

Unnamed: 0,way_type,osmid,geometry
0,service,11565015,"LINESTRING (-74.02942 40.95394, -74.02942 40.9..."
1,service,11565015,"LINESTRING (-74.02988 40.95400, -74.02988 40.9..."
2,footway,845525905,"LINESTRING (-74.03794 40.95856, -74.03763 40.9..."
3,footway,845525905,"LINESTRING (-74.03763 40.95920, -74.03794 40.9..."
4,footway,845525905,"LINESTRING (-74.03763 40.95920, -74.03790 40.9..."
...,...,...,...
1825,primary,1027007129,"LINESTRING (-74.02582 40.94882, -74.02580 40.9..."
1826,primary,1027007129,"LINESTRING (-74.03218 40.95121, -74.03216 40.9..."
1827,primary,1027007129,"LINESTRING (-74.02885 40.94212, -74.02929 40.9..."
1828,primary,1027007129,"LINESTRING (-74.03680 40.95124, -74.03641 40.9..."


In [14]:
# Pull the first and last vertex (as coordinate tuples) out of every road
# linestring.
road_lines = roads["geometry"]
first_coord = road_lines.apply(lambda line: line.coords[0])
last_coord = road_lines.apply(lambda line: line.coords[-1])

# Store both endpoints as new columns on the roads GeoDataFrame.
roads["start_coord"], roads["end_coord"] = first_coord, last_coord

In [15]:
# Stack start points and end points into one (coord, way_type) table.
p0, p1 = (
    roads[[endpoint_col, 'way_type']].rename(columns={endpoint_col: 'coord'})
    for endpoint_col in ('start_coord', 'end_coord')
)
# Keep a single row per (coord, way_type) pair; the original row labels are
# retained (the index is not reset).
result = pd.concat([p0, p1]).drop_duplicates(subset=['coord', 'way_type'], keep='first')
result

Unnamed: 0,coord,way_type
0,"(-74.0294205, 40.9539446)",service
1,"(-74.0298831, 40.9540022)",service
2,"(-74.037941, 40.958563)",footway
3,"(-74.037627, 40.959204)",footway
5,"(-74.031958, 40.9484778)",service
...,...,...
1335,"(-74.021785, 40.952872)",primary
1336,"(-74.0257547, 40.9490783)",primary
1337,"(-74.0320109, 40.9521345)",primary
1338,"(-74.029802, 40.942352)",primary


In [38]:
# Convert each endpoint into a shapely Point.
# Build the Point from the `coord` tuple only: passing the whole row (which
# also holds `way_type`) to Point() relies on positional fallback behaviour.
points = result['coord'].apply(Point)
# The coordinates are lon/lat, so declare EPSG:4326 when constructing the
# GeoDataFrame (the {'init': ...} dict form is deprecated), then reproject.
# NOTE(review): EPSG:2285 is a Washington State Plane zone, but these
# longitudes are around -74 -- confirm this projection is the intended one.
p_geo = gpd.GeoDataFrame(result, geometry=points, crs="EPSG:4326")
p_geo = p_geo.to_crs("EPSG:2285")
p_geo

Unnamed: 0,coord,way_type,geometry
0,"(-74.0294205, 40.9539446)",service,POINT (13876921.567 1629679.527)
1,"(-74.0298831, 40.9540022)",service,POINT (13876803.837 1629623.323)
2,"(-74.037941, 40.958563)",footway,POINT (13874007.092 1629716.185)
3,"(-74.037627, 40.959204)",footway,POINT (13873944.389 1629959.209)
5,"(-74.031958, 40.9484778)",service,POINT (13877488.546 1627629.189)
...,...,...,...
1335,"(-74.021785, 40.952872)",primary,POINT (13878890.315 1630570.608)
1336,"(-74.0257547, 40.9490783)",primary,POINT (13878779.447 1628796.539)
1337,"(-74.0320109, 40.9521345)",primary,POINT (13876709.572 1628722.359)
1338,"(-74.029802, 40.942352)",primary,POINT (13879265.772 1626126.646)


In [84]:
# Add a `buffer_ft` column: the buffer radius to draw around each endpoint,
# looked up from its way_type.
# Every way_type currently gets 40. Earlier notes in this cell listed
# primary=120, secondary=100, tertiary=80, residential=40, everything else=60,
# but those values are not what the code applies; edit the table below to use
# per-type radii.
BUFFER_FT_BY_WAY_TYPE = {
    'primary': 40,
    'secondary': 40,
    'tertiary': 40,
}
DEFAULT_BUFFER_FT = 40  # used for any way_type not listed above
p_geo['buffer_ft'] = p_geo['way_type'].apply(
    lambda way_type: BUFFER_FT_BY_WAY_TYPE.get(way_type, DEFAULT_BUFFER_FT)
)
p_geo

Unnamed: 0,coord,way_type,geometry,buffer_ft
0,"(-74.0294205, 40.9539446)",service,POINT (13876921.567 1629679.527),40
1,"(-74.0298831, 40.9540022)",service,POINT (13876803.837 1629623.323),40
2,"(-74.037941, 40.958563)",footway,POINT (13874007.092 1629716.185),40
3,"(-74.037627, 40.959204)",footway,POINT (13873944.389 1629959.209),40
5,"(-74.031958, 40.9484778)",service,POINT (13877488.546 1627629.189),40
...,...,...,...,...
1335,"(-74.021785, 40.952872)",primary,POINT (13878890.315 1630570.608),40
1336,"(-74.0257547, 40.9490783)",primary,POINT (13878779.447 1628796.539),40
1337,"(-74.0320109, 40.9521345)",primary,POINT (13876709.572 1628722.359),40
1338,"(-74.029802, 40.942352)",primary,POINT (13879265.772 1626126.646),40


In [73]:
# List the distinct way_type values present among the endpoints.
print(p_geo['way_type'].unique())

['service' 'footway' 'path' 'residential' 'tertiary' 'secondary' 'primary']


In [85]:
def _buffer_row(row):
    """Return the row's geometry buffered by that row's own ``buffer_ft``."""
    return row.geometry.buffer(row.buffer_ft)


# Work on a copy so `p_geo` keeps its point geometries.
buffered = p_geo.copy()
buffered['geometry'] = buffered.apply(_buffer_row, axis=1)
buffered.head(3)

Unnamed: 0,coord,way_type,geometry,buffer_ft
0,"(-74.0294205, 40.9539446)",service,"POLYGON ((13876961.567 1629679.527, 13876961.3...",40
1,"(-74.0298831, 40.9540022)",service,"POLYGON ((13876843.837 1629623.323, 13876843.6...",40
2,"(-74.037941, 40.958563)",footway,"POLYGON ((13874047.092 1629716.185, 13874046.9...",40


In [86]:
# Keep the labels that fall inside at least one endpoint buffer (inner join,
# so a label inside several buffers appears once per buffer).
# `predicate=` replaces the deprecated `op=` keyword of sjoin.
curb_tp = curb.sjoin(buffered, predicate="within", how="inner")
no_curb_tp = no_curb.sjoin(buffered, predicate="within", how="inner")
crosswalk_tp = crosswalk.sjoin(buffered, predicate="within", how="inner")

In [87]:
# Distinct label ids that matched a buffer, one list per label type.
id_list_a, id_list_b, id_list_c = (
    matched['label_id'].unique().tolist()
    for matched in (curb_tp, no_curb_tp, crosswalk_tp)
)

# The *_fp frames hold the labels of each type that matched no buffer.
curb_fp = curb.loc[~curb['label_id'].isin(id_list_a)]
no_curb_fp = no_curb.loc[~no_curb['label_id'].isin(id_list_b)]
crosswalk_fp = crosswalk.loc[~crosswalk['label_id'].isin(id_list_c)]

In [94]:
# `base` is only created by a cell that is currently commented out, so on a
# fresh kernel the name does not exist. Draw onto it when it is present;
# otherwise pass m=None so explore() creates a new map.
crosswalk_fp.explore(m=globals().get("base"), tiles="cartodbpositron", tooltip="label_id")

In [90]:
# Interactive map of the roads, coloured by the way_type column.
base_roads = roads.explore(tiles='cartodb_positron', column='way_type', cmap="Set2")

In [91]:
#new = gpd.read_file('data/processed-labels/oradell_ground_truth_labels.shp')

In [92]:
#select only ground_tru = 0 and label_type = Crosswalk
#new = new.loc[(new['ground_tru']==0) & (new['label_type']=='Crosswalk')]

In [95]:
#base = new.explore(tiles="cartodbpositron", tooltip="label_id", color = "red")
#base

In [96]:
# Concatenate curb_fp, no_curb_fp and crosswalk_fp into a single table.
fp = pd.concat([curb_fp, no_curb_fp, crosswalk_fp])

In [97]:
# Create a new data frame holding only the label_id column from labels.
# Take an explicit copy: a column is added to this frame later, and writing
# into a slice of `labels` is chained assignment (SettingWithCopyWarning).
df = labels[['label_id']].copy()

In [98]:
# Add a column to df called false_positive:
#   1 if the label_id appears in fp, else 0.
# isin() performs the membership test in one vectorised pass instead of
# scanning fp['label_id'] once per row, and assign() returns a new frame so
# nothing is written into a slice of `labels`.
df = df.assign(false_positive=df['label_id'].isin(fp['label_id']).astype('int64'))

In [99]:
# Display the label_id / false_positive table.
df

Unnamed: 0,label_id,false_positive
0,9,0
1,10,0
2,11,0
3,12,0
4,13,0
...,...,...
12129,13096,1
12130,13097,0
12131,13098,0
12132,13099,1


In [100]:
# Save the label_id / false_positive table to a CSV file, without the index.
df.to_csv('data/features/false_positives.csv', index=False)