In [1]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
%matplotlib inline
import seaborn as sns; sns.set_theme(color_codes=True)
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors
import warnings
warnings.filterwarnings('ignore')
from scipy import stats
import math

In [2]:
# Load every labeled point from the Oradell labels shapefile.
labels = gpd.read_file('data/processed-labels/labels_all_oradell/labels_all_oradell.shp')

# Split by label type and reproject each subset to EPSG:2285 (a projected CRS in feet).
# NOTE(review): EPSG:2285 is the Washington North state-plane projection, while the road
# coordinates shown in this notebook sit near lon -74.03 / lat 40.95 -- confirm whether a
# local projected CRS is intended (every to_crs call would have to change together).
curb = labels[labels['label_type'].eq('CurbRamp')].to_crs("EPSG:2285")
no_curb = labels[labels['label_type'].eq('NoCurbRamp')].to_crs("EPSG:2285")

In [7]:
# Read the Oradell OSM roads shapefile.
roads_shp = 'data/oradell-osm-roads/ordell-osm-roads.shp'
roads = gpd.read_file(roads_shp)

In [9]:
#roads.explore( column='highway', tiles='cartodbpositron', cmap='Set2')


In [9]:
# Rename the OSM 'highway' attribute to 'way_type' (reassigned rather than mutated in place).
roads = roads.rename(columns={'highway': 'way_type'})

In [11]:
# Keep only osm_id, way_type and geometry. The explicit copy makes `roads` an independent
# frame, so columns added to it in later cells are not written into a slice of the
# originally loaded table (which would trigger pandas' SettingWithCopyWarning).
roads = roads[['osm_id', 'way_type', 'geometry']].copy()


In [12]:
# Display the trimmed roads table (osm_id, way_type, geometry).
roads

Unnamed: 0,osm_id,way_type,geometry
0,11562297,residential,"LINESTRING (-74.03996 40.94794, -74.04089 40.9..."
1,11562297,residential,"LINESTRING (-74.03088 40.95201, -74.03136 40.9..."
2,11564127,residential,"LINESTRING (-74.03435 40.95815, -74.03432 40.9..."
3,11564531,residential,"LINESTRING (-74.03670 40.94918, -74.03755 40.9..."
4,11564531,residential,"LINESTRING (-74.02456 40.94985, -74.02463 40.9..."
...,...,...,...
484,1027007129,primary,"LINESTRING (-74.02582 40.94882, -74.02580 40.9..."
485,1027007129,primary,"LINESTRING (-74.03218 40.95121, -74.03216 40.9..."
486,1027007129,primary,"LINESTRING (-74.02885 40.94212, -74.02929 40.9..."
487,1027007129,primary,"LINESTRING (-74.03680 40.95124, -74.03641 40.9..."


In [13]:
# Pull the first and last vertex of every road linestring as coordinate tuples.
line_geoms = roads["geometry"]
first_coord = line_geoms.apply(lambda line: line.coords[0])
last_coord = line_geoms.apply(lambda line: line.coords[-1])

# Attach both endpoints to the roads table as the start_coord / end_coord columns.
roads = roads.assign(start_coord=first_coord, end_coord=last_coord)

In [14]:
# Stack start points and end points into one (coord, way_type) table.
p0 = roads[['start_coord', 'way_type']].rename(columns={'start_coord': 'coord'})
p1 = roads[['end_coord', 'way_type']].rename(columns={'end_coord': 'coord'})

# An endpoint shared by several segments of the same way_type is kept only once
# (first occurrence wins); the original row labels are retained.
result = pd.concat([p0, p1]).drop_duplicates(subset=['coord', 'way_type'], keep='first')
result

Unnamed: 0,coord,way_type
0,"(-74.0399581, 40.9479384)",residential
1,"(-74.0308757, 40.9520131)",residential
2,"(-74.034354, 40.958145)",residential
3,"(-74.0366974, 40.9491769)",residential
4,"(-74.024558, 40.949853)",residential
...,...,...
483,"(-74.021785, 40.952872)",primary
484,"(-74.0257547, 40.9490783)",primary
485,"(-74.0320109, 40.9521345)",primary
486,"(-74.029802, 40.942352)",primary


In [47]:
# Turn each endpoint coordinate tuple into a shapely Point. The Point is built from the
# 'coord' column only: handing the whole row (coord tuple + way_type string) to Point()
# relied on positional fallback into the row and is rejected by newer shapely releases.
points = result['coord'].apply(Point)

# Declare the source CRS as EPSG:4326 (lon/lat) at construction time -- the
# {'init': 'epsg:4326'} dict form is deprecated -- then reproject to EPSG:2285, the same
# CRS the curb / no_curb label layers are projected to.
p_geo = gpd.GeoDataFrame(result, geometry=points, crs="EPSG:4326")
p_geo = p_geo.to_crs("EPSG:2285")
p_geo

Unnamed: 0,coord,way_type,geometry
0,"(-74.0399581, 40.9479384)",residential,POINT (13875774.287 1626194.660)
1,"(-74.0308757, 40.9520131)",residential,POINT (13876994.306 1628866.289)
2,"(-74.034354, 40.958145)",residential,POINT (13874913.946 1630160.497)
3,"(-74.0366974, 40.9491769)",residential,POINT (13876259.393 1627086.211)
4,"(-74.024558, 40.949853)",residential,POINT (13878890.271 1629220.215)
...,...,...,...
483,"(-74.021785, 40.952872)",primary,POINT (13878890.315 1630570.608)
484,"(-74.0257547, 40.9490783)",primary,POINT (13878779.447 1628796.539)
485,"(-74.0320109, 40.9521345)",primary,POINT (13876709.572 1628722.359)
486,"(-74.029802, 40.942352)",primary,POINT (13879265.772 1626126.646)


In [48]:
# Add a buffer_ft column: the buffer radius assigned to each endpoint by way_type.
# Every road class currently gets the same 120 radius, which is what the previous chained
# conditional evaluated to on every branch. An earlier note here listed tiered radii
# (primary 120, secondary 100, tertiary 80, residential 40, everything else 60) that the
# code did not apply -- TODO confirm whether tiering should be restored; if so, edit the
# table below.
BUFFER_FT_BY_WAY_TYPE = {
    'primary': 120,
    'secondary': 120,
    'tertiary': 120,
}
DEFAULT_BUFFER_FT = 120  # residential and any other / missing way_type

p_geo['buffer_ft'] = p_geo['way_type'].apply(
    lambda way_type: BUFFER_FT_BY_WAY_TYPE.get(way_type, DEFAULT_BUFFER_FT)
)
p_geo

Unnamed: 0,coord,way_type,geometry,buffer_ft
0,"(-74.0399581, 40.9479384)",residential,POINT (13875774.287 1626194.660),120
1,"(-74.0308757, 40.9520131)",residential,POINT (13876994.306 1628866.289),120
2,"(-74.034354, 40.958145)",residential,POINT (13874913.946 1630160.497),120
3,"(-74.0366974, 40.9491769)",residential,POINT (13876259.393 1627086.211),120
4,"(-74.024558, 40.949853)",residential,POINT (13878890.271 1629220.215),120
...,...,...,...,...
483,"(-74.021785, 40.952872)",primary,POINT (13878890.315 1630570.608),120
484,"(-74.0257547, 40.9490783)",primary,POINT (13878779.447 1628796.539),120
485,"(-74.0320109, 40.9521345)",primary,POINT (13876709.572 1628722.359),120
486,"(-74.029802, 40.942352)",primary,POINT (13879265.772 1626126.646),120


In [49]:
# List the distinct road classes present among the endpoints.
way_type_values = p_geo['way_type'].unique()
print(way_type_values)

['residential' 'tertiary' 'secondary' 'primary']


In [50]:
# Replace every endpoint with a circular buffer of its own radius. GeoSeries.buffer accepts
# one distance per geometry, so no row-wise apply is needed. Distances are interpreted in
# the units of p_geo's CRS.
buffered = p_geo.copy()
buffered['geometry'] = p_geo.geometry.buffer(p_geo['buffer_ft'])
buffered.head(3)

Unnamed: 0,coord,way_type,geometry,buffer_ft
0,"(-74.0399581, 40.9479384)",residential,"POLYGON ((13875894.287 1626194.660, 13875893.7...",120
1,"(-74.0308757, 40.9520131)",residential,"POLYGON ((13877114.306 1628866.289, 13877113.7...",120
2,"(-74.034354, 40.958145)",residential,"POLYGON ((13875033.946 1630160.497, 13875033.3...",120


In [52]:
# Keep the labels that fall inside at least one endpoint buffer. A label lying within
# several overlapping buffers appears once per matching buffer in the result.
# `predicate=` replaces the deprecated `op=` keyword of sjoin.
curb_tp = curb.sjoin(buffered, predicate="within", how="inner")
no_curb_tp = no_curb.sjoin(buffered, predicate="within", how="inner")

In [54]:
# Labels whose label_id never matched an endpoint buffer are treated as false positives.
curb_matched = curb['label_id'].isin(curb_tp['label_id'])
curb_fp = curb[~curb_matched]

no_curb_matched = no_curb['label_id'].isin(no_curb_tp['label_id'])
no_curb_fp = no_curb[~no_curb_matched]

In [55]:
# Select the rows of `labels` whose label_id appears in curb_fp and show them on a map.
in_curb_fp = labels['label_id'].isin(curb_fp['label_id'])
fp_2 = labels.loc[in_curb_fp]
fp_2.explore()


In [38]:
# Base map: road segments colored by way_type.
base = roads.explore(column='way_type', tiles='cartodb_positron')

In [56]:
# Overlay the curb-ramp false positives in green on top of the roads base map.
curb_fp.explore(m=base, color='green', tiles='cartodb_positron')


In [29]:
# Reduce the curb-ramp false positives to their label ids. The source frame is `curb_fp`;
# `curb_fp_1` is not defined anywhere before this cell, so the previous self-referential
# form (curb_fp_1 = curb_fp_1[...]) raised NameError on a fresh kernel.
curb_fp_1 = curb_fp[['label_id']]

In [30]:
# Reduce the missing-curb-ramp false positives to their label ids. The source frame is
# `no_curb_fp`; `no_curb_fp_1` is not defined anywhere before this cell, so the previous
# self-referential form raised NameError on a fresh kernel.
no_curb_fp_1 = no_curb_fp[['label_id']]

In [31]:
# Stack the two false-positive id tables (curb ramps first, then missing curb ramps).
false_positive_id_frames = [curb_fp_1, no_curb_fp_1]
fp = pd.concat(false_positive_id_frames)

In [32]:
# Start the output table from the label ids only. The explicit copy makes `df` independent
# of `labels`, so adding columns to it later does not write into a slice of `labels`.
df = labels[['label_id']].copy()

In [33]:
# Add the false_positive flag: 1 when the label_id is listed in `fp`, otherwise 0.
# Series.isin performs the membership test in one vectorized pass instead of scanning
# fp['label_id'] once for every row of df.
df['false_positive'] = df['label_id'].isin(fp['label_id']).astype(int)

In [34]:
# Display the label_id / false_positive table.
df

Unnamed: 0,label_id,false_positive
0,9,0
1,10,0
2,11,0
3,12,0
4,13,0
...,...,...
12129,13096,1
12130,13097,0
12131,13098,0
12132,13099,0


In [153]:
# Persist the per-label false-positive flags as CSV (row index omitted).
df.to_csv(path_or_buf='data/features/false_positives.csv', index=False)