This notebook is for projecting Project Sidewalk labels to the cleaned pedestrian network.\
pednet_pid is the unique key to join the two datasets.\

In [1]:
import geopandas as gpd
from shapely.geometry import Point

In [2]:
#read in ped network
pedestrian_network = gpd.read_file('../data/cleaned-pednetwork/cleaned-pednetwork-full/cleaned-pednetwork-full.shp')

## 1. Projecting PS curb ramps and missing curb ramps to crossings

In [3]:
#single out the fclass = crossing
crossings = pedestrian_network[pedestrian_network['fclass'] == 'crossing']

In [4]:
# read in the filtered ramps data
ramps = gpd.read_file('../data/project-sidewalk/project-sidewalk-labels/filtered-ramps/filtered-ramps.shp')

In [5]:
ramps

Unnamed: 0,id,labelType,streetId,osmWayId,neighborhd,avgImgDate,avgLblDate,severity,temporary,nAgree,nDisagree,nNotsure,clusterSze,userIds,geometry
0,104337968,CurbRamp,22785,424636851,Montlake,2021-06-30 17:00:00.0,2024-05-08 21:57:04.142,2.0,F,1,0,0,1,[8d5ecc00-2b9a-4edf-af7f-ce0ba7edb4a8],POINT (1277926.743 238356.633)
1,104249562,NoCurbRamp,18691,242504567,East Queen Anne,2019-04-30 17:00:00.0,2020-12-20 20:02:50.521,3.0,F,2,0,0,1,[7e87b104-1b6d-4b3b-bde0-1733f7463dab],POINT (1266846.9 234326.846)
2,104354371,CurbRamp,22318,389798783,Interbay,2018-11-29 16:00:00.0,2020-03-13 06:32:54.663,1.0,F,2,0,0,2,"[91376904-0b1e-4a89-a83b-fa5d9c52f20f,12d3f3e7...",POINT (1259453.075 240344.31)
3,104354280,CurbRamp,7343,6465628,Interbay,2019-04-10 17:00:00.0,2020-11-22 05:24:45.404,1.0,F,2,0,0,3,"[12d3f3e7-1635-41ee-84e5-28f4130828d5,f98c9d5b...",POINT (1259460.057 240410.974)
4,104247881,CurbRamp,3950,6406572,East Queen Anne,2019-04-30 17:00:00.0,2021-01-31 10:40:23.757667,1.0,F,4,0,0,3,"[0e1ae564-6d72-4670-98e4-71369cc5ab26,12d3f3e7...",POINT (1265401.015 235613.512)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16742,103346952,CurbRamp,6732,6456858,Portage Bay,2019-07-01 01:00:00.0,2021-06-21 04:15:46.963,1.0,F,3,0,0,3,"[8ba4641d-f661-4351-8f20-ff86057a8b16,435309a9...",POINT (1273738.444 241219.222)
16743,104343605,CurbRamp,3970,6407501,West Queen Anne,2019-04-30 17:00:00.0,2021-01-23 11:35:13.065,1.0,F,1,0,0,1,[8ba4641d-f661-4351-8f20-ff86057a8b16],POINT (1260114.503 238154.335)
16744,104253603,CurbRamp,14480,62202243,Interbay,2019-04-30 17:00:00.0,2020-11-10 09:48:02.815,3.0,F,1,0,0,1,[a7dfa770-211a-4257-9a2e-a401b8a6ad54],POINT (1263871.558 229930.704)
16745,104354259,CurbRamp,14480,62202243,Interbay,2019-04-30 17:00:00.0,2020-11-10 09:48:51.578,3.0,F,1,0,0,1,[a7dfa770-211a-4257-9a2e-a401b8a6ad54],POINT (1263848.708 229917.24)


In [6]:
def project_points_to_nearest_line_spatial_join(points_gdf, lines_gdf):
    # Spatial join to find the nearest line for each point
    points_gdf['nearest_line_id'] = points_gdf.geometry.apply(
        lambda geom: lines_gdf.distance(geom).idxmin()
    )
    # Merge the points with their corresponding nearest lines
    points_with_lines = points_gdf.merge(
        lines_gdf, left_on='nearest_line_id', right_index=True, suffixes=('', '_line')
    )
    # Project the points onto the nearest line
    points_with_lines['projected_point'] = points_with_lines.apply(
        lambda row: row.geometry_line.interpolate(row.geometry_line.project(row.geometry)),
        axis=1
    )
    # Create a new GeoDataFrame with the projected points
    projected_gdf = gpd.GeoDataFrame(points_with_lines, geometry='projected_point', crs=points_gdf.crs)
    projected_gdf = projected_gdf.drop(columns=['geometry', 'nearest_line_id', 'geometry_line'])
    return projected_gdf


In [7]:
projected_ramps = project_points_to_nearest_line_spatial_join(ramps, crossings)

In [10]:
projected_ramps = projected_ramps.rename(columns={'id': 'projectSidewalk_id'})

In [15]:
projected_ramps.head()

Unnamed: 0,projectSidewalk_id,labelType,streetId,osmWayId,neighborhd,avgImgDate,avgLblDate,severity,temporary,nAgree,...,userIds,pednet_id,pednet_pid,osm_id,type,fclass,crossing_t,z-start,z-end,projected_point
0,104337968,CurbRamp,22785,424636851,Montlake,2021-06-30 17:00:00.0,2024-05-08 21:57:04.142,2.0,F,1,...,[8d5ecc00-2b9a-4edf-af7f-ce0ba7edb4a8],0.0,0,1000034000.0,ped,crossing,m,15.4,16.200001,POINT (1277944.809 238365.143)
1,104249562,NoCurbRamp,18691,242504567,East Queen Anne,2019-04-30 17:00:00.0,2020-12-20 20:02:50.521,3.0,F,2,...,[7e87b104-1b6d-4b3b-bde0-1733f7463dab],5.0,5,1005641000.0,ped,crossing,m,117.199997,115.699997,POINT (1266851.15 234325.27)
2,104354371,CurbRamp,22318,389798783,Interbay,2018-11-29 16:00:00.0,2020-03-13 06:32:54.663,1.0,F,2,...,"[91376904-0b1e-4a89-a83b-fa5d9c52f20f,12d3f3e7...",6.0,6,1005680000.0,ped,crossing,m,15.4,15.7,POINT (1259457.322 240344.547)
3,104354280,CurbRamp,7343,6465628,Interbay,2019-04-10 17:00:00.0,2020-11-22 05:24:45.404,1.0,F,2,...,"[12d3f3e7-1635-41ee-84e5-28f4130828d5,f98c9d5b...",7.0,7,1005680000.0,ped,crossing,m,15.7,16.1,POINT (1259460.046 240410.273)
4,104247881,CurbRamp,3950,6406572,East Queen Anne,2019-04-30 17:00:00.0,2021-01-31 10:40:23.757667,1.0,F,4,...,"[0e1ae564-6d72-4670-98e4-71369cc5ab26,12d3f3e7...",27.0,30,1011359000.0,ped,crossing,u,126.300003,125.599998,POINT (1265402.4 235610.647)


In [21]:
#keep only the columns we need
projected_ramps = projected_ramps [['projectSidewalk_id', 'labelType', 'neighborhd','avgLblDate', 'severity', 'pednet_pid', 'projected_point']]

In [19]:
projected_ramps 

Unnamed: 0,projectSidewalk_id,labelType,neighborhd,avgLblDate,severity,pednet_pid,projected_point
0,104337968,CurbRamp,Montlake,2024-05-08 21:57:04.142,2.0,0,POINT (1277944.809 238365.143)
1,104249562,NoCurbRamp,East Queen Anne,2020-12-20 20:02:50.521,3.0,5,POINT (1266851.15 234325.27)
2,104354371,CurbRamp,Interbay,2020-03-13 06:32:54.663,1.0,6,POINT (1259457.322 240344.547)
3,104354280,CurbRamp,Interbay,2020-11-22 05:24:45.404,1.0,7,POINT (1259460.046 240410.273)
4,104247881,CurbRamp,East Queen Anne,2021-01-31 10:40:23.757667,1.0,30,POINT (1265402.4 235610.647)
...,...,...,...,...,...,...,...
16742,103346952,CurbRamp,Portage Bay,2021-06-21 04:15:46.963,1.0,33673,POINT (1273739.917 241222.609)
16743,104343605,CurbRamp,West Queen Anne,2021-01-23 11:35:13.065,1.0,33713,POINT (1260143.983 238154.605)
16744,104253603,CurbRamp,Interbay,2020-11-10 09:48:02.815,3.0,33721,POINT (1263871.902 229930.247)
16745,104354259,CurbRamp,Interbay,2020-11-10 09:48:51.578,3.0,3467,POINT (1263855.598 229911.92)


In [39]:
# get severity values
projected_ramps['severity'].value_counts(dropna=False)

severity
1.0    7753
2.0    3788
3.0    2723
4.0    1506
5.0     847
Name: count, dtype: int64

In [38]:
# drop nan subset of severity
projected_ramps = projected_ramps.dropna(subset=['severity'])

In [40]:
#save projected points
projected_ramps.to_file('../data/project-sidewalk/project-sidewalk-labels/projected/projected-ramps/projected-ramps.shp')

  projected_ramps.to_file('../data/project-sidewalk/project-sidewalk-labels/projected/projected-ramps/projected-ramps.shp')
  ogr_write(


# 2. Projecting PS surface problems and obstacles to sidewalks

In [22]:
# repeat for surface problems and obstacles
# read in ps data
ps_data = gpd.read_file('../data/project-sidewalk/project-sidewalk-labels/project-sidewalk-labels.shp')

In [23]:
#select surface problems and obstacles in one go
sidewalk_problems = ps_data[(ps_data['labelType'] == 'SurfaceProblem') | (ps_data['labelType'] == 'Obstacle')]

In [24]:
sidewalk_problems

Unnamed: 0,id,labelType,streetId,osmWayId,neighborhd,avgImgDate,avgLblDate,severity,temporary,nAgree,nDisagree,nNotsure,clusterSze,userIds,geometry
24,99416427,SurfaceProblem,14825,98239854,Harbor Island,2017-08-31 17:00:00.0,2019-04-26 15:26:23.331,2.0,F,1,0,1,1,[939b6faa-0b57-4160-bcc2-d11fd2b69d9f],POINT (1264988.786 215907.645)
25,99416428,SurfaceProblem,14825,98239854,Harbor Island,2017-08-31 17:00:00.0,2019-04-26 15:26:37.525,4.0,F,2,0,0,1,[939b6faa-0b57-4160-bcc2-d11fd2b69d9f],POINT (1264988.691 215998.11)
26,99416429,SurfaceProblem,14822,98239835,Harbor Island,2017-08-31 17:00:00.0,2019-04-26 14:45:09.441,4.0,F,2,0,0,1,[939b6faa-0b57-4160-bcc2-d11fd2b69d9f],POINT (1265194.078 216861.105)
27,99416430,SurfaceProblem,14822,98239835,Harbor Island,2017-08-31 17:00:00.0,2019-04-26 14:44:49.9,4.0,F,1,1,0,1,[939b6faa-0b57-4160-bcc2-d11fd2b69d9f],POINT (1265167.613 216856.061)
28,99416431,SurfaceProblem,14822,98239835,Harbor Island,2014-05-31 17:00:00.0,2019-04-26 14:47:49.022,4.0,F,3,1,0,1,[939b6faa-0b57-4160-bcc2-d11fd2b69d9f],POINT (1265749.26 216937.821)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36585,104360813,Obstacle,3777,6402847,North Admiral,2019-05-31 17:00:00.0,2024-01-22 06:32:02.953,1.0,F,0,0,0,1,[700ec25e-8175-4ad5-aedd-747fcfce033a],POINT (1254546.714 214569.352)
36586,104360814,Obstacle,20279,297417500,North Admiral,2021-07-31 17:00:00.0,2024-03-05 11:30:06.369,1.0,F,1,0,0,1,[65cf7fe8-0ab5-4edd-a6f4-c714e2bad073],POINT (1252815.949 215315.79)
36587,104360815,Obstacle,6237,6448663,North Admiral,2019-05-31 17:00:00.0,2023-11-06 14:05:52.485,2.0,F,1,0,0,1,[700ec25e-8175-4ad5-aedd-747fcfce033a],POINT (1257256.492 219766.953)
36588,104360816,Obstacle,6237,6448663,North Admiral,2019-05-31 17:00:00.0,2023-11-06 14:04:47.995,4.0,F,0,0,0,1,[700ec25e-8175-4ad5-aedd-747fcfce033a],POINT (1257143.869 219784.536)


In [25]:
#select pedestrian network where fclass !=  crossing and type = ped 
pedestrian_network_no_crossings = pedestrian_network[(pedestrian_network['fclass'] != 'crossing') & (pedestrian_network['type'] == 'ped')]

In [26]:
pedestrian_network_no_crossings

Unnamed: 0,pednet_id,pednet_pid,osm_id,type,fclass,crossing_t,z-start,z-end,geometry
1,1.0,1,1.000034e+09,ped,footway,0,16.200001,16.200001,"LINESTRING (1277978.83 238310.174, 1277966.394..."
2,2.0,2,1.000034e+09,ped,footway,0,10.500000,16.200001,"LINESTRING (1278374.334 238398.46, 1278317.326..."
3,3.0,3,1.005452e+09,ped,steps,0,11.600000,11.600000,"LINESTRING (1284223.335 235156.549, 1284211.29..."
4,4.0,4,1.005641e+09,ped,steps,0,124.599998,118.900002,"LINESTRING (1266761.001 234328.359, 1266840.43..."
8,8.0,8,1.009185e+09,ped,steps,0,35.200001,35.400002,"LINESTRING (1265784.481 231380.331, 1265785.13..."
...,...,...,...,...,...,...,...,...,...
34919,32950.0,34915,1.123069e+09,ped,footway,0,94.300003,93.800003,"LINESTRING (1262258.894 239387.956, 1262164.56..."
34925,32956.0,34921,5.916819e+08,ped,footway,0,68.199997,68.199997,"LINESTRING (1262641.401 239978.955, 1262670.91..."
34927,32958.0,34923,1.124353e+09,ped,footway,0,71.800003,78.800003,"LINESTRING (1262143.909 239841.769, 1262248.62..."
34928,32959.0,34924,1.124353e+09,ped,footway,0,68.199997,68.199997,"LINESTRING (1262631.523 239983.092, 1262641.40..."


In [27]:
projected_problems = project_points_to_nearest_line_spatial_join(sidewalk_problems, pedestrian_network_no_crossings)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [30]:
projected_problems = projected_problems.rename(columns={'id': 'projectSidewalk_id'})

In [31]:
projected_problems = projected_problems [['projectSidewalk_id', 'labelType', 'neighborhd','avgLblDate', 'severity', 'pednet_pid', 'projected_point']]

In [33]:
#check value counts for severity, include null
projected_problems['severity'].value_counts(dropna=False)

severity
2.0    3711
3.0    3581
1.0    1940
4.0    1588
5.0     704
NaN     396
Name: count, dtype: int64

In [34]:
# drop rows with null severity
projected_problems = projected_problems.dropna(subset=['severity'])

In [35]:
#save projected problems
projected_problems.to_file('../data/project-sidewalk/project-sidewalk-labels/projected/projected-problems/projected-problems.shp')

  projected_problems.to_file('../data/project-sidewalk/project-sidewalk-labels/projected/projected-problems/projected-problems.shp')
  ogr_write(
