In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np

In [2]:
# Read the coastal access locations shapefile into a GeoDataFrame and preview
# the first rows to check which attribute columns came through.
points = gpd.read_file("../../data/recdata/scratch/coastal_access_locations_utm.shp")
points.head()

Unnamed: 0,BIKE_PATH,BLFTP_PRK,BLFTP_TRLS,BLUFF,BOATING,BT_FACIL_T,Bch_whlchr,CAMPGROUND,COUNTY,CountyNum,...,Photo_4,RESTROOMS,RKY_SHORE,SNDY_BEACH,STRS_BEACH,TIDEPOOL,VISTOR_CTR,VOLLEYBALL,WLDLFE_VWG,geometry
0,No,,,Yes,No,,707-465-7331,No,Del Norte,1,...,,No,No,Yes,No,,No,,Yes,POINT (399877.3210734062 4649689.492335939)
1,No,Yes,Yes,Yes,No,,,Yes,Del Norte,1,...,,Yes,No,Yes,Yes,,No,,No,POINT (400273.3581738284 4647534.701978522)
2,No,,,No,Yes,,,No,Del Norte,1,...,,No,Yes,No,Yes,,No,,Yes,POINT (400118.4789742854 4644671.497961469)
3,No,,,No,Yes,Boat Ramp,,No,Del Norte,1,...,,Yes,No,No,No,,No,,Yes,POINT (404777.3492687638 4638236.267370273)
4,No,,,No,No,,,No,Del Norte,1,...,,No,Yes,Yes,No,,No,,No,POINT (399449.9808277488 4635953.686565612)


In [3]:
# Attribute columns of the access-point layer that are carried through the analysis.
access_vars = [
    'BIKE_PATH', 'BLFTP_PRK', 'BLFTP_TRLS', 'BLUFF', 'BOATING',
    'BT_FACIL_T', 'Bch_whlchr', 'CAMPGROUND', 'DOG_FRIEND', 'DSABLDACSS', 'DUNES',
    'EZ4STROLLE', 'FEE', 'FISHING', 'PARKING', 'PCNC_AREA', 'PTH_BEACH', 'RESTROOMS',
    'RKY_SHORE', 'SNDY_BEACH', 'STRS_BEACH', 'TIDEPOOL', 'VISTOR_CTR', 'VOLLEYBALL', 'WLDLFE_VWG',
]

## Drop 'BT_FACIL_T' instead of cleaning it up:
## it records the *type* of boating facility rather than 'yes' or 'no'.
## The separate 'BOATING' variable already carries the yes/no answer,
## so the facility type is not worth processing here.
access_vars = [name for name in access_vars if name != 'BT_FACIL_T']

In [4]:
# Keep only the access attributes plus the geometry needed for the spatial join.
keep_cols = access_vars + ['geometry']
points = points[keep_cols]

In [5]:
## Load the polygon segments that the access points will be intersected with.
## Note: these are the full buffers, not the versions cut off by the land.
segments = gpd.read_file("./model_data/aoi/ne_ca_coastline_1k_segments_buff400m.shp")
segments.head()

Unnamed: 0,pid,geometry
0,0,"POLYGON ((392712.2897892545 4656848.358797132,..."
1,1,"POLYGON ((393277.8655840221 4656023.770353859,..."
2,2,"POLYGON ((393249.2407242999 4656004.136889528,..."
3,3,"POLYGON ((394533.8372112893 4655999.85968902, ..."
4,4,"POLYGON ((395728.4973521334 4654795.624178629,..."


In [6]:
# one:many join of polygon segments to all the access points
# this allows one polygon to capture many points.
# how='left' keeps every polygon, so segments without any access point still
# appear (once) with NaN attributes.
# The spatial test is deliberately left at its default ('intersects'): the
# keyword that names it was renamed from `op` to `predicate` across geopandas
# releases, so omitting it keeps this call working on old and new versions.
access_seg = gpd.sjoin(segments[['geometry', 'pid']], points, how='left')
print(segments.shape)
print(access_seg.shape)

(2332, 2)
(2642, 27)


In [7]:
## Now we want to convert all the vars to 1s or 0s (presence/absence)
## And then summarize the count of 1s for each variable in each polygon

In [8]:
# wasn't exactly sure what pd.get_dummies does so didn't use it, but could be handy
# dummy = pd.get_dummies(access_seg[access_vars])

In [9]:
## first cleanup the wheelchair var
## it often has a phone number and statement that access is provided if you call first.
## I'm not sure if that counts as access or not, decide here:
def wheelchair(x):
    """Normalise one raw value of the beach-wheelchair column.

    Falsy values (None, empty string, 0) and the recognised answers
    'nan', 'No', 'no', 'Yes', 'yes' are returned unchanged. Every other
    value (in this dataset, phone numbers to call ahead) is replaced by a
    single fixed answer.
    """
    if not x:
        return x
    recognised = ('nan', 'No', 'no', 'Yes', 'yes')
    # all the rest are the phone numbers; pick yes or no for them here
    return x if str(x) in recognised else 'no'

# Clean the single wheelchair column element by element. Series.map calls
# wheelchair() on every value (NaN included), the same as the element-wise
# DataFrame.applymap, which is deprecated in recent pandas releases.
access_seg['Bch_whlchr'] = access_seg['Bch_whlchr'].map(wheelchair)

In [10]:
def make_presence_absence(x):
    """Encode one raw attribute value as presence (1), absence (0) or NaN.

    Text is matched case-insensitively: 'no' -> 0, 'yes' -> 1, while the
    uncertain answers 'yes?' and '?' become NaN. None, NaN, the empty string
    and the literal string 'nan' also become NaN. Any other text is printed
    (so unexpected values get noticed) and returned as NaN.

    Values that are already numeric are handled as well, so re-running the
    cell on an already converted column is harmless: 0 stays 0 and 1 stays 1
    instead of raising on `.lower()` or turning a falsy 0 into NaN.
    """
    if x is None:
        return np.nan # if it was None to begin with

    if isinstance(x, (int, float, np.integer, np.floating, np.bool_)):
        if x != x:
            return np.nan # if it was nan to begin with (NaN never equals itself)
        if x == 0:
            return 0
        if x == 1:
            return 1
        print(x) # a numeric value that is not a valid presence/absence flag
        return np.nan

    if not x or str(x) == 'nan':
        return np.nan # empty text or the literal string 'nan'

    x = x.lower()
    if x == 'no':
        return 0
    if x == 'yes':
        return 1
    if x in ('yes?', '?'):
        return np.nan
    print(x) # should print nothing if we've caught all the weird values
    return np.nan

# Encode every access column element by element. Going column by column with
# Series.map gives the same result as DataFrame.applymap without depending on
# that method, which is deprecated in recent pandas releases.
access_seg[access_vars] = access_seg[access_vars].apply(
    lambda col: col.map(make_presence_absence)
)

In [11]:
# Preview the joined table after the presence/absence encoding.
access_seg.head()

Unnamed: 0,geometry,pid,index_right,BIKE_PATH,BLFTP_PRK,BLFTP_TRLS,BLUFF,BOATING,Bch_whlchr,CAMPGROUND,...,PCNC_AREA,PTH_BEACH,RESTROOMS,RKY_SHORE,SNDY_BEACH,STRS_BEACH,TIDEPOOL,VISTOR_CTR,VOLLEYBALL,WLDLFE_VWG
0,"POLYGON ((392712.2897892545 4656848.358797132,...",0,,,,,,,,,...,,,,,,,,,,
1,"POLYGON ((393277.8655840221 4656023.770353859,...",1,,,,,,,,,...,,,,,,,,,,
2,"POLYGON ((393249.2407242999 4656004.136889528,...",2,,,,,,,,,...,,,,,,,,,,
3,"POLYGON ((394533.8372112893 4655999.85968902, ...",3,,,,,,,,,...,,,,,,,,,,
4,"POLYGON ((395728.4973521334 4654795.624178629,...",4,,,,,,,,,...,,,,,,,,,,


In [13]:
## for each col, group rows by pid, sum down the column
## and put result back into segments dataframe, which has 1 row per polygon/pid

## do a np.nansum unless they are all nan, in which case return nan.
## np.nansum treats nan as 0
def nansum_all(df, var):
    """Sum column `var` of `df`, ignoring NaNs, but keep an all-NaN column as NaN.

    np.nansum on its own would report 0 for a column with no valid values;
    returning NaN instead keeps "nothing known" distinct from "known to be zero".
    """
    values = df[var]
    nothing_known = np.all(np.isnan(values))
    return np.nan if nothing_known else np.nansum(values)
    
# Sum every access variable within each polygon in a single grouped pass.
# min_count=1 leaves a polygon as NaN when none of its joined rows has a known
# value, rather than reporting a misleading 0 (NaNs are otherwise skipped).
pid_totals = access_seg.groupby('pid')[access_vars].sum(min_count=1)

# Attach the totals by looking up each row's pid explicitly, so the result does
# not depend on the row index of `segments` happening to equal pid.
for av in access_vars:
    segments[av] = segments['pid'].map(pid_totals[av])

In [14]:
# Summary statistics per column; `count` shows how many polygons have a
# non-NaN total for each access variable.
segments.describe()

Unnamed: 0,pid,BIKE_PATH,BLFTP_PRK,BLFTP_TRLS,BLUFF,BOATING,Bch_whlchr,CAMPGROUND,DOG_FRIEND,DSABLDACSS,...,PCNC_AREA,PTH_BEACH,RESTROOMS,RKY_SHORE,SNDY_BEACH,STRS_BEACH,TIDEPOOL,VISTOR_CTR,VOLLEYBALL,WLDLFE_VWG
count,2332.0,615.0,177.0,243.0,486.0,637.0,75.0,635.0,228.0,634.0,...,364.0,629.0,637.0,506.0,637.0,630.0,114.0,479.0,115.0,542.0
mean,1165.5,0.325203,0.581921,0.769547,0.839506,0.210361,0.0,0.140157,0.850877,0.758675,...,0.706044,0.521463,0.861852,0.743083,0.879121,0.326984,0.605263,0.189979,0.547826,0.813653
std,673.334736,0.588931,0.678661,0.658996,0.791034,0.519754,0.0,0.360787,0.526521,0.856236,...,0.615562,0.62963,0.837264,0.763785,0.754121,0.684418,0.589261,0.465801,0.595944,0.706687
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,582.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1165.5,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,...,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0
75%,1748.25,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0
max,2331.0,3.0,3.0,3.0,5.0,4.0,0.0,2.0,2.0,5.0,...,3.0,3.0,5.0,5.0,5.0,5.0,3.0,3.0,2.0,4.0


In [17]:
# Write one row per polygon (pid plus the per-variable totals) to CSV;
# geometry is left out and the DataFrame index is not written.
segments[['pid'] + access_vars].to_csv('model_data/predictors/access_yourcoast.csv', index=False)