### Create the process for merging new or corrected site data

In [75]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os
# Do not truncate columns or rows when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Make DataFrames viewable as interactive tables with search and sort.
# itables must be installed first: pip install itables
from itables import init_notebook_mode
init_notebook_mode(all_interactive=True)


<IPython.core.display.Javascript object>

#### Helpful links and snippets

In [20]:
# os.getcwd() # check what the working directory is

# keyboard shortcuts
# https://noteable.io/blog/jupyter-notebook-shortcuts-boost-productivity/

### set working directory to the github repo directory
Change the path below to the location of the repository on your own computer so that the notebook works for you too.

In [27]:
# Working directory for the notebook. The relative 'data/...' paths read in
# later cells resolve against this directory.
# Set the HUDSON_ACCESS_DIR environment variable to point at your own checkout;
# when it is not set, the original author's local path is used as the fallback.
os.chdir(os.environ.get('HUDSON_ACCESS_DIR', '/Users/sarahodges/spatial/SAVI/hudson_access/data/published'))

### Import existing site points data

In [77]:
## this is actually the hap_site_points_fieldupdates_surveyintegration_8-2-23.geojson data from Sara Eichner, with the new entries removed
# it is used here to build the process for site data updates
old_site_points = gpd.read_file('data/hap_site_points.geojson')
# len(old_site_points.columns)

In [78]:
#### create a site_score dataframe and remove from data
# Keep site_id / site_score in their own table. Select the two columns first
# and copy only that slice, instead of deep-copying the whole frame (geometry
# included) and then discarding almost all of it.
site_score = old_site_points[['site_id', 'site_score']].copy()
#site_score.head()

### Make corrections to existing site points data

In [82]:
# Corrections to the existing site points.
#
# Rows currently named "Fair Haven Rd & Fairwaters Lane" are renamed to
# "Fair Haven Pier" and given a new site_id. The row mask is computed once from
# the original names so that both replacements apply to exactly the same rows.
is_fair_haven = old_site_points["site_name"] == "Fair Haven Rd & Fairwaters Lane"

site_points = (
    old_site_points
    .drop(columns='site_score')  # Remove the 'site_score' column
    .assign(
        site_name=old_site_points["site_name"].mask(is_fair_haven, "Fair Haven Pier"),
        # site_id is compared as a string everywhere else in this notebook
        # ("595580", "146", 'needs id') and is the merge key against the survey
        # data, so the replacement id is written as a string as well. An integer
        # here would leave a mixed-type column and never match those comparisons.
        site_id=old_site_points["site_id"].mask(is_fair_haven, "540232"),
    )
)

# Per-site corrections to the activity codes.
site_points.loc[site_points["site_id"] == "595580", "act_codes"] = "VISUAL"
site_points.loc[site_points["site_id"] == "146", "act_codes"] = "FISH"

# Plain (non-spatial) table of the corrected sites, without the geometry column.
site_points_df = pd.DataFrame(site_points.drop(columns='geometry'))
site_points_df

site_id,act_codes,access_id,site_name,site_label,site_address,site_description,hours_info,open_close_date,fee,fee_amount,public_transit,public_transit_description,url_public,phone_public,email_public,site_manager,phone_site_manager,email_site_manager,access_type,access_type_other,accessibility_description,safety,use_limits,water_depth_est,program_YN,program_name,program_description,program_hours,program_id,program_phone,program_url,program_contact,amenities_description,restrooms,changing_station,food,drinking_water,walking_trails,equipment_rental,boat_access,boat_launch_YN,bike_path_accessible,bike_path_access_description,bike_parking_rack,picnic_area,playground,parking,parking_description,pets_allowed,wheelchair_access_amenities,wheelchair_access_restrooms,wheelchair_access_trails,SWIM_YN,informal_swimming,lifeguard_SWIM,safety_SWIM,showers_SWIM,FISH_YN,fish_species_FISH,walking_path_FISH,permit_FISH,HPBL_YN,difficulty_level_HPBL,distance_parking_to_launch_HPBL,boat_launch_type_HPBL,boat_storage_HPBL,trailer_parking_HPBL,safety_HPBL,MPBL_YN,difficulty_level_MPBL,boat_launch_type_MPBL,distance_parking_to_launch_MPBL,boat_cleaning_requirements_MPBL,boat_inspections_MPBL,boat_storage_MPBL,boat_storage_overnight_MPBL,haul_out_MPBL,navigational_notes_MPBL,trailer_parking_MPBL,pump_out_MPBL,safety_MPBL,site_name_photo_01,site_name_photo_02,site_name_photo_03,photo_credits,source,sq_acres,status,owner,owner_type,municipality,county,state,waterbody,natural_no,water_quality_monitoring,typology,CreationDate,Creator,EditDate,Editor,GlobalID,ObjectID,lat,lon,x,y,geometry
Loading... (need help?),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### Import site updates 
Alyssa collected these over the summer (up to July 28) using the Survey123 survey.

In [86]:
## add new survey data
# site_id is read explicitly as text: it is filtered against the placeholder
# 'needs id' and used as the merge key against the existing site table. Without
# the explicit dtype, pandas would infer a numeric column as soon as every
# placeholder has been replaced by a real id, and those string comparisons and
# the merge would stop matching.
raw_surv_new = pd.read_csv(
    'data/data_updates/Hudson_Access_Project_Data_Survey_0.csv',
    dtype={'site_id': str},
)
raw_surv_new

ObjectID,GlobalID,county,lat,lon,municipality,natural_no,site_id,access_id,source,sq_acres,state,status,typology,waterbody,site_name,site_label,site_address,owner,owner_type,site_manager,phone_site_manager,email_site_manager,email_public,phone_public,url_public,site_description,photo_credits,safety,informal_swimming,use_limits,fee,fee_amount,hours_info,boat_launch_YN,equipment_rental,parking,parking_description,bike_parking_rack,restrooms,drinking_water,changing_station,food,picnic_area,playground,pets_allowed,amenities_description,wheelchair_access_amenities,wheelchair_access_restrooms,wheelchair_access_trails,accessibility_description,boat_access,public_transit,public_transit_description,bike_path_accessible,bike_path_access_description,program_id,program_name,program_contact,program_phone,program_url,program_hours,program_description,HPBL_YN,difficulty_level_HPBL,safety_HPBL,trailer_parking_HPBL,distance_parking_to_launch_HPBL,boat_launch_type_HPBL,boat_storage_HPBL,MPBL_YN,difficulty_level_MPBL,safety_MPBL,trailer_parking_MPBL,distance_parking_to_launch_MPBL,boat_launch_type_MPBL,boat_storage_MPBL,boat_storage_overnight_MPBL,navigational_notes_MPBL,pump_out_MPBL,haul_out_MPBL,boat_cleaning_requirements_MPBL,boat_inspections_MPBL,FISH_YN,permit_FISH,walking_path_FISH,fish_species_FISH,SWIM_YN,safety_SWIM,showers_SWIM,lifeguard_SWIM,CreationDate,Creator,EditDate,Editor,site_name_photo_01,site_name_photo_02,site_name_photo_03,access_type,access_type_other,water_depth_est,water_quality_monitoring,open_close_date,walking_trails,program_YN,act_codes,x,y
Loading... (need help?),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [19]:
# surv_new.head()

In [83]:
### check the column differences
# Column labels of the corrected site table and of the raw survey export.
site_cols, survey_cols = site_points_df.columns, raw_surv_new.columns

# Columns that exist in the site table but not in the survey export;
# an empty Index means the survey export has every site-table column.
site_cols.difference(survey_cols)

Index([], dtype='object')

In [91]:
## create a list of updated site_ids
# Survey rows whose site_id is the placeholder 'needs id' are left out.
has_site_id = raw_surv_new['site_id'] != 'needs id'

# Select the rows and the two columns in one step and copy only that slice,
# rather than copying the whole survey frame before subsetting it.
updated_sites = raw_surv_new.loc[has_site_id, ['site_id', 'site_name']].copy()

# Inner join on site_id: keeps only the survey rows whose site_id also exists in
# the current site table. site_name is present on both sides, so the result has
# site_name_x (from the survey) and site_name_y (from the existing site table).
merged_df = updated_sites.merge(site_points_df, on='site_id', how='inner')
merged_df

Unnamed: 0,site_id,site_name_x,act_codes,access_id,site_name_y,site_label,site_address,site_description,hours_info,open_close_date,fee,fee_amount,public_transit,public_transit_description,url_public,phone_public,email_public,site_manager,phone_site_manager,email_site_manager,access_type,access_type_other,accessibility_description,safety,use_limits,water_depth_est,program_YN,program_name,program_description,program_hours,program_id,program_phone,program_url,program_contact,amenities_description,restrooms,changing_station,food,drinking_water,walking_trails,equipment_rental,boat_access,boat_launch_YN,bike_path_accessible,bike_path_access_description,bike_parking_rack,picnic_area,playground,parking,parking_description,pets_allowed,wheelchair_access_amenities,wheelchair_access_restrooms,wheelchair_access_trails,SWIM_YN,informal_swimming,lifeguard_SWIM,safety_SWIM,showers_SWIM,FISH_YN,fish_species_FISH,walking_path_FISH,permit_FISH,HPBL_YN,difficulty_level_HPBL,distance_parking_to_launch_HPBL,boat_launch_type_HPBL,boat_storage_HPBL,trailer_parking_HPBL,safety_HPBL,MPBL_YN,difficulty_level_MPBL,boat_launch_type_MPBL,distance_parking_to_launch_MPBL,boat_cleaning_requirements_MPBL,boat_inspections_MPBL,boat_storage_MPBL,boat_storage_overnight_MPBL,haul_out_MPBL,navigational_notes_MPBL,trailer_parking_MPBL,pump_out_MPBL,safety_MPBL,site_name_photo_01,site_name_photo_02,site_name_photo_03,photo_credits,source,sq_acres,status,owner,owner_type,municipality,county,state,waterbody,natural_no,water_quality_monitoring,typology,CreationDate,Creator,EditDate,Editor,GlobalID,ObjectID,lat,lon,x,y
Loading... (need help?),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [92]:
### Next steps
# - add a site_id to all the new sites
# - count the duplicates in the existing points
# - remove the duplicates
# - check the old data to see if there is anything that needs to be kept
# - antijoin
# - convert to a spatial dataframe
# - write out the dataframe
# - change the web code

updated_sites

Unnamed: 0,site_id,site_name
Loading... (need help?),,


### Create checks to ensure that the new data will work with the website

 - act_codes have no spaces
 - all sites have act_codes
 - all sites have site_id