# PCAs - baseline recreation, currently and under 10 SLR scenarios
* Photo-user-days (PUD) and Twitter-user-days (TUD) in each PCA
* Proportion of PUDs outside the flood zone of each SLR scenario, for each PCA

In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import os
import glob
from functools import reduce

from bokeh.plotting import figure, output_notebook, show, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.layouts import row
from bokeh.transform import factor_cmap
from bokeh.palettes import Spectral3

%matplotlib inline

### PUD and TUD distributions for the PCA network

PCA shapefile for PUDs and TUDs is the one provided here: https://abag.ca.gov/priority/conservation/, 

dissolved versions dissolved on 'joinkey', which appeared to be a unique identifier.

non-dissolved versions don't dissolve, and use the concat of ('joinkey', 'fipco') as the unique id, which matches what bcdc did during their slr intersections...

In [2]:
# pudtable = pd.read_csv('../data/pca/pca_flickr/userdays_avg_annual_bypid.csv')
pudtable = pd.read_csv('../data/pca/pca_flickr/not_dissolved/userdays_avg_annual_bypid.csv')
pudtable.rename(columns={'avg_ann_ud':'pud'}, inplace=True)

# tudtable = pd.read_csv('../data/pca/pca_twitter/userdays_avg_annual_bypid.csv')
tudtable = pd.read_csv('../data/pca/pca_twitter/tud_nondissolved_pcas/userdays_avg_annual_bypid.csv')
tudtable.rename(columns={'avg_ann_ud':'tud'}, inplace=True)

data = pudtable.merge(tudtable, on='pid')
data['logpud'] = np.log1p(data['pud'])
data['logtud'] = np.log1p(data['tud'])

In [None]:
# fig, axs = plt.subplots(1,2, figsize=(14,5))
# data.hist('pud', bins=50, ax=axs[0])
# data.hist('tud', bins=50, ax=axs[1])

### PUD and TUD are correlated, but capture slightly different visitation patterns

In [None]:
# fig, axs = plt.subplots(1,2, figsize=(14,5))
# data.plot.scatter(x='pud', y='tud', ax=axs[0])
# data.plot.scatter(x='logpud', y='logtud', ax=axs[1])

### PUD, TUD are influenced by area of PCA

In [4]:
# pca_shp = gpd.read_file('../data/pca/pca_twitter/shp/Priority_Conservation_Areas_current_dissolve_pid.shp')
pca_shp = gpd.read_file('../data/pca/pca_twitter/tud_nondissolved_pcas/shp/Priority_Conservation_Areas_current_pid.shp')
pca_shp['area_m2'] = pca_shp.area

pca_metadata = gpd.read_file('../data/pca/shapefiles/Priority_Conservation_Areas_current.dbf')
pcadat = pd.merge(pca_shp[['pid', 'joinkey', 'fipco', 'area_m2']], pca_metadata, on=['joinkey', 'fipco'])

In [5]:
data = data.merge(pcadat.drop(axis='columns', labels=['geometry', 'adopted', 'lastupdate', 'lat', 'lon']), on='pid')

In [6]:
data['dataqual'] = str(data['dataqual'])

In [7]:
data['logarea'] = np.log1p(data['area_m2'])
data['pud_per_m2'] = data['pud']/data['area_m2']
data['tud_per_m2'] = data['tud']/data['area_m2']

### Add Designation type of each PCA to the table
NL = Natural Landscape  
AL = Agricultural Land  
UG = Urban Greening  
RR = Regional Recreation

In [8]:
# designation = pd.read_csv('../pca/FINAL_List_of_165_PCAs_Designations.csv')
# # Source: https://abag.ca.gov/priority/conservation/pdfs/FINAL_List_of_165_PCAs_Approved_as_of_September_2015.pdf

# designation['NL'] = designation['Designation'].str.extract('(NL)')
# designation['AL'] = designation['Designation'].str.extract('(AL)')
# designation['UG'] = designation['Designation'].str.extract('(UG)')
# designation['RR'] = designation['Designation'].str.extract('(RR)')
# designation.fillna('', inplace=True)
# designation['desig'] = designation['NL']+designation['AL']+designation['UG']+designation['RR']
# designation.drop(labels='Designation', axis='columns', inplace=True)

# data = pd.merge(data, designation, left_on='joinkey', right_on='Area ID', how='left')

In [9]:
data.to_csv('../data/pca/dataproducts/pca_vis_designation.csv', index=False)

# #### joining data to shp to make some maps in qgis
shp = pca_shp.merge(data, on='pid')
shp.to_file('../data/pca/dataproducts/pca_current_visdata.shp')

In [10]:
output_notebook()

pcafactors = list(set(data['dataqual']))

source = ColumnDataSource(data)
hover1 = HoverTool(tooltips=[("name", "@name")])
hover2 = HoverTool(tooltips=[("name", "@name")])

p = figure(plot_width=400, plot_height=400, tools=[hover1],
           title="PCA - flickr visitation rates and pca size")

p.circle('logarea', 'logpud', size=5, color=factor_cmap('dataqual', palette=Spectral3, factors=pcafactors), alpha=0.5, source=source)
p.xaxis.axis_label = 'log(area)'
p.yaxis.axis_label = 'log(pud)'

p2 = figure(plot_width=400, plot_height=400, tools=[hover2],
           title="PCA - twitter visitation rates and pca size")

p2.circle('logarea', 'logtud', size=5, color='red', alpha=0.5, source=source)
p2.xaxis.axis_label = 'log(area)'
p2.yaxis.axis_label = 'log(tud)'

In [11]:
show(row(p, p2), notebook_handle=True)

## PCA recreation by SLR scenarios
1) BCDC intersected the PCA polygons with the flood zone polygons for all SLR scenarios.  
2) We counted PUDs inside those flood areas for each scenario  
3) Results summarized here as the Proportion of PUDs outside the flood zone of each SLR scenario, for each PCA

#### Data Processing

In [12]:
def join_scenario_puds(scenario, mediaplatform, whole_pca_df):
    '''
    scenario (string): one of a list of directory names like
        '../bcdc_slr/PCAexposure_201710/slr_scenarios/12inches'
    mediaplatform (string): either 'flickr' or 'twitter' 
        these are subdirectories of 'scenario'
    whole_pca_df (pandas dataframe): it has PCA metadata like the name and total PUD/TUD
    '''    
    print(scenario)
    
    # table with userdays in the scenario polygons, keyed by pid
    spud = pd.read_csv(os.path.join(scenario, mediaplatform, "userdays_avg_annual_bypid_2005_2015.csv"))
    
    # table linking userdays id (pid) and scenario polygon id ('FID_spjoin' or sometimes 'FID_Priori')
    # sometimes this table includes an 'area' column sometimes not, we won't use it
    pid_fidspjoin_crswlk = pd.read_csv(glob.glob(os.path.join(scenario, mediaplatform, '*_pid'))[0])
    if 'FID_spjoin' not in list(pid_fidspjoin_crswlk):
        pid_fidspjoin_crswlk.rename(columns={'FID_Priori':'FID_spjoin'}, inplace=True)
    pid_fidspjoin_crswlk = pid_fidspjoin_crswlk[['FID_spjoin', 'pid']]
    
    # table linking scenario polygon id (TARGET_FID) and master PCA unique id (joinkey, fipco)
    fidspjoin_joinkey_crswlk = gpd.read_file(glob.glob(os.path.join(scenario, 'spjoi*.dbf'))[0])
    fidspjoin_joinkey_crswlk = fidspjoin_joinkey_crswlk[['TARGET_FID', 'joinkey', 'fipco', 'name']]
    
    # joins to get scenario userdays linked to master PCA unique id (joinkey, fipco)
    spud_fid = pd.merge(spud, pid_fidspjoin_crswlk, on='pid')
    np.all(np.isnan(spud_fid['avg_ann_ud']))
    dat = pd.merge(spud_fid, fidspjoin_joinkey_crswlk, left_on='FID_spjoin', right_on='TARGET_FID')
    
    ## join scenario PUDs to whole PUDs
    dat2 = pd.merge(dat, whole_pca_df, on=['joinkey', 'fipco'], how='right')
    
    scen = os.path.basename(scenario)
    # proportion of PUD that are outside the flood zone
    dat2[scen] = 1 - dat2['avg_ann_ud']/dat2['pud'] ## !! hardcoded for PUD right now
    return(dat2[['joinkey', 'fipco', scen]])

In [13]:
data = pd.read_csv('../data/pca/dataproducts/pca_vis_designation.csv', dtype={'fipco': 'str'})
# data['desig'].fillna('', inplace=True) # because some PCAs missing data in the original designation table
data['uid'] = data['joinkey']+data['fipco']

In [18]:
basedir = '../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/'
# excluding the 500yr scenario, since I'm missing the spjoin.dbf table
scenarios = ['12inches', '24inches', '36inches',  '48inches', '52inches', \
             '66inches', '77inches', '84inches', '96inches', '108inches', '100yr', 'sfrprecipstorm']

In [19]:
results = []
for scen in scenarios:
    results.append(join_scenario_puds(os.path.join(basedir, scen), 'flickr', data))

# make single table
slrdata = reduce(lambda x, y: pd.merge(x, y, on = ['joinkey', 'fipco']), results)

# single key for PCAs
slrdata['uid'] = slrdata['joinkey']+slrdata['fipco']
slrdata.drop(labels=['joinkey', 'fipco'], axis='columns', inplace=True)
slrdata.set_index('uid', inplace=True)
slrdata['0inches'] = 1
scenario_labels = ['0inches'] + scenarios
slrdata = slrdata[scenario_labels]
slrdata.fillna(1, inplace=True)

# numeric_index = [int(s.strip('inches')) for s in list(slrdata)]

../data/bcdc_slr/PCAexposure_201710/slr_scenarios/12inches
../data/bcdc_slr/PCAexposure_201710/slr_scenarios/24inches
../data/bcdc_slr/PCAexposure_201710/slr_scenarios/36inches
../data/bcdc_slr/PCAexposure_201710/slr_scenarios/48inches
../data/bcdc_slr/PCAexposure_201710/slr_scenarios/52inches
../data/bcdc_slr/PCAexposure_201710/slr_scenarios/66inches
../data/bcdc_slr/PCAexposure_201710/slr_scenarios/77inches
../data/bcdc_slr/PCAexposure_201710/slr_scenarios/84inches
../data/bcdc_slr/PCAexposure_201710/slr_scenarios/96inches
../data/bcdc_slr/PCAexposure_201710/slr_scenarios/108inches
../data/bcdc_slr/PCAexposure_201710/slr_scenarios/100yr
../data/bcdc_slr/PCAexposure_201710/slr_scenarios/sfrprecipstorm


In [None]:
data.head()

In [20]:
# join some PCA metadata
pcaslr = pd.merge(slrdata, data[['uid', 'pud', 'name', 'typenl', 'typeal', 'typeug', 'typerr', 'dataqual']], left_index=True, right_on='uid', how='left')
# could also join the % area flooded calculations from BCDC's table.

pcaslr = pcaslr[['uid', 'name', 'pud'] + scenario_labels + ['typenl', 'typeal', 'typeug', 'typerr', 'dataqual']]
pcaslr.to_csv('../data/pca/dataproducts/pca_vis_slrscenarios_designation.csv', index=False)

In [21]:
pcaslr.head()

Unnamed: 0,uid,name,pud,0inches,12inches,24inches,36inches,48inches,52inches,66inches,...,84inches,96inches,108inches,100yr,sfrprecipstorm,typenl,typeal,typeug,typerr,dataqual
9,MR46041,Marin County Agricultural Lands,130.272727,1,0.993022,0.988835,0.988137,0.862526,0.860433,0.847872,...,0.833217,0.832519,0.832519,0.769016,1.0,0,1,0,0,0 0\n1 0\n2 0\n3 1\n4 ...
10,MR156041,"Central Marin Bayfront, Madera Bay Park",40.727273,1,0.381696,0.357143,0.341518,0.053571,0.037946,0.015625,...,0.006696,0.004464,0.004464,0.022321,1.0,1,0,0,0,0 0\n1 0\n2 0\n3 1\n4 ...
11,MR186041,"Central Marin Bayfront, Canalways",15.636364,1,0.453488,0.348837,0.290698,0.168605,0.162791,0.110465,...,0.110465,0.098837,0.087209,0.122093,1.0,1,0,0,0,0 0\n1 0\n2 0\n3 1\n4 ...
12,MR146041,St. Vincent's and Silveira Properties,17.636364,1,1.0,1.0,1.0,0.690722,0.680412,0.592784,...,0.587629,0.587629,0.587629,0.556701,1.0,1,0,0,0,0 0\n1 0\n2 0\n3 1\n4 ...
15,MR96041,Bothin Waterfront,42.0,1,0.725108,0.461039,0.225108,0.099567,0.075758,0.058442,...,0.04329,0.04329,0.041126,0.097403,1.0,1,0,0,0,0 0\n1 0\n2 0\n3 1\n4 ...


#### Plotting

In [22]:
from bokeh.transform import factor_cmap
from bokeh.palettes import Spectral5
from bokeh.models.tickers import FixedTicker
# from beakerx import *

# from IPython.display import display, HTML

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [23]:
pcaslr = pd.read_csv('../data/pca/dataproducts/pca_vis_slrscenarios_designation.csv', dtype={'dataqual':'str'})
scenario_labels = list(pcaslr)[3:14]
numeric_index = [int(s.strip('inches')) for s in scenario_labels]

In [24]:
output_notebook()

pcafactors = list(set(pcaslr['dataqual']))

source = ColumnDataSource(data=dict(
            xs=[numeric_index]*len(pcaslr.index.values),
            ys=[pcaslr.loc[i, scenario_labels] for i in pcaslr.index.values],
            name=list(pcaslr['name']),
            pud=list(pcaslr['pud']),
            dataqual=list(pcaslr['dataqual'])
        )) 

p = figure(width=900, height=400, x_range=(-1,110),
           title='Each line is a PCA',
            x_axis_label = "inches of sea-level-rise",
            y_axis_label = "proportion of PUD outside the flood zone")
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.multi_line(xs='xs',
                ys='ys',
                source=source,
#                 line_color='red',
                line_color=factor_cmap('dataqual', palette=Spectral3, factors=pcafactors),
                line_alpha=0.4,
                line_width=1.5)

p.xaxis.ticker = FixedTicker(ticks=numeric_index)

p.add_tools(HoverTool(show_arrow=False, line_policy='next', tooltips=[
    ('PCA', '@name'),
    ('PUD', '@pud')
]))

# p.legend.orientation = "vertical"
# p.legend.location = "right"

# Summary of Recreation by SLR Scenarios
1) BCDC intersected the PCA polygons with the flood zone polygons for all SLR scenarios.  
2) We counted Photo-user-days (PUD) inside those flood areas for each scenario  
3) Results summarized here as the proportion of PUDs outside the flood zone of each SLR scenario, for each PCA

### Table: Proportion of PUDs outside the flood zone of each SLR scenario, for each PCA
The 'pud' column represents the baseline average annual photo-user-days, aka the '0inches' scenario.

In [25]:
pcaslr.sort_values(by=['pud'], ascending=False)

Unnamed: 0,uid,name,pud,0inches,12inches,24inches,36inches,48inches,52inches,66inches,...,84inches,96inches,108inches,100yr,sfrprecipstorm,typenl,typeal,typeug,typerr,dataqual
45,AL356001,Oakland Urban Greening,2909.545455,1,9.908764e-01,9.890330e-01,9.839088e-01,9.198250e-01,8.768942e-01,8.098422e-01,...,6.678644e-01,6.368067e-01,6.125293e-01,9.182315e-01,1.000000,0,0,1,0,0 0\n1 0\n2 0\n3 1\n4 ...
48,NP16055,Napa County Agricultural Lands and Watersheds,763.727273,1,9.885728e-01,9.854779e-01,9.835734e-01,9.829782e-01,9.825021e-01,9.809546e-01,...,9.797643e-01,9.796453e-01,9.795262e-01,8.312106e-01,1.000000,0,1,0,0,0 0\n1 0\n2 0\n3 1\n4 ...
29,MULTI46075,California Coastal Trail,737.000000,1,9.937091e-01,9.914888e-01,9.879117e-01,9.847046e-01,9.839645e-01,9.781670e-01,...,9.765635e-01,9.759467e-01,9.754533e-01,7.745159e-01,0.991242,0,0,0,1,0 0\n1 0\n2 0\n3 1\n4 ...
123,SM146081,Southern San Mateo Coast,536.000000,1,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,...,1.000000e+00,1.000000e+00,1.000000e+00,5.688602e-01,1.000000,1,1,0,1,0 0\n1 0\n2 0\n3 1\n4 ...
108,SN116097,Coastal Access and Resource Protection,523.545455,1,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,...,1.000000e+00,1.000000e+00,1.000000e+00,7.067199e-01,1.000000,1,0,0,0,0 0\n1 0\n2 0\n3 1\n4 ...
107,SN66097,Coastal Sonoma to Armstrong Redwoods,421.181818,1,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,...,1.000000e+00,1.000000e+00,1.000000e+00,6.863803e-01,1.000000,1,0,0,0,0 0\n1 0\n2 0\n3 1\n4 ...
159,SF106075,Crosstown Trail: Connecting Twin Peaks Bio-Reg...,375.363636,1,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,...,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,0.981109,1,0,1,1,0 0\n1 0\n2 0\n3 1\n4 ...
59,SM16081,Montara Mountain Complex,368.000000,1,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,...,1.000000e+00,1.000000e+00,1.000000e+00,6.820652e-01,1.000000,1,0,0,0,0 0\n1 0\n2 0\n3 1\n4 ...
49,NP66055,Napa Valley - Napa River Corridor,362.272727,1,9.329987e-01,9.267252e-01,9.124216e-01,9.099122e-01,9.066499e-01,9.008783e-01,...,8.898369e-01,8.717691e-01,8.582183e-01,4.205772e-01,1.000000,1,0,0,0,0 0\n1 0\n2 0\n3 1\n4 ...
162,AL306001,Oakland Natural Landscapes,315.636364,1,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,...,1.000000e+00,1.000000e+00,1.000000e+00,9.686060e-01,1.000000,1,0,0,0,0 0\n1 0\n2 0\n3 1\n4 ...


### Below, each line is a PCA (a row of the table )

In [26]:
show(p, notebook_handle=True)