# PCAs - baseline recreation, currently and under 10 SLR scenarios
* Photo-user-days (PUD) and Twitter-user-days (TUD) in each PCA
* Proportion of PUDs inside the flood zone of each SLR scenario, for each PCA

In [1]:
import pandas as pd
import geopandas as gpd
# import matplotlib.pyplot as plt
# %matplotlib inline
import numpy as np
import os
import glob
from functools import reduce

from bokeh.plotting import figure, output_notebook, show, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.layouts import row
from bokeh.transform import factor_cmap
from bokeh.palettes import Spectral3, Dark2
from bokeh.models.tickers import FixedTicker
from bokeh.resources import INLINE
# from beakerx import *

# from IPython.display import display, HTML

# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"

In [2]:
# Configure Bokeh to render figures inline in this notebook for later show() calls.
# output_notebook(resources=INLINE)
output_notebook()

### PUD and TUD distributions for the PCA network

The PCA shapefile used for PUDs and TUDs is the one provided here: https://abag.ca.gov/priority/conservation/

The dissolved versions were dissolved on 'joinkey', which appeared to be a unique identifier.

The non-dissolved versions are not dissolved; they use the concatenation of ('joinkey', 'fipco') as the unique id, which matches what BCDC did during their SLR intersections.

In [3]:
# Average annual user-days per PCA polygon (keyed by 'pid') for the
# non-dissolved PCA layer.  Inputs for the dissolved layer, kept for reference:
#   '../data/pca/pca_flickr/userdays_avg_annual_bypid.csv'
#   '../data/pca/pca_twitter/userdays_avg_annual_bypid.csv'
pudtable = pd.read_csv(
    '../data/pca/pca_flickr/not_dissolved/userdays_avg_annual_bypid.csv'
).rename(columns={'avg_ann_ud': 'pud'})
tudtable = pd.read_csv(
    '../data/pca/pca_twitter/tud_nondissolved_pcas/userdays_avg_annual_bypid.csv'
).rename(columns={'avg_ann_ud': 'tud'})

# Inner join on 'pid' keeps polygons present in both tables;
# log1p is used so that zero user-days stay finite.
data = pudtable.merge(tudtable, on='pid').assign(
    logpud=lambda df: np.log1p(df['pud']),
    logtud=lambda df: np.log1p(df['tud'])
)

In [4]:
# fig, axs = plt.subplots(1,2, figsize=(14,5))
# data.hist('pud', bins=50, ax=axs[0])
# data.hist('tud', bins=50, ax=axs[1])

### PUD and TUD are correlated, but capture slightly different visitation patterns

In [5]:
# fig, axs = plt.subplots(1,2, figsize=(14,5))
# data.plot.scatter(x='pud', y='tud', ax=axs[0])
# data.plot.scatter(x='logpud', y='logtud', ax=axs[1])

### PUD, TUD are influenced by area of PCA

In [6]:
# PCA polygons carrying the user-days polygon id ('pid'); non-dissolved layer.
# Dissolved alternative, kept for reference:
#   '../data/pca/pca_twitter/shp/Priority_Conservation_Areas_current_dissolve_pid.shp'
pca_shp = gpd.read_file(
    '../data/pca/pca_twitter/tud_nondissolved_pcas/shp/Priority_Conservation_Areas_current_pid.shp'
)
# NOTE(review): the 'area_m2' name assumes the layer's CRS is projected in metres -- confirm.
pca_shp['area_m2'] = pca_shp.area

# Attribute table of the published PCA layer, matched to the polygons
# through the composite key (joinkey, fipco).
pca_metadata = gpd.read_file('../data/pca/shapefiles/Priority_Conservation_Areas_current.dbf')
pca_key_cols = ['joinkey', 'fipco']
pcadat = pd.merge(
    pca_shp[['pid'] + pca_key_cols + ['area_m2']],
    pca_metadata,
    on=pca_key_cols
)

In [7]:
# Attach the PCA attributes to the visitation table, leaving out the geometry
# and the bookkeeping columns that are not analysed in this notebook.
unused_meta_cols = ['geometry', 'adopted', 'lastupdate', 'lat', 'lon']
pcadat_slim = pcadat.drop(labels=unused_meta_cols, axis='columns')
data = data.merge(pcadat_slim, on='pid')

In [8]:
# 'dataqual' is a category code: keep it as text so it can be used as a plotting factor.
data['dataqual'] = data['dataqual'].astype(str)
data['logarea'] = np.log1p(data['area_m2'])
# Visitation density: user-days per unit of polygon area, for each platform.
for userday_col in ('pud', 'tud'):
    data[userday_col + '_per_m2'] = data[userday_col] / data['area_m2']

### Designation types of PCAs
NL = Natural Landscape  
AL = Agricultural Land  
UG = Urban Greening  
RR = Regional Recreation

In [9]:
# Persist the per-PCA visitation + attribute table; this CSV is read back in
# by the SLR-scenario section further down.
data.to_csv('../data/pca/dataproducts/pca_vis_designation.csv', index=False)

# #### joining data to shp to make some maps in qgis
# NOTE(review): pca_shp and data both carry 'joinkey', 'fipco' and 'area_m2', so
# merging on 'pid' alone yields _x/_y suffixed duplicates in the output --
# confirm this is acceptable for the QGIS layer.
shp = pca_shp.merge(data, on='pid')
shp.to_file('../data/pca/dataproducts/pca_current_visdata.shp')

In [10]:
# Data-quality classes and their colours, listed in the SAME order so that
# factor_cmap pairs them deterministically: '1' and '2' -> red, '0' -> grey.
# An explicit list is used instead of list(set(data['dataqual'])) because the
# iteration order of a set of strings changes between kernel sessions, which
# could silently shuffle the colours.  Values not listed here fall back to
# factor_cmap's nan_color.
pcafactors = ['1', '2', '0']
pal = Dark2[3]
dqpal = ['#c1131c', '#c1131c', '#888888'] # colours for pcafactors ['1', '2', '0']
dataqual_colors = factor_cmap('dataqual', palette=dqpal, factors=pcafactors)

# One shared data source feeds both scatter plots.
source = ColumnDataSource(data)
# A HoverTool instance is created per figure.
hover1 = HoverTool(tooltips=[("name", "@name")])
hover2 = HoverTool(tooltips=[("name", "@name")])

# Left panel: flickr photo-user-days vs. PCA area (both log-transformed).
p = figure(plot_width=400, plot_height=400, tools=[hover1],
           title="PCA - flickr visitation rates and pca size")

p.circle('logarea', 'logpud', size=5,
         color=dataqual_colors,
         alpha=0.5,
         source=source,
         legend='dataqual')
p.xaxis.axis_label = 'log(area)'
p.yaxis.axis_label = 'log(pud)'

# Right panel: twitter user-days vs. PCA area; no legend, it shares the colours of the left panel.
p2 = figure(plot_width=400, plot_height=400, tools=[hover2],
           title="PCA - twitter visitation rates and pca size")

p2.circle('logarea', 'logtud', size=5,
          color=dataqual_colors,
          alpha=0.5,
          source=source)
p2.xaxis.axis_label = 'log(area)'
p2.yaxis.axis_label = 'log(tud)'

p.legend.orientation = "vertical"
p.legend.location = "top_left"

In [11]:
# Display the flickr and twitter scatter plots side by side.
show(row(p, p2), notebook_handle=True)

## PCA recreation by SLR scenarios
1) BCDC intersected the PCA polygons with the flood zone polygons for all SLR scenarios.  
2) We counted PUDs inside those flood areas for each scenario  
3) Results are summarized here as the proportion of PUDs inside the flood zone of each SLR scenario, for each PCA

#### Data Processing

In [12]:
def _first_match(pattern):
    '''Return the first (sorted) path matching a glob pattern.

    Raises IndexError, the same exception type that ``glob.glob(pattern)[0]``
    raises, but with a message naming the pattern that found nothing.
    '''
    matches = sorted(glob.glob(pattern))  # glob order is arbitrary; sort for determinism
    if not matches:
        raise IndexError('no file matches %r' % pattern)
    return matches[0]


def join_scenario_puds(scenario, mediaplatform, whole_pca_df):
    '''
    Proportion of each PCA's total user-days that fall inside the flood-zone
    polygons of one SLR scenario.

    scenario (string): one of a list of directory names like
        '../bcdc_slr/PCAexposure_201710/slr_scenarios/12inches'
    mediaplatform (string): either 'flickr' or 'twitter'
        these are subdirectories of 'scenario'
    whole_pca_df (pandas dataframe): it has PCA metadata like the name and total PUD/TUD.
        Must contain 'joinkey', 'fipco' and the total user-days column:
        'pud' for flickr, 'tud' for twitter (any other platform falls back to 'pud').

    returns (pandas dataframe): columns 'joinkey', 'fipco' and one column named
        after the scenario directory (e.g. '12inches') holding
        scenario user-days / total user-days.  Every row of whole_pca_df is
        kept (right join); PCAs with no user-day rows in this scenario get NaN.

    raises IndexError if the '*_pid' or 'spjoi*.dbf' crosswalk file is missing.
    '''
    print(scenario)

    # denominator: the whole-PCA user-days of the same platform as the numerator
    total_col = {'flickr': 'pud', 'twitter': 'tud'}.get(mediaplatform, 'pud')

    # table with userdays in the scenario polygons, keyed by pid
    spud = pd.read_csv(os.path.join(scenario, mediaplatform, "userdays_avg_annual_bypid_2005_2015.csv"))

    # table linking userdays id (pid) and scenario polygon id ('FID_spjoin' or sometimes 'FID_Priori')
    # sometimes this table includes an 'area' column sometimes not, we won't use it
    pid_fidspjoin_crswlk = pd.read_csv(_first_match(os.path.join(scenario, mediaplatform, '*_pid')))
    if 'FID_spjoin' not in pid_fidspjoin_crswlk.columns:
        pid_fidspjoin_crswlk = pid_fidspjoin_crswlk.rename(columns={'FID_Priori': 'FID_spjoin'})
    pid_fidspjoin_crswlk = pid_fidspjoin_crswlk[['FID_spjoin', 'pid']]

    # table linking scenario polygon id (TARGET_FID) and master PCA unique id (joinkey, fipco)
    fidspjoin_joinkey_crswlk = gpd.read_file(_first_match(os.path.join(scenario, 'spjoi*.dbf')))
    fidspjoin_joinkey_crswlk = fidspjoin_joinkey_crswlk[['TARGET_FID', 'joinkey', 'fipco']]

    # joins to get scenario userdays linked to master PCA unique id (joinkey, fipco)
    spud_fid = pd.merge(spud, pid_fidspjoin_crswlk, on='pid')
    dat = pd.merge(spud_fid, fidspjoin_joinkey_crswlk, left_on='FID_spjoin', right_on='TARGET_FID')

    ## join scenario userdays to whole-PCA userdays
    # Only the columns needed are merged, so metadata columns of whole_pca_df
    # (e.g. 'pid', 'name') cannot collide with the scenario columns.
    # NOTE(review): if a PCA maps to several scenario polygons it yields several
    # rows here, one proportion per polygon -- confirm whether they should be summed.
    dat2 = pd.merge(dat[['joinkey', 'fipco', 'avg_ann_ud']],
                    whole_pca_df[['joinkey', 'fipco', total_col]],
                    on=['joinkey', 'fipco'], how='right')

    # normpath so that a trailing slash does not produce an empty column name
    scen = os.path.basename(os.path.normpath(scenario))
    # proportion of userdays that are inside the flood zone
    dat2[scen] = dat2['avg_ann_ud'] / dat2[total_col]
    return dat2[['joinkey', 'fipco', scen]]

In [13]:
# Reload the saved per-PCA table, reading the code-like columns as text so that
# they can be concatenated into a key / used as plotting factors.
data = pd.read_csv(
    '../data/pca/dataproducts/pca_vis_designation.csv',
    dtype={'fipco': 'str', 'dataqual': 'str'}
)
# data['desig'].fillna('', inplace=True) # because some PCAs missing data in the original designation table
# single key per PCA: joinkey followed by fipco
data['uid'] = data['joinkey'] + data['fipco']

In [14]:
# Root folder holding one sub-directory per flood scenario.
basedir = '../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/'
# excluding the 500yr scenario, since I'm missing the spjoin.dbf table
scenarios = ['%dinches' % depth for depth in (12, 24, 36, 48, 52, 66, 77, 84, 96, 108)]
scenarios += ['100yr', 'sfrprecipstorm']

In [15]:
# One table of flickr user-day proportions per scenario.
results = [
    join_scenario_puds(os.path.join(basedir, scen), 'flickr', data)
    for scen in scenarios
]

# make single table: one column per scenario, joined on the PCA key columns
pca_key = ['joinkey', 'fipco']
slrdata = reduce(lambda left, right: pd.merge(left, right, on=pca_key), results)

# single key for PCAs
slrdata['uid'] = slrdata['joinkey'] + slrdata['fipco']
slrdata = slrdata.drop(labels=pca_key, axis='columns').set_index('uid')
# baseline column: zero for every PCA
slrdata['0inches'] = 0
scenario_labels = ['0inches'] + scenarios
# order the columns by scenario and turn missing proportions into 0
slrdata = slrdata[scenario_labels].fillna(0)

# numeric_index = [int(s.strip('inches')) for s in list(slrdata)]

../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/12inches
../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/24inches
../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/36inches
../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/48inches
../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/52inches
../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/66inches
../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/77inches
../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/84inches
../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/96inches
../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/108inches
../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/100yr
../data/pca/bcdc_slr/PCAexposure_201710/slr_scenarios/sfrprecipstorm


In [16]:
# join some PCA metadata
pcaslr = pd.merge(slrdata, data[['uid', 'pud', 'name', 'typenl', 'typeal', 'typeug', 'typerr', 'dataqual']], left_index=True, right_on='uid', how='left')
# could also join the % area flooded calculations from BCDC's table.

pcaslr = pcaslr[['uid', 'name', 'pud'] + scenario_labels + ['typenl', 'typeal', 'typeug', 'typerr', 'dataqual']]
pcaslr.to_csv('../data/pca/dataproducts/pca_vis_slrscenarios_designation.csv', index=False)

#### Plotting

In [17]:
pcaslr = pd.read_csv('../data/pca/dataproducts/pca_vis_slrscenarios_designation.csv', dtype={'dataqual':'str'})

# Pick the '<N>inches' scenario columns by name rather than by position, so the
# selection survives added or reordered columns.  '100yr' and 'sfrprecipstorm'
# have no inch value and are therefore not part of the line plot.
# str.strip('inches') removes any of the characters i, n, c, h, e, s from both
# ends (it is not a suffix remover), so the suffix is sliced off explicitly.
inch_suffix = 'inches'
scenario_labels = [col for col in pcaslr.columns
                   if col.endswith(inch_suffix) and col[:-len(inch_suffix)].isdigit()]
numeric_index = [int(col[:-len(inch_suffix)]) for col in scenario_labels]

In [18]:
# Data-quality classes and their colours, in matching order: '1' and '2' -> red,
# '0' -> grey.  The factor list is explicit because list(set(...)) has no
# stable order for strings across kernel sessions, which could shuffle the
# colours.  The palette is defined here so this plotting section can run from
# the saved CSV alone.
pcafactors = ['1', '2', '0']
dqpal = ['#c1131c', '#c1131c', '#888888']
# pal = Dark2[3]

# One line per PCA: the same x positions (inches of SLR) for every line, and
# that PCA's row of scenario proportions as y values.
n_pcas = len(pcaslr.index)
source = ColumnDataSource(data=dict(
            xs=[numeric_index] * n_pcas,
            ys=pcaslr[scenario_labels].values.tolist(),
            name=list(pcaslr['name']),
            pud=list(pcaslr['pud']),
            dataqual=list(pcaslr['dataqual'])
        ))

p = figure(plot_width=900, plot_height=400, x_range=(-1,110),
           title='Each line is a PCA',
            x_axis_label = "inches of sea-level-rise",
            y_axis_label = "proportion of PUD exposed to the flood zone")
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
r = p.multi_line(xs='xs', ys='ys', source=source,
#                 line_color='red',
                line_color=factor_cmap('dataqual', palette=dqpal, factors=pcafactors),
#                 legend='dataqual',
                line_alpha=0.6, line_width=0.5,
                hover_line_alpha=1.0, hover_line_color='#ff0000')
# hover_glyph is an attribute of the renderer returned by multi_line, not of the figure
r.hover_glyph.line_width=3
# put the ticks exactly at the modelled SLR depths
p.xaxis.ticker = FixedTicker(ticks=numeric_index)

p.add_tools(HoverTool(show_arrow=False, line_policy='next', tooltips=[
    ('PCA', '@name'),
    ('PUD', '@pud')
]))

# No legend is attached to this figure (the legend argument above is commented
# out), so the former p.legend.orientation / p.legend.location assignments had
# nothing to act on and were removed.

In [19]:
# show(p, notebook_handle=True)

# Summary of Recreation by SLR Scenarios
1) BCDC intersected the PCA polygons with the flood zone polygons for all SLR scenarios.  
2) We counted Photo-user-days (PUD) inside those flood areas for each scenario  
3) Results are summarized here as the proportion of PUDs inside the flood zone of each SLR scenario, for each PCA

### Table: Proportion of PUDs inside the flood zone of each SLR scenario, for each PCA
The 'pud' column represents the baseline average annual photo-user-days, aka the '0inches' scenario.

In [21]:
# Rank PCAs by their 'pud' value, largest first.
pcaslr.sort_values(by=['pud'], ascending=False)

Unnamed: 0,uid,name,pud,0inches,12inches,24inches,36inches,48inches,52inches,66inches,...,84inches,96inches,108inches,100yr,sfrprecipstorm,typenl,typeal,typeug,typerr,dataqual
45,AL356001,Oakland Urban Greening,2909.545455,0,0.009124,0.010967,0.016091,0.080175,0.123106,0.190158,...,0.332136,0.363193,0.387471,0.081768,0.000000,0,0,1,0,1
48,NP16055,Napa County Agricultural Lands and Watersheds,763.727273,0,0.011427,0.014522,0.016427,0.017022,0.017498,0.019045,...,0.020236,0.020355,0.020474,0.168789,0.000000,0,1,0,0,1
29,MULTI46075,California Coastal Trail,737.000000,0,0.006291,0.008511,0.012088,0.015295,0.016036,0.021833,...,0.023437,0.024053,0.024547,0.225484,0.008758,0,0,0,1,1
123,SM146081,Southern San Mateo Coast,536.000000,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.431140,0.000000,1,1,0,1,2
108,SN116097,Coastal Access and Resource Protection,523.545455,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.293280,0.000000,1,0,0,0,2
107,SN66097,Coastal Sonoma to Armstrong Redwoods,421.181818,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.313620,0.000000,1,0,0,0,2
159,SF106075,Crosstown Trail: Connecting Twin Peaks Bio-Reg...,375.363636,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.018891,1,0,1,1,2
59,SM16081,Montara Mountain Complex,368.000000,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.317935,0.000000,1,0,0,0,0
49,NP66055,Napa Valley - Napa River Corridor,362.272727,0,0.067001,0.073275,0.087578,0.090088,0.093350,0.099122,...,0.110163,0.128231,0.141782,0.579423,0.000000,1,0,0,0,1
162,AL306001,Oakland Natural Landscapes,315.636364,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.031394,0.000000,1,0,0,0,1


In [22]:
# Look up the row(s) for a single PCA by its name.
pcaslr.loc[pcaslr.name == 'Menlo Park and East Palo Alto Baylands']

Unnamed: 0,uid,name,pud,0inches,12inches,24inches,36inches,48inches,52inches,66inches,...,84inches,96inches,108inches,100yr,sfrprecipstorm,typenl,typeal,typeug,typerr,dataqual
39,SM106081,Menlo Park and East Palo Alto Baylands,41.545455,0,0.12035,0.420131,0.446389,0.483589,0.527352,0.603939,...,0.630197,0.636761,0.63895,0.577681,0.0,1,0,0,1,1


### Below, each line is a PCA (one row of the table)

In [23]:
# Render the figure currently bound to `p`.
show(p, notebook_handle=True)