In [None]:
# %cd ./work
# %pwd

In [None]:
# Execute the shared helper script inside this kernel's namespace.  Later cells
# call helpers that this notebook never defines or imports itself (ID_header,
# round_sig, getCountyLink, ...) -- presumably they are provided by this
# script; TODO confirm.  The relative path is resolved against the kernel's
# current working directory.
%run "../catalog_common.py"

In [None]:
# import sys
# sys.path.insert(0,'c:/MyDocs/OpenFF/src/openFF-catalog/')
# import catalog_common as cc

In [None]:
#preamble to analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pylab import gca, mpl

%matplotlib inline
import seaborn as sns
import matplotlib.ticker
from IPython.display import Markdown as md
from IPython.display import HTML, display
from time import sleep

# itables configuration: switch on interactive rendering for DataFrames and
# expose an explicit per-table renderer under the name iShow.
from itables import init_notebook_mode
init_notebook_mode(all_interactive=True)  # applies to every DataFrame displayed afterwards
from itables import show as iShow  # called explicitly by CountyCntTable
import itables.options as opt
opt.order = []  # no sorting

In [None]:
# Load this state's records, parse the date column into datetimes, and take
# the state name from the first row of the file.
alldf = pd.read_csv('state.csv', low_memory=False)
alldf['date'] = pd.to_datetime(alldf['date'])
statename = alldf['bgStateName'].iloc[0]

In [None]:
# Page banner for this state (title-cased name), then the shared page settings.
ID_header(
    statename.title(),
    subtitle='Open-FF State Summary',
    incl_links=True,
    link_up_level=True,
)
set_page_param()

In [None]:
def xlate_val(n):
    """Format a disclosure count as a compact heatmap annotation.

    Parameters
    ----------
    n : number
        Count to format.

    Returns
    -------
    '' when n is 0 (so empty cells stay unlabelled); the value of
    ``round_sig(n, 1)`` unchanged when n < 1000; otherwise the
    one-significant-figure value expressed in thousands with a 'k' suffix,
    e.g. '2k' or '10k'.
    """
    if n == 0:
        return ''
    rounded = round_sig(n, 1)
    if n < 1000:
        return rounded
    # Drop the last three digits instead of keeping only the first character:
    # the previous `rounded[0] + 'k'` labelled 10,000 (and anything rounding
    # up to it, such as 9,600) as '1k'.
    # NOTE(review): assumes round_sig returns a whole-number string, optionally
    # with thousands separators (e.g. '10,000') -- confirm against
    # catalog_common.
    digits = str(rounded).replace(',', '')
    if digits.isdigit() and len(digits) > 3:
        return digits[:-3] + 'k'
    # Unrecognised format: keep the previous leading-character label.
    return rounded[0] + 'k'

def make_annot(gb):
    """Build the County x year table of text labels used to annotate the heatmap.

    `gb` needs the columns County, year and UploadKey (a count).  Each count
    is converted with `xlate_val`; County/year combinations that are absent
    from `gb` become empty strings.  `gb` itself is not modified.
    """
    labelled = gb.assign(UploadKey=gb.UploadKey.map(xlate_val))
    label_grid = labelled.pivot(index='County', columns='year', values='UploadKey')
    return label_grid.fillna('')
    
def CountyMap(df):
    """Draw a county choropleth of disclosure counts for the current state.

    Only rows flagged 'YES' in both `loc_within_state` and
    `loc_within_county` are used.  Each county's value is the number of
    distinct UploadKey values among those rows.  When no row qualifies, two
    markdown notices are displayed and no map is drawn.

    Reads the notebook-level `statename` for the map centre, the zoom level
    and the notice text.
    """
    # Looked up before any filtering, exactly as the map call expects it.
    map_center = get_state_center(statename)

    is_mappable = (df['loc_within_state'] == 'YES') & (df['loc_within_county'] == 'YES')
    if not is_mappable.any():  # no valid fracks for this state
        display(md('## No mappable fracks for this state!'))
        display(md(f'Any data in this state set may be labeled incorrectly as {statename}'))
        return

    # First collapse to one row per disclosure, then count rows per county.
    county_keys = ['bgStateName', 'bgCountyName']
    per_disclosure = (df[is_mappable]
                      .groupby(county_keys + ['UploadKey'], as_index=False)['bgCAS']
                      .count())
    county_counts = (per_disclosure
                     .groupby(county_keys, as_index=False)['UploadKey']
                     .count()
                     .rename(columns={'bgStateName': 'StateName',
                                      'bgCountyName': 'CountyName',
                                      'UploadKey': 'value'}))

    # Per-state zoom overrides; every other state starts at zoom 6.
    zoom_overrides = {'texas': 5, 'california': 5, 'alaska': 4}
    initial_zoom = zoom_overrides.get(statename, 6)

    create_county_choropleth(
        county_counts,
        plotlog=True,
        custom_scale=[0, 1, 2, 3, 4, 5],
        start_loc=map_center,  # center of state's data
        legend_name='Number of FracFocus disclosures',
        start_zoom=initial_zoom,
        fields=['StateName', 'CountyName', 'orig_value'],
        aliases=['State: ', 'County: ', 'Number Fracking disclosures: '],
    )

def CountyCntTable(df):
    """Show a per-county summary table followed by a county-by-year heatmap.

    The table (rendered with ``iShow``) has one row per county, ordered by
    descending disclosure count, and lists the number of disclosures, the
    number of records whose bgCAS is 'proprietary', the summed
    TotalBaseWaterVolume (first value per disclosure, formatted by
    ``round_sig``) and up to four of the most frequent operators.

    The heatmap colours each county/year cell by the percentage of that
    county's disclosures dated in that year and annotates the cell with the
    label produced by ``make_annot``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns bgCountyName, UploadKey, date (datetime),
        bgOperatorName, bgCAS and TotalBaseWaterVolume.

    Reads the notebook-level ``statename`` when building the county links.
    """
    # ---- Searchable summary table ------------------------------------
    # One row per (county, disclosure), carrying the first date listed for it.
    disclosures = df.groupby(['bgCountyName', 'UploadKey'], as_index=False)['date'].first()
    disclosures['year'] = disclosures.date.dt.year.astype('str')

    county_totals = (disclosures.groupby(['bgCountyName'], as_index=False)['UploadKey']
                     .count()
                     .rename({'UploadKey': 'disclosure_count'}, axis=1))

    # Up to four most frequent operators per county, formatted by xlate_to_str.
    # NOTE(review): the aggregate returns an Index rather than a scalar;
    # confirm this is still accepted by the pandas version in use.
    top_ops = df.groupby('bgCountyName')['bgOperatorName'].agg(
        lambda x: x.value_counts().index[0:4]).reset_index()
    top_ops.bgOperatorName = top_ops.bgOperatorName.map(
        lambda x: xlate_to_str(x, '; ', sort=False))
    top_ops = top_ops.rename({'bgOperatorName': 'Top Operators'}, axis=1)
    table = pd.merge(county_totals, top_ops, on='bgCountyName', how='left')

    secret = (df[df.bgCAS == 'proprietary']
              .groupby('bgCountyName', as_index=False)['bgCAS']
              .count()
              .rename({'bgCAS': 'Trade Secret records'}, axis=1))
    table = pd.merge(table, secret, on='bgCountyName', how='left')
    # Counties without proprietary records come out of the left merge as NaN.
    # Assign the result back: fillna(inplace=True) on a column selection is
    # chained assignment, which pandas warns about and which leaves `table`
    # unchanged once Copy-on-Write is in effect.
    table['Trade Secret records'] = table['Trade Secret records'].fillna(0)

    # Water volume is recorded per disclosure, so take one value per
    # disclosure before summing by county.
    water = df.groupby(['bgCountyName', 'UploadKey'],
                       as_index=False)['TotalBaseWaterVolume'].first()
    water = (water.groupby('bgCountyName', as_index=False)['TotalBaseWaterVolume']
             .sum()
             .rename({'TotalBaseWaterVolume': 'tot_gallons_water'}, axis=1))
    water.tot_gallons_water = water.tot_gallons_water.map(lambda x: round_sig(x, 3))
    table = pd.merge(table, water, on='bgCountyName', how='left')

    table['County'] = ('<center><h4>'
                       + table.bgCountyName.str.title().map(
                           lambda x: getCountyLink(x, statename, x))
                       + '</h4></center>')
    table = table.drop('bgCountyName', axis=1)
    iShow(table.sort_values('disclosure_count', ascending=False)[
        ['County', 'disclosure_count', 'Trade Secret records',
         'tot_gallons_water', 'Top Operators']].reset_index(drop=True))

    # ---- Heatmap: share of each county's disclosures per year ---------
    yearly = disclosures.groupby(['bgCountyName', 'year'], as_index=False)['UploadKey'].count()
    yearly = yearly.rename({'bgCountyName': 'County'}, axis=1)
    yearly = pd.merge(yearly, county_totals, left_on='County',
                      right_on='bgCountyName', how='left')
    yearly.County = yearly.County.str.title()

    # Text labels must be built from the raw counts, i.e. before the counts
    # are replaced by percentages below.
    annotations = make_annot(yearly)
    yearly.UploadKey = yearly.UploadKey / yearly.disclosure_count * 100
    piv = yearly.pivot(index='County', columns='year', values='UploadKey').fillna(0)

    # Figure size grows with the number of years (width) and counties (height).
    _, ax = plt.subplots(figsize=(len(piv.columns) * .75, len(piv) / 3 + 3))
    sns.heatmap(piv, cmap="Reds", annot=annotations, fmt='', ax=ax)
    ax.set_ylabel('Counties', fontsize=14)
    ax.set_xlabel('Year', fontsize=14)
    ax.set_title("Percent Distribution of county's disclosures by year", fontsize=16)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45)
    ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=14)
    ax.xaxis.set_ticks_position('top')
    plt.show()
    display(md("     Annotations are the number of disclosures that year (rounded)"))

# Where are the fracking locations in this state?
This is not an exhaustive set of wells in these counties; it includes only those wells for which the operating company submitted a chemical disclosure to FracFocus.  In addition, this map omits disclosures whose location information is conflicting, for example when the latitude/longitude values fall outside the reported county.

In [None]:
# Draw the county choropleth from the full state data set.
CountyMap(alldf)

---
## County-based details

In [None]:
# Render the per-county summary table and the county-by-year heatmap.
CountyCntTable(alldf)