In [114]:
import pandas as pd
import altair as alt

Import the Excel workbook downloaded from the [dashboard](https://www.waterboards.ca.gov/water_issues/programs/conservation_portal/water-use-explorer/) site

In [52]:
# Load the "Averaged Data and Bins" worksheet of the downloaded workbook.
df = pd.read_excel(
    io='data/raw/provisional-data-impacts-for-making-conservation-a-way-of-life.xlsx',
    sheet_name='Averaged Data and Bins',
)

Clean `Agency` column

In [53]:
# Normalise agency names: a double space becomes ", " and " Of" is
# lower-cased to " of".
df['Agency'] = (
    df['Agency']
    .str.replace('  ', ', ')
    .str.replace(' Of', ' of')
)

Correct a typo in one agency name ("Westminister" → "Westminster")

In [169]:
# Rewrite the misspelled "Westminister" entry with the correct city name.
df.loc[df['Agency'].eq('Westminister, City of'), 'Agency'] = 'Westminster, City of'

Trim columns

In [170]:
# Keep the agency name plus, for each target year (2025-2040), the required
# reduction and the bin label that goes with it.
trim_columns = [
    'Agency',
    'Reductions needed to meet the objective based on 2025 standards, relative to the subset of urban uses subject to standards',
    '2025 Objective Reductions Bin',
    'Reductions needed to meet the objective based on 2030 standards, relative to the subset of urban uses subject to standards',
    '2030 Objective Reductions Bin',
    'Reductions needed to meet the objective based on 2035 standards, relative to the subset of urban uses subject to standards',
    '2035 Objective Reductions Bin',
    'Reductions needed to meet the objective based on 2040 standards, relative to the subset of urban uses subject to standards',
    '2040 Objective Reductions Bin',
]
trim_df = df[trim_columns]

In [171]:
# Preview the trimmed frame (the notebook display elides the middle rows).
trim_df

Unnamed: 0,Agency,"Reductions needed to meet the objective based on 2025 standards, relative to the subset of urban uses subject to standards",2025 Objective Reductions Bin,"Reductions needed to meet the objective based on 2030 standards, relative to the subset of urban uses subject to standards",2030 Objective Reductions Bin,"Reductions needed to meet the objective based on 2035 standards, relative to the subset of urban uses subject to standards",2035 Objective Reductions Bin,"Reductions needed to meet the objective based on 2040 standards, relative to the subset of urban uses subject to standards",2040 Objective Reductions Bin
0,"Adelanto, City of",-0.080126,5-10% Reduction,-0.134278,10-20% Reduction,-0.202074,20-30% Reduction,-0.233978,20-30% Reduction
1,Alameda County Water District,0.000000,No Reduction,0.000000,No Reduction,-0.022461,Less Than 5% Reduction,-0.056130,5-10% Reduction
2,Alco Water Service,-0.162633,10-20% Reduction,-0.210086,20-30% Reduction,-0.259557,20-30% Reduction,-0.282837,20-30% Reduction
3,"Alhambra, City of",-0.060470,5-10% Reduction,-0.124691,10-20% Reduction,-0.179862,10-20% Reduction,-0.205826,20-30% Reduction
4,Amador Water Agency,0.000000,No Reduction,0.000000,No Reduction,0.000000,No Reduction,0.000000,No Reduction
...,...,...,...,...,...,...,...,...,...
391,Yuba City,0.000000,No Reduction,-0.012563,Less Than 5% Reduction,-0.108530,10-20% Reduction,-0.153690,10-20% Reduction
392,Yucaipa Valley Water District,0.000000,No Reduction,0.000000,No Reduction,0.000000,No Reduction,-0.048620,Less Than 5% Reduction
393,"Chowchilla, City of",-0.156339,10-20% Reduction,-0.190892,10-20% Reduction,-0.270763,20-30% Reduction,-0.308350,Greater Than 30% Reduction
394,Mountain House Community Services District,-0.326492,Greater Than 30% Reduction,-0.357461,Greater Than 30% Reduction,-0.393124,Greater Than 30% Reduction,-0.409907,Greater Than 30% Reduction


Count number of agencies in each reduction category, or bin

In [172]:
# Agency name followed by the bin-label column for each target year.
bins_columns = ['Agency'] + [
    f'{year} Objective Reductions Bin' for year in (2025, 2030, 2035, 2040)
]

In [173]:
# Reshape to long form: one row per agency and bin column, with the column
# name in `variable` and the bin label in `value` (the pd.melt defaults).
melt_bins = trim_df[bins_columns].melt(id_vars='Agency')

In [174]:
# Count rows for every (bin label, bin column) pair. No `values` argument is
# given, so the remaining `Agency` column is the one that gets counted.
table = melt_bins.pivot_table(
    index=['value'],
    columns=['variable'],
    aggfunc="count",
).reset_index()

In [175]:
# Flatten the two-level column index produced by the pivot by discarding its
# outer level.
table.columns = table.columns.droplevel(level=0)

In [176]:
# The bin-label column is left with an empty name after flattening; label it.
table = table.rename({'': 'Category'}, axis='columns')

In [177]:
# Make the labels categorical with the categories listed from the smallest
# to the largest reduction, so that sorting follows this order rather than
# alphabetical order.
reduction_order = [
    'No Reduction',
    'Less Than 5% Reduction',
    '5-10% Reduction',
    '10-20% Reduction',
    '20-30% Reduction',
    'Greater Than 30% Reduction',
]
table['Category'] = pd.Categorical(table['Category'], categories=reduction_order)

In [178]:
# Order the rows by the categorical order of `Category`.
table = table.sort_values(by='Category')

In [179]:
# Shorten the bin headers to their year ("2025 Objective Reductions Bin" -> "2025").
# A literal replace is used instead of str.strip(): strip() treats its argument
# as a *set of characters* to trim from both ends, not as a suffix, so it could
# also eat into any other header that begins or ends with one of those characters.
table.columns = table.columns.str.replace(' Objective Reductions Bin', '', regex=False)

In [180]:
# Show the summary: one row per reduction category, one count column per year.
table

variable,Category,2025,2030,2035,2040
5,No Reduction,258,227,150,124
4,Less Than 5% Reduction,39,43,45,33
2,5-10% Reduction,25,31,49,50
0,10-20% Reduction,43,52,69,83
1,20-30% Reduction,25,33,48,60
3,Greater Than 30% Reduction,6,10,35,46


In [181]:
# Share of the 2040 counts that fall outside the two smallest bins, i.e. in
# the bins labelled 5-10% or more.
small_bins = ['No Reduction', 'Less Than 5% Reduction']
table.loc[~table['Category'].isin(small_bins), '2040'].sum() / table['2040'].sum()

0.6035353535353535

In [182]:
# Write the category summary to disk without the row index.
table.to_csv(
    path_or_buf='data/processed/summary-by-reduction-category.csv',
    index=False,
)

Create a dataframe of agencies and reduction targets for lookup table

In [183]:
# Agency name plus the four numeric reduction columns (no bin labels).
# The copy makes the later column edits act on an independent frame.
agency_columns = [
    'Agency',
    'Reductions needed to meet the objective based on 2025 standards, relative to the subset of urban uses subject to standards',
    'Reductions needed to meet the objective based on 2030 standards, relative to the subset of urban uses subject to standards',
    'Reductions needed to meet the objective based on 2035 standards, relative to the subset of urban uses subject to standards',
    'Reductions needed to meet the objective based on 2040 standards, relative to the subset of urban uses subject to standards',
]
agency_df = trim_df[agency_columns].copy()

In [184]:
# Replace the verbose reduction headers with their target year.
short_names = ['Agency', '2025', '2030', '2035', '2040']
agency_df.columns = short_names

Multiply the fractional reductions by 100 to express them as percentages

In [185]:
# Scale the four year columns by 100.
# NOTE: re-running this cell scales the values again.
year_columns = ['2025', '2030', '2035', '2040']
agency_df[year_columns] = agency_df[year_columns].mul(100)

In [186]:
# Write the per-agency lookup table to disk without the row index.
agency_df.to_csv(
    path_or_buf='data/processed/agency-table.csv',
    index=False,
)