In [19]:
from datetime import datetime, date, timedelta
from pathlib import Path

import altair as alt
import altair_latimes as lat
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tableauscraper import TableauScraper as TS

### Download the Tableau dashboard

In [20]:
# Public Tableau view that backs the scrape below.
url = (
    "https://public.tableau.com/views/"
    "CaliforniaUrbanWaterProduction_15785959527960/"
    "StatewideUseandProduction"
)

In [21]:
# Create a scraper, point it at the dashboard URL, and keep the resulting
# workbook object; the later cells navigate sheets and filters through it.
ts = TS()
ts.loads(url)  # presumably performs the network request for the dashboard
workbook = ts.getWorkbook()

Get the conservation figures for each hydrologic region, one month at a time. (District-level figures are not collected in this notebook.)

In [22]:
# Open the "Cumulative Savings " dashboard (the trailing space is part of the
# sheet name) and pull the "Savings by Hydro" worksheet once per value of the
# "Strmonth" filter. Each filtered view yields one dataframe in `df_list`.
conservation = workbook.goToSheet("Cumulative Savings ")

# Defined up front so later cells never hit a NameError when the worksheet
# is missing; we fail loudly below instead.
df_list = []

hydro_ws = next(
    (ws for ws in conservation.worksheets if ws.name == "Savings by Hydro"),
    None,
)
if hydro_ws is None:
    raise ValueError('Worksheet "Savings by Hydro" was not found in the dashboard')

# Look the filter up by column name rather than trusting that it is the first
# entry returned by getFilters().
month_filter = next(
    (f for f in hydro_ws.getFilters() if f["column"] == "Strmonth"),
    None,
)
if month_filter is None:
    raise ValueError('Filter "Strmonth" was not found on worksheet "Savings by Hydro"')

filter_values = month_filter["values"]
for v in filter_values:
    # setFilter returns a new workbook reflecting the filtered view.
    wb = hydro_ws.setFilter("Strmonth", v)
    df_list.append(wb.getWorksheet("Savings by Hydro").data)



In [23]:
# Sanity check: number of per-filter-value dataframes collected above.
len(df_list)

10

In [24]:
# Stack the per-month frames into one table. Each frame keeps its own row
# labels, so index values repeat across months in the combined frame.
hydro_df = pd.concat(df_list)

---
### Clean up dataframe

In [7]:
# Inspect the raw Tableau column names before trimming and renaming.
hydro_df.columns

Index(['Region-value', 'Region-alias', 'Strmonth-value', 'Strmonth-alias',
       'SUM(Baseline)-alias', 'SUM(count_supplier)-alias',
       'SUM(Current)-alias', 'SUM(Total Population Served)-alias',
       'ATTR(Year)-alias', 'ATTR(Hydrologic Region)-alias',
       'AGG(Savings_calc)-alias'],
      dtype='object')

In [8]:
# Raw Tableau columns retained for the cleaned table, in output order.
keeps = [
    "Region-value",
    "Strmonth-alias",
    "SUM(Baseline)-alias",
    "SUM(count_supplier)-alias",
    "SUM(Current)-alias",
    "SUM(Total Population Served)-alias",
    "ATTR(Year)-alias",
    "AGG(Savings_calc)-alias",
]

In [9]:
# Keep every row but only the columns listed in `keeps`.
hydro_trim = hydro_df.loc[:, keeps]

In [10]:
# Maps raw Tableau column names to the snake_case names used in the output.
rename = {
    "Region-value": "hydrologic_region",
    "Strmonth-alias": "month",
    "SUM(Baseline)-alias": "water_use_baseline",
    "SUM(count_supplier)-alias": "supplier_count",
    "SUM(Current)-alias": "water_use_current",
    "SUM(Total Population Served)-alias": "total_population_served",
    "ATTR(Year)-alias": "year",
    "AGG(Savings_calc)-alias": "pct_savings",
}

In [11]:
# Apply the column-name mapping; rename returns a new frame.
hydro_rename = hydro_trim.rename(rename, axis="columns")

In [12]:
# Strip thousands separators and store the baseline figures as integers.
# - astype(str) first keeps the cell re-runnable: on a second run the column
#   is already numeric and the .str accessor would otherwise raise.
# - "int64" is explicit because the values exceed the 32-bit range that plain
#   `int` can map to on some platforms.
hydro_rename['water_use_baseline'] = (
    hydro_rename['water_use_baseline']
    .astype(str)
    .str.replace(",", "", regex=False)
    .astype("int64")
)

In [13]:
# Strip thousands separators and store the current-use figures as integers.
# - astype(str) first keeps the cell re-runnable: on a second run the column
#   is already numeric and the .str accessor would otherwise raise.
# - "int64" is explicit because the values exceed the 32-bit range that plain
#   `int` can map to on some platforms.
hydro_rename['water_use_current'] = (
    hydro_rename['water_use_current']
    .astype(str)
    .str.replace(",", "", regex=False)
    .astype("int64")
)

In [14]:
# Rows whose region is the "%all%" placeholder are relabeled "Statewide".
is_all_regions = hydro_rename["hydrologic_region"] == "%all%"
hydro_rename.loc[is_all_regions, "hydrologic_region"] = "Statewide"

In [15]:
# Rows whose year is the "%null%" placeholder are relabeled "Cumulative".
is_null_year = hydro_rename["year"] == "%null%"
hydro_rename.loc[is_null_year, "year"] = "Cumulative"

In [16]:
# Preview only the statewide rows as a spot check of the cleaned table.
statewide_rows = hydro_rename["hydrologic_region"].eq("Statewide")
hydro_rename.loc[statewide_rows]

Unnamed: 0,hydrologic_region,month,water_use_baseline,supplier_count,water_use_current,total_population_served,year,pct_savings
0,Statewide,July,204221072888,410,200511267287,36968289,2021,-0.018166
0,Statewide,August,207674317632,407,197358551096,37045016,2021,-0.049673
0,Statewide,September,188005313236,409,180684670905,37020222,2021,-0.038938
0,Statewide,October,175983659543,406,152705229271,36933245,2021,-0.132276
0,Statewide,November,139726695385,408,129757391024,36735851,2021,-0.071349
0,Statewide,December,129628463322,406,109347218497,36929788,2021,-0.156457
0,Statewide,January,105864952892,393,108703127024,36408458,2022,0.026809
0,Statewide,February,113897908663,389,114002707099,36271500,2022,0.00092
0,Statewide,March,109872620466,371,130606010581,35678809,2022,0.188704
0,Statewide,Cumulative,1285432557390,341,1238005199083,34531625,Cumulative,-0.036896


### Output

In [18]:
# Write the cleaned table without the row index. The target folder is created
# first so the export also works on a fresh checkout where data/raw is absent.
output_path = Path("data/raw/water-conservation-by-region.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
hydro_rename.to_csv(output_path, index=False)