In [5]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, date, timedelta
from tableauscraper import TableauScraper as TS
import altair as alt
import altair_latimes as lat

### Download the Tableau dashboard

In [6]:
# Address of the public Tableau view that the scraper loads in the next cell.
url = "https://public.tableau.com/views/CaliforniaUrbanWaterProduction_15785959527960/StatewideUseandProduction"

In [7]:
# Create a TableauScraper instance, load the dashboard at `url`, and keep the
# resulting workbook object; later cells navigate its sheets via `workbook`.
ts = TS()
ts.loads(url)
workbook = ts.getWorkbook()

In [8]:
# Exploratory look at the "Cumulative Savings " sheet: for every worksheet it
# contains, print the worksheet's name followed by its data frame.
conservation = workbook.goToSheet("Cumulative Savings ")
for t in conservation.worksheets:
    # One print call with a newline separator emits the same two lines as
    # printing the name and the frame separately.
    print(f"worksheet name : {t.name}", t.data, sep="\n")

worksheet name : Savings by Hydro
         Region-value       Region-alias Strmonth-value Strmonth-alias  \
0               %all%              %all%     Cumulative     Cumulative   
1         Tulare Lake        Tulare Lake     Cumulative     Cumulative   
2      South Lahontan     South Lahontan     Cumulative     Cumulative   
3         South Coast        South Coast     Cumulative     Cumulative   
4   San Joaquin River  San Joaquin River     Cumulative     Cumulative   
5   San Francisco Bay  San Francisco Bay     Cumulative     Cumulative   
6    Sacramento River   Sacramento River     Cumulative     Cumulative   
7      North Lahontan     North Lahontan     Cumulative     Cumulative   
8         North Coast        North Coast     Cumulative     Cumulative   
9      Colorado River     Colorado River     Cumulative     Cumulative   
10      Central Coast      Central Coast     Cumulative     Cumulative   
11              %all%              %all%            Apr          April   
12  

Get the conservation figures for hydrologic regions and districts

In [23]:
# Collect one data frame per value of the "Strmonth" filter on the
# "Savings by Hydro" worksheet of the "Cumulative Savings " sheet.
conservation = workbook.goToSheet("Cumulative Savings ")

# Created up front so the name always exists after this cell runs and never
# carries frames over from a previous execution of the cell.
df_list = []

for t in conservation.worksheets:
    if t.name != "Savings by Hydro":
        continue

    # Look the filter up by its column name instead of assuming it is the
    # first entry returned by getFilters(); setFilter() below addresses the
    # filter by this same column name.
    month_filter = next(
        (f for f in t.getFilters() if f["column"] == "Strmonth"),
        None,
    )
    if month_filter is None:
        raise ValueError(
            "Worksheet 'Savings by Hydro' has no filter on column 'Strmonth'"
        )

    for v in month_filter["values"]:
        # Applying a filter value returns a new workbook; read the filtered
        # worksheet's data from it.
        wb = t.setFilter("Strmonth", v)
        monthly_df = wb.getWorksheet("Savings by Hydro").data
        df_list.append(monthly_df)

if not df_list:
    # Fail here with a clear message rather than at pd.concat further down.
    raise ValueError(
        "No data collected: worksheet 'Savings by Hydro' was not found or its "
        "'Strmonth' filter has no values"
    )



In [24]:
# Sanity check: number of per-filter-value frames collected by the loop above.
len(df_list)

10

In [25]:
# Stack the collected frames into a single table. Every frame brings its own
# positional row index, so ignore_index=True renumbers the rows and keeps the
# combined frame free of duplicate index labels.
hydro_df = pd.concat(df_list, ignore_index=True)

---
### Clean up dataframe

In [26]:
# Show the raw column names so the ones worth keeping can be selected below.
hydro_df.columns

Index(['Region-value', 'Region-alias', 'Strmonth-value', 'Strmonth-alias',
       'SUM(Baseline)-alias', 'SUM(count_supplier)-alias',
       'SUM(Current)-alias', 'SUM(Total Population Served)-alias',
       'ATTR(Year)-alias', 'ATTR(Hydrologic Region)-alias',
       'AGG(Savings_calc)-alias'],
      dtype='object')

In [27]:
# Columns retained from the raw frame, in output order.
keeps = [
    "Region-value",
    "Strmonth-alias",
    "SUM(Baseline)-alias",
    "SUM(count_supplier)-alias",
    "SUM(Current)-alias",
    "SUM(Total Population Served)-alias",
    "ATTR(Year)-alias",
    "AGG(Savings_calc)-alias",
]

In [28]:
# Keep every row but only the columns listed in `keeps`.
hydro_trim = hydro_df.loc[:, keeps]

In [29]:
# Mapping from the raw Tableau column names to the snake_case names used in
# the output files.
rename = {
    "Region-value": "hydrologic_region",
    "Strmonth-alias": "month",
    "SUM(Baseline)-alias": "water_use_baseline",
    "SUM(count_supplier)-alias": "supplier_count",
    "SUM(Current)-alias": "water_use_current",
    "SUM(Total Population Served)-alias": "total_population_served",
    "ATTR(Year)-alias": "year",
    "AGG(Savings_calc)-alias": "pct_savings",
}

In [30]:
# Apply the column-name mapping; rename() returns a new frame.
hydro_rename = hydro_trim.rename(mapper=rename, axis="columns")

In [31]:
# Strip thousands separators and convert the baseline figures to integers.
# Casting to str first keeps the cell re-runnable: on a second execution the
# column already holds integers, and the .str accessor would otherwise raise.
hydro_rename['water_use_baseline'] = (
    hydro_rename['water_use_baseline']
    .astype(str)
    .str.replace(",", "", regex=False)
    .astype(int)
)

In [32]:
# Strip thousands separators and convert the current-use figures to integers.
# Casting to str first keeps the cell re-runnable: on a second execution the
# column already holds integers, and the .str accessor would otherwise raise.
hydro_rename['water_use_current'] = (
    hydro_rename['water_use_current']
    .astype(str)
    .str.replace(",", "", regex=False)
    .astype(int)
)

In [33]:
# Rows whose region is the "%all%" placeholder are relabelled "Statewide".
hydro_rename.loc[
    hydro_rename["hydrologic_region"].eq("%all%"), "hydrologic_region"
] = "Statewide"

In [34]:
# Rows whose year is the "%null%" placeholder are relabelled "Cumulative".
hydro_rename.loc[
    hydro_rename["year"].eq("%null%"), "year"
] = "Cumulative"

In [35]:
# Statewide rows only.
statewide_df = hydro_rename.loc[hydro_rename["hydrologic_region"].eq("Statewide")]

In [36]:
# Everything except the statewide rows, i.e. the individual regions.
region_df = hydro_rename.loc[hydro_rename["hydrologic_region"].ne("Statewide")]

### Output

In [37]:
# Write the statewide rows to CSV, leaving the row index out of the file.
statewide_df.to_csv(
    path_or_buf="../data/raw/statewide-conservation.csv",
    index=False,
)

In [38]:
# Write the per-region rows to CSV, leaving the row index out of the file.
region_df.to_csv(
    path_or_buf="../data/raw/regional-conservation.csv",
    index=False,
)