In [101]:
import pytz
import requests
import pandas as pd
from playwright.async_api import async_playwright
from datetime import datetime, date, timedelta
import altair as alt
import altair_latimes as lat

Scrape storage data of major reservoirs from https://cdec.water.ca.gov/dynamicapp/QueryDaily

---

List of reservoir IDs

In [102]:
reservoir_list = [
    "SHA", # Shasta
    "ORO", # Oroville
    "BUL", # New Bullards Bar
    "FOL", # Folsom
    "CLE", # Trinity
    "CMN", # Camanche
    "WRS", # Sonoma
    "SNL", #San Luis
    "NML", # New Melones
    "DNP", # Don Pedro
    "CCH", # Cachuma
    "CSI", # Casitas
    "EXC", # McClur
    "MIL", # Millerton
    "PNF", # Pine Flat
    "CAS", # Castaic
    "DMV", # Diamond Valley Lake
    #"MHW", # Lake Matthews
    #"SLW", # Silverwood Lake
    #"SKN", # Lake Skinner
    #"PRR", # Lake Perris
]

Get current date

In [103]:
tz = pytz.timezone("America/Los_Angeles")

In [104]:
today = datetime.now(tz).date()

In [105]:
async with async_playwright() as playwright:
    browser = await playwright.chromium.launch()
    context = await browser.new_context(accept_downloads=True)

    # Open new page
    page = await context.new_page()

    df_list = []

    # Query database
    for res in reservoir_list:
        
        # url to query
        url = await page.goto(f"https://cdec.water.ca.gov/dynamicapp/QueryDaily?s={res}&end={today}")

        # get CSV download
        download_csv_button = "button.buttons-csv"
        await page.wait_for_selector(download_csv_button)
        await page.wait_for_function(
            f"document.querySelector('{download_csv_button}').textContent"
        )
        
        # get reservoir name
        header = "h1"
        res_name = await page.text_content(header)  # .split(": ")[1]
        print(f"Downloading data for {res_name} from {url.url}")
        await page.wait_for_timeout(5000)
        
        # download!
        async with page.expect_download() as download_info:
            await page.click(download_csv_button)
        download = await download_info.value
        
        # make dataframe
        path = await download.path()
        df = pd.read_csv(path)
        df.insert(0, "reservoir_name", res_name.split(" (")[0])
        df.insert(1, "reservoir_id", res)

        # append
        df_list.append(df)
        
        print("Done!")

    # Close context
    await context.close()
    # Close browser
    await browser.close() 

Downloading data for SHASTA DAM  (USBR) (SHA) from https://cdec.water.ca.gov/dynamicapp/QueryDaily?s=SHA&end=2022-05-27
Done!
Downloading data for OROVILLE DAM (ORO) from https://cdec.water.ca.gov/dynamicapp/QueryDaily?s=ORO&end=2022-05-27
Done!
Downloading data for NEW BULLARDS BAR (BUL) from https://cdec.water.ca.gov/dynamicapp/QueryDaily?s=BUL&end=2022-05-27
Done!
Downloading data for FOLSOM LAKE (FOL) from https://cdec.water.ca.gov/dynamicapp/QueryDaily?s=FOL&end=2022-05-27
Done!
Downloading data for TRINITY LAKE (CLE) from https://cdec.water.ca.gov/dynamicapp/QueryDaily?s=CLE&end=2022-05-27
Done!
Downloading data for CAMANCHE RESERVOIR (CMN) from https://cdec.water.ca.gov/dynamicapp/QueryDaily?s=CMN&end=2022-05-27
Done!
Downloading data for WARM SPRINGS (USACE) (WRS) from https://cdec.water.ca.gov/dynamicapp/QueryDaily?s=WRS&end=2022-05-27
Done!
Downloading data for SAN LUIS RESERVOIR (SNL) from https://cdec.water.ca.gov/dynamicapp/QueryDaily?s=SNL&end=2022-05-27
Done!
Downloading

In [106]:
concat = pd.concat(df_list)

In [107]:
concat.drop(df.columns[df.columns.str.contains('unnamed',case = False)],axis = 1, inplace = True)

In [108]:
trim = concat[["reservoir_name", "DATE / TIME (PST)", "STORAGE AF"]] 

In [109]:
trim.columns = ["reservoir_name", "date", "storage_af"]

In [110]:
trim["date"] = pd.to_datetime(trim["date"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trim["date"] = pd.to_datetime(trim["date"])


In [111]:
trim[trim["date"] == "2022-05-26"]

Unnamed: 0,reservoir_name,date,storage_af
29,SHASTA DAM,2022-05-26,1819482
29,OROVILLE DAM,2022-05-26,1923094
29,NEW BULLARDS BAR,2022-05-26,862603
29,FOLSOM LAKE,2022-05-26,860561
29,TRINITY LAKE,2022-05-26,742191
29,CAMANCHE RESERVOIR,2022-05-26,213360
29,WARM SPRINGS,2022-05-26,139875
29,SAN LUIS RESERVOIR,2022-05-26,928092
29,NEW MELONES RESERVOIR,2022-05-26,877423
29,DON PEDRO RESERVOIR,2022-05-26,1347419
