In [23]:
import pytz
import requests
import pandas as pd
from playwright.async_api import async_playwright
from datetime import datetime, date, timedelta
import altair as alt
import altair_latimes as lat

Scrape storage data of reservoirs of interest from `https://cdec.water.ca.gov/dynamicapp/QueryDaily`

---

List of reservoir IDs to scrape (the three-letter codes CDEC uses in its query URLs)

In [49]:
# Reservoir IDs to query, in the order they are scraped:
#   CAS = Castaic             DMV = Diamond Valley Lake
#   MHW = Lake Mathews        SLW = Silverwood Lake
#   SKN = Lake Skinner        PRR = Lake Perris
reservoir_list = ["CAS", "DMV", "MHW", "SLW", "SKN", "PRR"]

Get the current date in the `America/Los_Angeles` time zone

In [51]:
# Time zone used to decide what "today" is, independent of the zone of the
# machine running the notebook.
tz = pytz.timezone("America/Los_Angeles")

In [52]:
# Current calendar date as seen in the `tz` zone; used as the `end` value of
# the query URL.
now_in_tz = datetime.now(tz=tz)
today = now_in_tz.date()

In [74]:
# For every ID in `reservoir_list`: open that reservoir's QueryDaily page in a
# Playwright-driven Chromium browser, click the page's CSV export button, read
# the downloaded file into a DataFrame tagged with the reservoir's name and ID,
# and collect the frames in `df_list`.
#
# NOTE(review): the context/browser close calls at the bottom are only reached
# when every iteration succeeds; an exception inside the loop skips them and
# leaves teardown to the `async with` exit.
async with async_playwright() as playwright:
    browser = await playwright.chromium.launch()
    # accept_downloads=True so the CSV export below is saved rather than refused
    context = await browser.new_context(accept_downloads=True)

    # Open new page (reused for every reservoir)
    page = await context.new_page()

    # One DataFrame per reservoir; concatenated in a later cell
    df_list = []

    # Query database
    for res in reservoir_list:
        
        # Load the query page for this reservoir, ending at `today`.
        # NOTE(review): despite its name, `url` holds the object returned by
        # page.goto(); only its `.url` attribute is read (for the log line).
        url = await page.goto(f"https://cdec.water.ca.gov/dynamicapp/QueryDaily?s={res}&end={today}")

        # get CSV download: wait until the export button exists and has
        # non-empty text before interacting with it
        download_csv_button = "button.buttons-csv"
        await page.wait_for_selector(download_csv_button)
        await page.wait_for_function(
            f"document.querySelector('{download_csv_button}').textContent"
        )
        
        # get reservoir name from the page heading, e.g. "CASTAIC (CAS)"
        header = "h1"
        res_name = await page.text_content(header)  # full heading; trimmed to the name when the frame is built
        print(f"Downloading data for {res_name} from {url.url}")
        # Fixed 5000 ms pause before clicking.
        # NOTE(review): presumably gives the data table time to finish
        # loading so the export is complete -- confirm it is still needed.
        await page.wait_for_timeout(5000)
        
        # download! The click happens inside expect_download() so the
        # download it triggers is captured.
        async with page.expect_download() as download_info:
            await page.click(download_csv_button)
        download = await download_info.value
        
        # make dataframe from the downloaded file
        path = await download.path()
        df = pd.read_csv(path)
        # Keep only the text before " (" -- "CASTAIC (CAS)" becomes "CASTAIC"
        df.insert(0, "reservoir_name", res_name.split(" (")[0])
        df.insert(1, "reservoir_id", res)

        # append
        df_list.append(df)
        
        print("Done!")

    # Close context
    await context.close()
    # Close browser
    await browser.close() 

Downloading data for CASTAIC (CAS) from https://cdec.water.ca.gov/dynamicapp/QueryDaily?s=CAS&end=2022-05-24
Done!
Downloading data for LAKE SILVERWOOD (SLW) from https://cdec.water.ca.gov/dynamicapp/QueryDaily?s=SLW&end=2022-05-24
Done!
Downloading data for PERRIS (PRR) from https://cdec.water.ca.gov/dynamicapp/QueryDaily?s=PRR&end=2022-05-24
Done!


In [75]:
# Stack the per-reservoir frames row-wise into one table. Each frame keeps its
# own row labels, so index values repeat from one reservoir to the next.
concat = pd.concat(objs=df_list, axis=0)

In [76]:
# Drop any "Unnamed: N" columns from the combined table.
# The mask is built from `concat`'s own columns rather than from `df` (the last
# per-reservoir frame left over from the scraping loop), so an unnamed column
# that exists in the combined table but not in that last frame is still removed.
# The result is re-assigned instead of mutated in place, so re-running this cell
# is safe and it no longer depends on the loop's leftover state.
unnamed_cols = concat.columns[concat.columns.str.contains("unnamed", case=False)]
concat = concat.drop(columns=unnamed_cols)

In [77]:
# Show the combined table (the notebook output elides the middle rows).
concat

Unnamed: 0,reservoir_name,reservoir_id,DATE / TIME (PST),RES ELE FEET,STORAGE AF,RES CHG AF,PPT INC INCHES
0,CASTAIC,CAS,04/24/2022,1435.22,172169,-1414,
1,CASTAIC,CAS,04/25/2022,1434.47,170983,-1186,
2,CASTAIC,CAS,04/26/2022,1435.24,172201,1218,
3,CASTAIC,CAS,04/27/2022,1436.54,174268,2067,
4,CASTAIC,CAS,04/28/2022,1436.29,173869,-399,
...,...,...,...,...,...,...,...
26,PERRIS,PRR,05/20/2022,1576.74,102027,-169,0.00
27,PERRIS,PRR,05/21/2022,1576.72,101985,-42,0.00
28,PERRIS,PRR,05/22/2022,1576.76,102069,84,0.00
29,PERRIS,PRR,05/23/2022,1576.72,101985,-84,0.00
