In [1]:
import numpy as np
import requests as re
import pandas as pd
import datetime
import altair as alt
import altair_latimes as lat

In [2]:
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

ThemeRegistry.enable('latimes')

In [3]:
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

Query all of the precip. stats on [CWW](https://cww.water.ca.gov/regionscale)

In [10]:
# atlasId='Statewide'
#atlasId='South Coast'

In [122]:
atlasIDs = [
    'Statewide',
    'North Coast',
    'Sacramento River',
    'North Lahontan',
    'San Francisco Bay',
    'San Joaquin River',
    'Central Coast',
    'Tulare Lake',
    'South Lahontan',
    'South Coast',
    'Colorado River'
]

In [123]:
df_list = []

for atlasId in atlasIDs:
    
    # get data and parse
    url = f"https://cww.water.ca.gov/service/prism/huc8/precipstatsmultiyear?years=1982,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,3&atlasIDs={atlasId}&dataScales=Watershed"
    data = re.get(url).json()['data']
    columns = data['seriesNames']
    columns.insert(0, "date")
    
    # create df
    df = pd.DataFrame(data['series'], columns=columns)
    
    # get diff
    diff_df = df.set_index("date").diff().reset_index()  
    
    diff_df["hydrologic_region"] = atlasId
    
    df_list.append(diff_df)

In [124]:
df = pd.concat(df_list)

In [85]:
# diff_df = df.set_index(["date", "hydrologic_region"]).diff().reset_index()

In [126]:
melt_df = pd.melt(
    df,
    id_vars=["date","Average","hydrologic_region"],
    var_name="water_year",
    value_name="precip_inches"
)

In [127]:
melt_df["date"] = pd.to_datetime(melt_df["date"])

In [128]:
melt_df["month"] = melt_df["date"].dt.month
melt_df["day"] = melt_df["date"].dt.day

In [129]:
melt_df

Unnamed: 0,date,Average,hydrologic_region,water_year,precip_inches,month,day
0,2023-10-01,,Statewide,1982-1983,,10,1
1,2023-10-02,0.02,Statewide,1982-1983,0.00,10,2
2,2023-10-03,0.01,Statewide,1982-1983,0.00,10,3
3,2023-10-04,0.03,Statewide,1982-1983,0.00,10,4
4,2023-10-05,0.04,Statewide,1982-1983,0.01,10,5
...,...,...,...,...,...,...,...
100645,2024-09-26,0.04,Colorado River,2023-2024,,9,26
100646,2024-09-27,0.01,Colorado River,2023-2024,,9,27
100647,2024-09-28,0.00,Colorado River,2023-2024,,9,28
100648,2024-09-29,0.00,Colorado River,2023-2024,,9,29


In [130]:
melt_df["water_year_start"] = melt_df["water_year"].str.split("-")[0][0]
melt_df["water_year_end"] = melt_df["water_year"].str.split("-")[0][1]

In [131]:
melt_df.loc[
    (melt_df.month <= 10),
    "clean_date"
] = melt_df["water_year_start"].astype(str) + "-" + melt_df["month"].astype(str) + "-" + melt_df["day"].astype(str)

In [132]:
melt_df.loc[
    (melt_df.month > 10),
    "clean_date"
] = melt_df["water_year_end"].astype(str) + "-" + melt_df["month"].astype(str) + "-" + melt_df["day"].astype(str)

In [133]:
# melt_df = melt_df[ (melt_df.date != "2024-02-29") ]

In [134]:
melt_df["clean_date"] = pd.to_datetime(melt_df["clean_date"], errors='coerce')

In [135]:
melt_df[melt_df.hydrologic_region == "San Francisco Bay"].sort_values("precip_inches")

Unnamed: 0,date,Average,hydrologic_region,water_year,precip_inches,month,day,water_year_start,water_year_end,clean_date
1465,2023-10-02,0.01,San Francisco Bay,1982-1983,0.0,10,2,1982,1983,1982-10-02
61867,2023-10-14,0.09,San Francisco Bay,2014-2015,0.0,10,14,1982,1983,1982-10-14
61866,2023-10-13,0.03,San Francisco Bay,2014-2015,0.0,10,13,1982,1983,1982-10-13
61865,2023-10-12,0.01,San Francisco Bay,2014-2015,0.0,10,12,1982,1983,1982-10-12
61864,2023-10-11,0.03,San Francisco Bay,2014-2015,0.0,10,11,1982,1983,1982-10-11
...,...,...,...,...,...,...,...,...,...,...
98449,2024-09-26,0.01,San Francisco Bay,2023-2024,,9,26,1982,1983,1982-09-26
98450,2024-09-27,0.01,San Francisco Bay,2023-2024,,9,27,1982,1983,1982-09-27
98451,2024-09-28,0.00,San Francisco Bay,2023-2024,,9,28,1982,1983,1982-09-28
98452,2024-09-29,0.01,San Francisco Bay,2023-2024,,9,29,1982,1983,1982-09-29


In [138]:
chart_region = atlasIDs[5]

alt.Chart(melt_df[melt_df.hydrologic_region == chart_region ]).mark_circle().encode(
    x="date:T",
    y="water_year",
    #color="precip_inches",
    size="precip_inches"
).properties(title=f"{chart_region} precipitation", width=800)

In [139]:
melt_df.to_csv("../../data/processed/precipitation/daily/all-regions-daily-precip.csv", index=False)