In [114]:
from datetime import datetime, date, timedelta
from urllib.parse import urlencode

import pandas as pd
import pytz
import requests

In [115]:
# Resolve "today" in Pacific time rather than the machine's local zone;
# `today` is read later by queryCDEC as the end date of each request.
tz = pytz.timezone("America/Los_Angeles")
today = datetime.now(tz).date()
# Echo the date so the run date is visible in the notebook output.
today

datetime.date(2023, 5, 11)

### Download normals
Source: [California Water Watch](https://cww.water.ca.gov/)

In [116]:
# Fetch the California Water Watch snow-data payload and keep its 'avg' entry.
# requests has no default timeout, so without one a stalled server would hang
# the notebook indefinitely; 30 seconds is a generous upper bound for one JSON call.
cww_response = requests.get("https://cww.water.ca.gov/service/snow-data", timeout=30)
# Surface HTTP errors (4xx/5xx) here instead of as a confusing JSON/KeyError below.
cww_response.raise_for_status()
cww_avgs = cww_response.json()['avg']

In [117]:
# Turn the 'avg' records from the snow-data response into a DataFrame.
avgs = pd.DataFrame.from_records(cww_avgs)

In [118]:
# Write the normals table to disk without the DataFrame index column.
avgs.to_csv("../../data/processed/snow/normal-swc.csv", index=False)

### Download basin and statewide summaries
Source: [California Data Exchange Center](https://cdec.water.ca.gov/querySWC.html)

In [119]:
def queryCDEC(stations, sensor_number, duration, end_date=None, start="1800-01"):
    """Download a CDEC sensor time series and return it as a DataFrame.

    Parameters
    ----------
    stations : str
        One station/region id, or several separated by commas
        (e.g. "SWD" or "NOR,CEN,SOT").
    sensor_number : int or str
        CDEC sensor code (this notebook uses 304, 305 and 306).
    duration : str
        CDEC duration code, e.g. "D" for daily.
    end_date : date or str, optional
        Last date to request. When omitted, the notebook-level ``today``
        is used, which is what the original three-argument call did.
    start : str, optional
        First date to request. Defaults to "1800-01", the value the
        function always used (presumably chosen to pull the full history).

    Returns
    -------
    pandas.DataFrame
        The servlet's CSV response, with the "DATE TIME" and "OBS DATE"
        columns parsed as dates.
    """
    # Fall back to the notebook global so existing calls behave exactly as before.
    if end_date is None:
        end_date = today

    # urlencode percent-escapes every reserved character in every value
    # (the comma between station ids becomes %2C), replacing the previous
    # hand-rolled comma-only replacement. Pass stations un-encoded.
    query = urlencode(
        {
            "Stations": stations,
            "SensorNums": sensor_number,
            "dur_code": duration,
            "Start": start,
            "End": end_date,
        }
    )
    url = f"https://cdec.water.ca.gov/dynamicapp/req/CSVDataServlet?{query}"

    # pandas fetches and parses the CSV straight from the URL.
    return pd.read_csv(url, parse_dates=["DATE TIME", "OBS DATE"])

Sensor codes:
- 304: average snow water content
- 305: percent of April 1 average
- 306: percent of normal

In [120]:
#NOR = North, CEN = Central, SOT = South, SWD = Statewide
#regions = "NOR,CEN,SOT"

In [121]:
#swc_regions_df = queryCDEC(regions, 304, "D")

In [122]:
#apr1_regions_df = queryCDEC(regions, 305, "D")

In [123]:
# Statewide ("SWD") daily series for sensor 304 (average snow water content).
swc_state_df = queryCDEC("SWD", 304, "D")

In [124]:
# Statewide ("SWD") daily series for sensor 305 (percent of April 1 average).
apr1_state_df = queryCDEC("SWD", 305, "D")

In [125]:
# Statewide ("SWD") daily series for sensor 306 (percent of normal).
normal_state_df = queryCDEC("SWD", 306, "D")

### Clean

In [126]:
def cleanTimeseries(df):
    """Clean one raw CDEC sensor download into a tidy three-column frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw query result. Must contain the columns STATION_ID, DURATION,
        SENSOR_TYPE, "DATE TIME", VALUE, DATA_FLAG and UNITS, and hold a
        single sensor type and a single unit.

    Returns
    -------
    pandas.DataFrame
        Columns ``station_id``, ``date`` and one value column named after
        the lower-cased sensor type. Rows whose VALUE is "---", whose
        DATA_FLAG is "N", or whose value is negative (or NaN) are dropped.
        Values reported in "%" are divided by 100. If every row is dropped
        the result is an empty frame with the same three columns.

    Raises
    ------
    AssertionError
        If the frame mixes sensor types or units, or contains any
        non-daily ("D") duration.
    """
    # tests
    assert len(df.SENSOR_TYPE.unique()) == 1, "Error: multiple sensor types"
    assert len(df.UNITS.unique()) == 1, "Error: more than one unit"
    # Check every row, not just the first, so the message is truthful.
    assert (df.DURATION == "D").all(), "Error: not all daily data"

    # Both are constant across the frame (asserted above). Reading them from the
    # unfiltered input keeps this working when filtering leaves no rows at all.
    unit = df["UNITS"].iloc[0]
    sensor_type = df["SENSOR_TYPE"].iloc[0].lower().replace(" ", "_")

    # remove empty values ("---" placeholder) and rows flagged "N"
    clean_df = df[(df.VALUE != "---") & (df.DATA_FLAG != "N")].copy()

    # convert to float
    clean_df["VALUE"] = clean_df["VALUE"].astype(float)

    # convert to proper %s
    if unit == "%":
        clean_df["VALUE"] = clean_df["VALUE"] / 100

    # Keep only non-negative values. The original computed this filter but
    # discarded the result, so negative readings were never removed.
    clean_df = clean_df[clean_df["VALUE"] >= 0]

    # lowercase, underscore-separated column names
    clean_df = clean_df.rename(columns=lambda name: name.lower().replace(" ", "_"))

    # rename value column with sensor type and date time col
    clean_df = clean_df.rename(columns={"value": sensor_type, "date_time": "date"})

    # trim
    return clean_df[["station_id", "date", sensor_type]]

In [127]:
#swc_regions_df_clean = cleanTimeseries(swc_regions_df)

In [128]:
#apr1_regions_df_clean = cleanTimeseries(apr1_regions_df)

In [129]:
# Clean the statewide sensor-304 (average snow water content) download.
swc_state_df_clean = cleanTimeseries(swc_state_df)

In [130]:
# Clean the statewide sensor-305 (percent of April 1 average) download.
apr1_state_df_clean = cleanTimeseries(apr1_state_df)

In [131]:
# Clean the statewide sensor-306 (percent of normal) download.
normal_state_df_clean = cleanTimeseries(normal_state_df)

### Merge

In [132]:
# regions_merge_swc_apr1 = pd.merge(
#     swc_regions_df_clean, 
#     apr1_regions_df_clean, 
#     on=["station_id","date"], 
#     how="outer",
#     suffixes=("_swc","_apr1")
# )

In [133]:
# print(len(regions_merge_swc_apr1), len(swc_regions_df_clean), len(apr1_regions_df_clean) )

In [134]:
# Outer-join snow water content with percent-of-normal on station and date,
# keeping any key that appears in either frame.
state_merge_swc_normal = swc_state_df_clean.merge(
    normal_state_df_clean,
    how="outer",
    on=["station_id", "date"],
)

In [135]:
# Add the percent-of-April-1 column to the combined frame with the same
# station/date outer join.
state_merge_normal_apr1 = state_merge_swc_normal.merge(
    apr1_state_df_clean,
    how="outer",
    on=["station_id", "date"],
)

In [136]:
# Neither outer merge may change the row count: extra rows would mean the
# other series contain station/date keys missing from the SWC frame (or duplicates).
assert len(swc_state_df_clean) == len(state_merge_swc_normal) == len(state_merge_normal_apr1)

In [140]:
# Display the merged statewide frame for a visual check before exporting.
state_merge_normal_apr1

Unnamed: 0,station_id,date,snwcavg,pctnorm,pctapr1
0,SWD,2002-12-20,11.7,1.50,0.41
1,SWD,2002-12-21,12.4,1.57,0.44
2,SWD,2002-12-22,12.6,1.53,0.44
3,SWD,2002-12-23,12.6,1.50,0.44
4,SWD,2002-12-24,12.6,1.47,0.44
...,...,...,...,...,...
4859,SWD,2023-05-05,48.6,2.77,1.90
4860,SWD,2023-05-06,48.7,2.87,1.91
4861,SWD,2023-05-07,49.1,2.96,1.92
4862,SWD,2023-05-08,48.8,3.04,1.92


### Export

In [137]:
#regions_merge_swc_apr1.to_csv("../../data/raw/snow/regions-timeseries.csv", index=False)

In [138]:
# Write the merged statewide series (SWC, percent of normal, percent of April 1)
# to disk without the DataFrame index column.
state_merge_normal_apr1.to_csv("../../data/raw/snow/statewide-timeseries.csv", index=False)

In [139]:
# Also write the cleaned sensor-306 (percent of normal) series on its own.
normal_state_df_clean.to_csv("../../data/raw/snow/statewide-normals.csv", index=False)