In [31]:
import numpy as np
import pandas as pd
from datetime import date

import codebase

- below Hoover Dam: 4152103
- Lee's Ferry: 4152450
- US-Mexico border: 4152050
- upstream of Lake Powell (San Juan River trib): 4152600

In [None]:
## define experimental set-up

# GRDC station IDs are stored as floats in the downloaded JSONs
# NOTE(review): the IDs below are ints — confirm the loader copes with the float/int mismatch
# Main gauge: 4152450 is listed as Lee's Ferry in the station list above
GRDC_ID = 4152450
# Sub-basin gauge(s): 4152600 is listed as upstream of Lake Powell (San Juan River trib)
GRDC_sub_IDs = [4152600]
# Reservoir name handed to the CYGNSS surface-water area loader
dam_name = 'glen canyon'
# Study window in calendar years; both values feed the daily index of the output dataframe
start_year = 2018
# NOTE(review): "_ex" presumably means exclusive, yet the date_range built from it ends on Jan 1 of this year inclusive — confirm
stop_year_ex = 2024

In [3]:
## Other variables and filepaths
# NOTE(review): these are absolute, user-specific paths; the notebook only runs where they exist
# Passed as `filepath` to the GRDC station loader
grdc_dir = "/global/scratch/users/ann_scheliga/aux_dam_datasets/GRDC_CRB/"
# NOTE(review): presumably the ERA5 meteorology inputs; not used in the cells shown here — confirm
met_dir = "/global/scratch/users/ann_scheliga/era5_data/"
# Passed as `filepath` to the daily CYGNSS reservoir-area loader
res_dir = "/global/scratch/users/ann_scheliga/CYGNSS_daily/"

In [None]:
# For debugging
def check_data_format(df):
    """Print a quick structural summary of a pandas Series or DataFrame.

    Prints the first and last two rows, the container type, the index type,
    the first index label and the frequency inferred from the index.

    Parameters
    ----------
    df : pandas.Series or pandas.DataFrame
        Object to inspect. It may be empty and may carry any index type.

    Returns
    -------
    None
        The summary is written to stdout only.
    """
    index = df.index
    print(df.head(2))
    print(df.tail(2))
    print('structure type:',type(df))
    print('index type:',type(index))
    # An empty object has no first label; report None instead of raising IndexError
    print('first index:',index[0] if len(index) else None)
    # Only datetime-like indexes expose `inferred_freq`; fall back to None otherwise
    print('Inferred frequency:',getattr(index,'inferred_freq',None))


In [None]:
# Daily calendar spanning the study window; output_df is keyed on it.
# Both endpoints are kept, so the final row is Jan 1 of stop_year_ex.
full_time = pd.date_range(
    start=pd.Timestamp(year=start_year, month=1, day=1),
    end=pd.Timestamp(year=stop_year_ex, month=1, day=1),
    freq='D',
)
output_df = pd.DataFrame(index=full_time)

In [46]:
## Load the daily CYGNSS surface-water area series for the reservoir
sw_area = codebase.load_data.load_daily_reservoir_CYGNSS_area(dam_name, filepath=res_dir)

# Store the series as a column of the output frame (assignment aligns on the index),
# then print a summary of the raw series for inspection
output_df['SW_area'] = sw_area
check_data_format(sw_area)

2019-01-01    380.752060
2019-01-02    362.621009
Name: Area km2, dtype: float64
2023-12-31    298.685200
2024-01-01    293.913871
Name: Area km2, dtype: float64
structure type: <class 'pandas.core.series.Series'>
index type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
first index: 2019-01-01 00:00:00
Inferred frequency: D


In [None]:
## SW_flag: 1 on days that have a SW_area value, 0 on days that do not
output_df['SW_flag'] = output_df['SW_area'].notna().astype('int64')

check_data_format(output_df['SW_flag'])

2018-01-01    0
2018-01-02    0
Freq: D, Name: SW_flag, dtype: int64
2023-12-31    1
2024-01-01    1
Freq: D, Name: SW_flag, dtype: int64
structure type: <class 'pandas.core.series.Series'>
index type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
first index: 2018-01-01 00:00:00
Inferred frequency: D


In [None]:
## import GRDC
# Use the GRDC_ID constant from the set-up cell; a lowercase `grdc_id` is not
# defined in any of the cells above, so the original call raised NameError on a fresh run.
# NOTE(review): the years are hard-coded (2019-2024) while start_year is 2018 —
# confirm whether they should follow start_year / stop_year_ex.
watershed_gpd, grdc_Q = codebase.load_data.load_GRDC_station_data_by_ID(
    GRDC_ID,
    filepath=grdc_dir,
    timeseries_dict={"start_year": 2019, "stop_year": 2024},
)