In [1]:
import pandas as pd, glob, calendar

In [2]:
# Folder that holds the climate-sensitive station (CSS) tables
css_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs\Streamflow_Stations\Climate_Sensitive_Stations-GRDC"

# Load the CSV file listing all selected CSS stations and display it
css_selection_csv = css_folder + "//CSS_FINAL_SELECTION_WITH_ALL.csv"
filtered_css = pd.read_csv(css_selection_csv)
filtered_css

Unnamed: 0,station_name,station_no,station_id,station_latitude,station_longitude,station_status,river_name,station_elevation,CATCHMENT_SIZE,NAT_STA_ID,GRDCCOUNTRY,Continent,Next_Downstream_Station,CATCHMENT_SIZE2,Priority,k_recession
0,ABOVE BOULEAU CREEK,4215070,1069050,50.212223,-119.538610,Active,WHITEMAN CREEK,630.00,"112,00 km²",08NM174,CA - CANADA,North America,4215103,112.00,751,0.910436
1,"ABOVE CHENEY RESERVOIRE, KS",4125110,1069620,37.863600,-98.014800,Active,NORTH FORK NINNESCAH RIVER,443.80,"2038,30 km²",07144780,US - UNITED STATES,North America,4125801,2038.30,463,0.541509
2,ABOVE FORT MCPHERSON,4208040,1068040,67.248886,-134.883060,Active,PEEL RIVER,,"70600,00 km²",10MC002,CA - CANADA,North America,,70600.00,56,0.942386
3,"ABOVE HAMBURG, IA",4122160,1068596,40.632500,-95.625800,Active,NISHNABOTNA RIVER,272.54,"7267,50 km²",06810000,US - UNITED STATES,North America,4122903,7267.50,254,0.912838
4,ABOVE HIGHWAY NO. 8 (DEMPSTER HIGHWAY),4208070,1068049,68.089165,-133.484160,Active,CARIBOU CREEK,,"625,00 km²",10LC007,CA - CANADA,North America,4208020,625.00,595,0.944144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
783,"WOODFORDS, CA (WEST FORK)",4118100,1068421,38.769600,-119.833800,Active,CARSON RIVER,1753.97,"169,38 km²",10310000,US - UNITED STATES,North America,,169.38,712,0.900646
784,XAMBIOA,3649418,1071023,-6.409700,-48.542200,Active,RIO ARAGUAIA,118.00,"364500,00 km²",28300000,BR - BRAZIL,South America,3649419,364500.00,19,0.974853
785,XAVANTINA,3649617,1074892,-14.672500,-52.355000,Active,RIO DAS MORTES,266.00,"24950,00 km²",26100000,BR - BRAZIL,South America,3649618,24950.00,119,0.977071
786,"YADKIN COLLEGE, NC",4148321,1069484,35.856700,-80.386900,Active,YADKIN RIVER,194.60,"5905,00 km²",02116500,US - UNITED STATES,North America,4148320,5905.00,284,0.896280


In [3]:
wyield_folder = r"Z:\PhD_Datasets&Analysis\Outputs\T&M_WBM\wyield"

years = range(1958, 2023 + 1)

# Collect the per-year zonal statistics files, remembering which requested
# years have no file so that gaps are reported instead of silently skipped.
zonal_files = []
missing_years = []
for year in years:
    year_files = glob.glob(wyield_folder + f"\\wyield_zonal_statistics_{year}.csv")
    if year_files:
        zonal_files.extend(year_files)
    else:
        missing_years.append(year)
zonal_files.sort()

# pd.concat on an empty list fails with an opaque "No objects to concatenate";
# fail earlier with a message that points at the actual problem.
if not zonal_files:
    raise FileNotFoundError(
        f"No wyield_zonal_statistics_<year>.csv files found in {wyield_folder!r} "
        f"for years {years[0]}-{years[-1]}"
    )
if missing_years:
    print(f"No zonal statistics file found for {len(missing_years)} requested year(s): {missing_years}")

# Read every yearly file and stack them into a single DataFrame indexed by station number
zonal_df = pd.concat([pd.read_csv(file) for file in zonal_files], ignore_index=True)
zonal_df = zonal_df.rename(columns={"grdcno_int": "station_no"})
zonal_df = zonal_df.set_index("station_no")

# Build a "YYYY-MM" label; the round trip through to_datetime makes pandas
# raise if any YEAR/MONTH pair does not form a valid calendar month.
zonal_df["DATE"] = zonal_df["YEAR"].astype(str) + "-" + zonal_df["MONTH"].astype(str).str.zfill(2)
zonal_df["DATE"] = pd.to_datetime(zonal_df["DATE"], format="%Y-%m").dt.strftime("%Y-%m")

# Keep only the columns used downstream; the explicit copy makes this an
# independent frame, so columns added to it later do not target a slice.
zonal_df = zonal_df[["YEAR", "MONTH", "DATE", "COUNT", "MEAN"]].copy()

zonal_df

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3617110,1958,1,1958-01,29626.0,373.946142
3617110,1958,2,1958-02,29555.0,228.121757
3617110,1958,3,1958-03,29509.0,200.029459
3617110,1958,4,1958-04,29483.0,100.426328
3617110,1958,5,1958-05,29476.0,51.678385
...,...,...,...,...,...
6870640,2014,8,2014-08,222.0,3.757945
6870640,2014,9,2014-09,222.0,8.533331
6870640,2014,10,2014-10,222.0,17.172768
6870640,2014,11,2014-11,222.0,28.094547


In [4]:
def calculate_streamflow(year, month, wyield, area_km2):
    """Convert a monthly water yield over a catchment into a mean flow in m3/s.

    Parameters
    ----------
    year, month : number
        Calendar year and month (1-12); both are cast to int and used only
        to look up the number of days in that month.
    wyield : number
        Water yield for the month. Presumably a depth in mm: the factor 1000
        is the volume in m3 of 1 mm spread over 1 km2 -- confirm the unit.
    area_km2 : number
        Catchment area in km2.

    Returns
    -------
    float
        ``wyield * area_km2 * 1000`` divided by the number of seconds in the
        given month.
    """
    _, days_in_month = calendar.monthrange(int(year), int(month))
    seconds_in_month = days_in_month * 24 * 60 * 60
    monthly_volume = wyield * area_km2 * 1000
    return monthly_volume / seconds_in_month

In [5]:
# Index the station table by station number (raises KeyError if the
# "station_no" column is absent from filtered_css)
css_stations = filtered_css.set_index("station_no")

# Create a dictionary mapping station_no to CATCHMENT_SIZE2
catchment_size_dict = css_stations["CATCHMENT_SIZE2"].to_dict()

# Every station in zonal_df needs a catchment size; check this up front so a
# missing station produces one clear error instead of a bare KeyError raised
# part-way through the computation.
missing_stations = sorted(set(zonal_df.index) - set(catchment_size_dict))
if missing_stations:
    raise KeyError(
        f"{len(missing_stations)} station(s) in zonal_df have no CATCHMENT_SIZE2 in filtered_css, "
        f"e.g. {missing_stations[:10]}"
    )

# Calculate FLOW_CMS row by row. Iterating the needed columns with zip avoids
# building a Series object per row, which is what DataFrame.apply(axis=1) does.
zonal_df["FLOW_CMS"] = [
    calculate_streamflow(year, month, wyield, catchment_size_dict[station_no])
    for station_no, year, month, wyield in zip(
        zonal_df.index, zonal_df["YEAR"], zonal_df["MONTH"], zonal_df["MEAN"]
    )
]

# NOTE(review): the file name carries the *requested* year range; the data may
# cover fewer years if some yearly input files were missing -- confirm.
zonal_df.to_csv(wyield_folder + f"\\wyield_zonal_statistics_{years[0]}-{years[-1]}.csv")
zonal_df

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN,FLOW_CMS
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3617110,1958,1,1958-01,29626.0,373.946142,85025.836548
3617110,1958,2,1958-02,29555.0,228.121757,57426.484006
3617110,1958,3,1958-03,29509.0,200.029459,45481.608598
3617110,1958,4,1958-04,29483.0,100.426328,23595.537717
3617110,1958,5,1958-05,29476.0,51.678385,11750.349726
...,...,...,...,...,...,...
6870640,2014,8,2014-08,222.0,3.757945,2.786469
6870640,2014,9,2014-09,222.0,8.533331,6.538269
6870640,2014,10,2014-10,222.0,17.172768,12.733392
6870640,2014,11,2014-11,222.0,28.094547,21.526146


In [6]:
# Summary statistics of the numeric columns, as a quick sanity check on the
# year range and on the spread of COUNT, MEAN and FLOW_CMS
zonal_df.describe()

Unnamed: 0,YEAR,MONTH,COUNT,MEAN,FLOW_CMS
count,520080.0,520080.0,520080.0,520080.0,520080.0
mean,1985.072727,6.5,2579.249489,42.462287,1208.689948
std,15.99985,3.452056,13627.323045,71.448517,10453.41508
min,1958.0,1.0,2.0,0.0,0.0
25%,1971.0,3.75,44.0,2.958336,1.978165
50%,1985.0,6.5,191.0,8.844331,11.439709
75%,1999.0,9.25,696.0,54.814346,73.473098
max,2014.0,12.0,219376.0,1759.14,710465.915347


In [7]:
# Show any rows whose COUNT is exactly 1
count_is_one = zonal_df["COUNT"] == 1
zonal_df[count_is_one]

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN,FLOW_CMS
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
