In [1]:
import pandas as pd, glob, calendar
from dbfread import DBF

In [2]:
# Folder holding the climate-sensitive station (CSS) tables
css_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs\Streamflow_Stations\Climate_Sensitive_Stations-GRDC"

# Load the CSV file that lists all CSS stations
css_csv_path = css_folder + "//CSS_FINAL_SELECTION-MERGE_WITH_ALL.csv"
filtered_css = pd.read_csv(css_csv_path)

filtered_css

Unnamed: 0,station_name,station_no,station_id,station_latitude,station_longitude,station_status,river_name,station_elevation,CATCHMENT_SIZE,NAT_STA_ID,GRDCCOUNTRY,Continent,has_monthly_k,has_daily_k,monthly_k_recession,daily_k_recession,Next_Downstream_Station,CATCHMENT_SIZE2,Priority,Val_Analysis_Monthly_k
0,ABERCROMBIE,5204121,1075206,-33.954800,149.325200,Active,ABERCROMBIE RIVER,426.55,"2631,00 km²",412028,AU - AUSTRALIA,Oceania,Yes,No,0.420650,,5204302,2631.00,432,Yes
1,ABOVE BOULEAU CREEK,4215070,1069050,50.212223,-119.538610,Active,WHITEMAN CREEK,630.00,"112,00 km²",08NM174,CA - CANADA,North America,Yes,Yes,0.350868,0.910436,4215103,112.00,773,Yes
2,"ABOVE CHENEY RESERVOIRE, KS",4125110,1069620,37.863600,-98.014800,Active,NORTH FORK NINNESCAH RIVER,443.80,"2038,30 km²",07144780,US - UNITED STATES,North America,Yes,Yes,0.417039,0.541509,4125801,2038.30,478,No
3,ABOVE FORT MCPHERSON,4208040,1068040,67.248886,-134.883060,Active,PEEL RIVER,,"70600,00 km²",10MC002,CA - CANADA,North America,Yes,Yes,0.625278,0.942386,,70600.00,59,No
4,"ABOVE HAMBURG, IA",4122160,1068596,40.632500,-95.625800,Active,NISHNABOTNA RIVER,272.54,"7267,50 km²",06810000,US - UNITED STATES,North America,Yes,Yes,0.549697,0.912838,4122903,7267.50,264,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
804,"WOODFORDS, CA (WEST FORK)",4118100,1068421,38.769600,-119.833800,Active,CARSON RIVER,1753.97,"169,38 km²",10310000,US - UNITED STATES,North America,Yes,Yes,0.593689,0.900646,,169.38,732,Yes
805,XAMBIOA,3649418,1071023,-6.409700,-48.542200,Active,RIO ARAGUAIA,118.00,"364500,00 km²",28300000,BR - BRAZIL,South America,Yes,Yes,0.627802,0.974853,3649419,364500.00,20,No
806,XAVANTINA,3649617,1074892,-14.672500,-52.355000,Active,RIO DAS MORTES,266.00,"24950,00 km²",26100000,BR - BRAZIL,South America,Yes,Yes,0.786505,0.977071,3649618,24950.00,123,No
807,"YADKIN COLLEGE, NC",4148321,1069484,35.856700,-80.386900,Active,YADKIN RIVER,194.60,"5905,00 km²",02116500,US - UNITED STATES,North America,Yes,Yes,0.718625,0.896280,4148320,5905.00,294,Yes


In [3]:
wyield_folder = r"Z:\PhD_Datasets&Analysis\Outputs\T&M_WBM\wyield4"

years = range(1958, 2023 + 1)

# Gather the per-year zonal statistics CSVs; glob only returns paths that exist,
# so a year without a file contributes nothing to the list
zonal_files = sorted(
    path
    for year in years
    for path in glob.glob(wyield_folder + f"\\wyield_zonal_statistics_{year}.csv")
)

# Stack the yearly tables into one frame indexed by station number
yearly_tables = [pd.read_csv(path) for path in zonal_files]
zonal_df = (
    pd.concat(yearly_tables, ignore_index=True)
    .rename(columns={"grdcno_int": "station_no"})
    .set_index("station_no")
)

# Build a "YYYY-MM" label; the round trip through to_datetime rejects invalid year/month pairs
year_month = zonal_df["YEAR"].astype(str) + "-" + zonal_df["MONTH"].astype(str).str.zfill(2)
zonal_df["DATE"] = pd.to_datetime(year_month, format="%Y-%m").dt.strftime("%Y-%m")

# Keep only the columns used downstream
zonal_df = zonal_df[["YEAR", "MONTH", "DATE", "COUNT", "MEAN"]]

zonal_df

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3617110,1958,1,1958-01,29626.0,138.502874
3617110,1958,2,1958-02,29555.0,97.228306
3617110,1958,3,1958-03,29509.0,109.915452
3617110,1958,4,1958-04,29483.0,47.283307
3617110,1958,5,1958-05,29476.0,18.090492
...,...,...,...,...,...
6870640,2022,8,2022-08,222.0,30.776309
6870640,2022,9,2022-09,222.0,21.473289
6870640,2022,10,2022-10,222.0,50.456202
6870640,2022,11,2022-11,222.0,7.092344


In [4]:
def calculate_streamflow(year, month, wyield, area_km2):
    """Turn a monthly water yield over a catchment into a mean flow in m3/s.

    Parameters
    ----------
    year, month : int-like
        Calendar year and month (1-12); both are cast with ``int`` and used
        to look up the number of days in that month.
    wyield : float
        Water yield for the month. Presumably a depth in mm: the factor 1000
        is what turns mm * km2 into m3 -- TODO confirm against the raster units.
    area_km2 : float
        Catchment area in km2.

    Returns
    -------
    float
        ``wyield * area_km2 * 1000`` divided by the number of seconds in the month.
    """
    _, days_in_month = calendar.monthrange(int(year), int(month))
    seconds_in_month = days_in_month * 24 * 60 * 60
    monthly_volume = wyield * area_km2 * 1000
    return monthly_volume / seconds_in_month

In [5]:
# Keep only the stations flagged with a monthly k recession value, indexed by station_no
css_stations = filtered_css.loc[filtered_css["has_monthly_k"] == "Yes"].set_index("station_no")

# Create a dictionary mapping station_no to CATCHMENT_SIZE2
catchment_size_dict = css_stations["CATCHMENT_SIZE2"].to_dict()

# Fail up front, naming the offending stations, instead of hitting a bare KeyError
# part-way through the per-row calculation
missing_stations = zonal_df.index.unique().difference(css_stations.index)
if len(missing_stations) > 0:
    raise KeyError(
        f"{len(missing_stations)} station_no value(s) in zonal_df have no CATCHMENT_SIZE2 "
        f"among the has_monthly_k == 'Yes' stations: {missing_stations.tolist()[:10]}"
    )

# Calculate FLOW_CMS by walking the columns directly; this avoids building a Series
# for every row (as DataFrame.apply(axis=1) does) while reusing the same conversion helper
zonal_df["FLOW_CMS"] = [
    calculate_streamflow(year, month, wyield, catchment_size_dict[station_no])
    for station_no, year, month, wyield in zip(
        zonal_df.index, zonal_df["YEAR"], zonal_df["MONTH"], zonal_df["MEAN"]
    )
]

# NOTE(review): the file name embeds the requested year range (years[0]-years[-1]),
# which may extend past the years that were actually found on disk -- confirm
zonal_df.to_csv(wyield_folder + f"\\wyield4_zonal_statistics_{years[0]}-{years[-1]}.csv")
zonal_df

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN,FLOW_CMS
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3617110,1958,1,1958-01,29626.0,138.502874,31492.028929
3617110,1958,2,1958-02,29555.0,97.228306,24475.875553
3617110,1958,3,1958-03,29509.0,109.915452,24991.976592
3617110,1958,4,1958-04,29483.0,47.283307,11109.388159
3617110,1958,5,1958-05,29476.0,18.090492,4113.317467
...,...,...,...,...,...,...
6870640,2022,8,2022-08,222.0,30.776309,22.820247
6870640,2022,9,2022-09,222.0,21.473289,16.452914
6870640,2022,10,2022-10,222.0,50.456202,37.412641
6870640,2022,11,2022-11,222.0,7.092344,5.434180


In [6]:
# Summary statistics of the zonal table, rendered with two decimals
zonal_df.describe().map("{:.2f}".format)

Unnamed: 0,YEAR,MONTH,COUNT,MEAN,FLOW_CMS
count,630240.0,630240.0,630240.0,630240.0,630240.0
mean,1990.0,6.5,2584.58,41.91,1187.52
std,18.76,3.45,13504.52,71.31,10240.91
min,1958.0,1.0,2.0,0.0,0.0
25%,1974.0,3.75,44.0,3.55,2.38
50%,1990.0,6.5,191.5,10.15,12.75
75%,2006.0,9.25,700.0,51.47,75.59
max,2022.0,12.0,219376.0,1759.14,409606.82


In [7]:
# Check for calculations with only one pixel
zonal_df.loc[zonal_df["COUNT"].eq(1)]

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN,FLOW_CMS
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [8]:
# Paths to input datasets
root_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs"
drain_areas_folder = root_folder + "\\Streamflow_Sts_Drainage_Areas"

# Open the .dbf attribute table of the merged watersheds layer
drain_areas_tb = DBF(drain_areas_folder + "\\GRDC_Watersheds\\CSS-WATERSHEDS-MERGE_FINAL_SELECTION.dbf")

# Load every record into a DataFrame and index it by the grdc_no field
drain_areas_records = pd.DataFrame(iter(drain_areas_tb))
drain_areas_df = drain_areas_records.set_index("grdc_no")

# Cast the index values to int
drain_areas_df.index = drain_areas_df.index.astype(int)
drain_areas_df

Unnamed: 0_level_0,river,station,area,altitude,lat_org,long_org,lat_pp,long_pp,dist_km,area_calc,...,Continent,has_monthl,has_daily_,monthly_k_,daily_k_re,Next_Downs,CATCHMENT_,Priority,mon_k_adju,Val_An_M_k
grdc_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3617110,RIO MAMORE,GUAJARA-MIRIM,609000.0,109.29,-10.7925,-65.3478,-10.7938,-65.3479,0.1,612073.0,...,South America,Yes,Yes,0.736213,0.982211,3627041,609000.0,17,0.736213,No
3617811,RIO GUAPORE,PRINCIPE DA BEIRA,341000.0,-999.00,-12.4267,-64.4253,-12.4271,-64.4271,0.2,341647.4,...,South America,Yes,Yes,0.733124,0.984667,3627110,341000.0,21,0.733124,Yes
3617812,RIO GUAPORE,PIMENTEIRAS,54200.0,-999.00,-13.4858,-61.0500,-13.4854,-61.0479,0.2,55743.9,...,South America,Yes,Yes,0.789579,0.985912,3627810,54200.0,74,0.789579,No
3617814,RIO GUAPORE,PONTES E LACERDA,2990.0,230.00,-15.2153,-59.3539,-15.2104,-59.3604,0.9,3020.5,...,South America,Yes,Yes,0.840667,0.959591,3627813,2990.0,412,0.840667,No
3618051,RIO NEGRO,SERRINHA,279945.0,-999.00,-0.4817,-64.8272,-0.4938,-64.8146,1.9,292404.2,...,South America,Yes,Yes,0.826092,0.977063,3618000,279945.0,27,0.826092,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6854601,IIJOKI,SUOLIJARVI -OUTLET,1313.0,150.00,65.1446,28.0690,65.1438,28.0646,0.2,1290.9,...,Europe,Yes,Yes,0.402936,0.913624,6854600,1313.0,531,0.402936,Yes
6854713,KEMIJOKI,OUNASJARVI - OUTLET,363.0,290.00,68.3961,23.7524,68.3960,23.7430,0.4,364.6,...,Europe,Yes,Yes,0.571777,0.919590,6854712,363.0,659,0.571777,Yes
6855411,VUOKSI,KAJOONJARVI - OUTLET,125.0,167.00,63.1519,28.8992,63.1440,28.9050,0.9,126.5,...,Europe,Yes,Yes,0.482329,0.945775,6855409,125.0,762,0.482329,Yes
6855412,VUOKSI,LOHNAJARVI - OUTLET,788.0,80.00,61.8351,28.3028,61.8320,28.2940,0.6,778.2,...,Europe,Yes,Yes,0.564292,0.949615,6855410,788.0,586,0.246549,Yes


In [9]:
# Summary statistics of the numeric watershed attributes, rendered with two decimals
drain_areas_df.describe().map("{:.2f}".format)

Unnamed: 0,area,altitude,lat_org,long_org,lat_pp,long_pp,dist_km,area_calc,grdcno_int,monthly_k_,daily_k_re,CATCHMENT_,Priority,mon_k_adju
count,809.0,809.0,809.0,809.0,809.0,809.0,809.0,809.0,809.0,809.0,809.0,809.0,809.0,809.0
mean,48298.93,132.79,32.31,-51.28,32.31,-51.28,0.33,48386.42,4646741.31,0.58,0.85,48298.93,405.0,0.58
std,265707.36,567.55,26.12,68.76,26.12,68.76,0.41,266180.23,955971.68,0.13,0.17,265707.36,233.68,0.15
min,49.0,-999.0,-43.14,-162.88,-43.14,-162.87,0.0,48.5,3617110.0,0.0,0.0,49.0,1.0,0.0
25%,650.0,37.44,31.22,-96.21,31.22,-96.21,0.2,652.9,4119286.0,0.5,0.84,650.0,203.0,0.49
50%,3167.6,184.75,42.58,-75.8,42.59,-75.8,0.2,3171.2,4149122.0,0.59,0.9,3167.6,405.0,0.59
75%,11354.6,353.86,47.41,0.58,47.42,0.58,0.3,11367.2,5304100.0,0.67,0.94,11354.6,607.0,0.67
max,4680000.0,2380.61,68.85,153.16,68.86,153.16,7.0,4671461.8,6870640.0,0.96,1.0,4680000.0,809.0,0.98
