In [1]:
import pandas as pd, glob, calendar
from dbfread import DBF

In [2]:
# Folder holding the GRDC climate-sensitive stations (CSS) selection
css_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs\Streamflow_Stations\Climate_Sensitive_Stations-GRDC"

# Load the CSV file that lists all CSS stations
css_csv_path = css_folder + "//CSS_FINAL_SELECTION_WITH_ALL.csv"
filtered_css = pd.read_csv(css_csv_path)
filtered_css

Unnamed: 0,station_name,station_no,station_id,station_latitude,station_longitude,station_status,river_name,station_elevation,CATCHMENT_SIZE,NAT_STA_ID,GRDCCOUNTRY,Continent,Next_Downstream_Station,CATCHMENT_SIZE2,Priority,k_recession
0,ABOVE BOULEAU CREEK,4215070,1069050,50.212223,-119.538610,Active,WHITEMAN CREEK,630.00,"112,00 km²",08NM174,CA - CANADA,North America,4215103,112.00,751,0.910436
1,"ABOVE CHENEY RESERVOIRE, KS",4125110,1069620,37.863600,-98.014800,Active,NORTH FORK NINNESCAH RIVER,443.80,"2038,30 km²",07144780,US - UNITED STATES,North America,4125801,2038.30,463,0.541509
2,ABOVE FORT MCPHERSON,4208040,1068040,67.248886,-134.883060,Active,PEEL RIVER,,"70600,00 km²",10MC002,CA - CANADA,North America,,70600.00,56,0.942386
3,"ABOVE HAMBURG, IA",4122160,1068596,40.632500,-95.625800,Active,NISHNABOTNA RIVER,272.54,"7267,50 km²",06810000,US - UNITED STATES,North America,4122903,7267.50,254,0.912838
4,ABOVE HIGHWAY NO. 8 (DEMPSTER HIGHWAY),4208070,1068049,68.089165,-133.484160,Active,CARIBOU CREEK,,"625,00 km²",10LC007,CA - CANADA,North America,4208020,625.00,595,0.944144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
783,"WOODFORDS, CA (WEST FORK)",4118100,1068421,38.769600,-119.833800,Active,CARSON RIVER,1753.97,"169,38 km²",10310000,US - UNITED STATES,North America,,169.38,712,0.900646
784,XAMBIOA,3649418,1071023,-6.409700,-48.542200,Active,RIO ARAGUAIA,118.00,"364500,00 km²",28300000,BR - BRAZIL,South America,3649419,364500.00,19,0.974853
785,XAVANTINA,3649617,1074892,-14.672500,-52.355000,Active,RIO DAS MORTES,266.00,"24950,00 km²",26100000,BR - BRAZIL,South America,3649618,24950.00,119,0.977071
786,"YADKIN COLLEGE, NC",4148321,1069484,35.856700,-80.386900,Active,YADKIN RIVER,194.60,"5905,00 km²",02116500,US - UNITED STATES,North America,4148320,5905.00,284,0.896280


In [3]:
wyield_folder = r"Z:\PhD_Datasets&Analysis\Outputs\T&M_WBM\wyield"

years = range(1958, 2023 + 1)

# Gather the per-year zonal statistics files that exist on disk, in sorted order.
# A year whose file is absent simply contributes nothing to the list.
zonal_files = sorted(
    path
    for year in years
    for path in glob.glob(wyield_folder + f"\\wyield_zonal_statistics_{year}.csv")
)

# Stack the yearly tables into one DataFrame indexed by station number, and add a
# "YYYY-MM" text label built from YEAR and MONTH (parsing it as a date first means
# an invalid year/month combination raises instead of passing through).
zonal_df = (
    pd.concat([pd.read_csv(path) for path in zonal_files], ignore_index=True)
    .rename(columns={"grdcno_int": "station_no"})
    .set_index("station_no")
    .assign(
        DATE=lambda df: pd.to_datetime(
            df["YEAR"].astype(str) + "-" + df["MONTH"].astype(str).str.zfill(2),
            format="%Y-%m",
        ).dt.strftime("%Y-%m")
    )
)

# Keep only the columns used downstream, in display order
zonal_df = zonal_df[["YEAR", "MONTH", "DATE", "COUNT", "MEAN"]]

zonal_df

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3617110,1958,1,1958-01,29626.0,373.946142
3617110,1958,2,1958-02,29555.0,228.121757
3617110,1958,3,1958-03,29509.0,200.029459
3617110,1958,4,1958-04,29483.0,100.426328
3617110,1958,5,1958-05,29476.0,51.678385
...,...,...,...,...,...
6870640,2023,8,2023-08,222.0,4.873334
6870640,2023,9,2023-09,222.0,70.793220
6870640,2023,10,2023-10,222.0,35.212370
6870640,2023,11,2023-11,222.0,28.056604


In [4]:
def calculate_streamflow(year, month, wyield, area_km2):
    """Convert a monthly water yield over a catchment into a mean discharge.

    Parameters
    ----------
    year, month
        Calendar year and month (1-12). Both are cast to ``int`` and are used
        only to look up the number of days in that month (leap years included).
    wyield
        Water yield accumulated over the month. The factor of 1000 implies a
        depth in millimetres (1 mm over 1 km2 = 1000 m3) -- presumably
        mm/month; confirm against the source data.
    area_km2
        Catchment area in km2.

    Returns
    -------
    Mean streamflow over the month, in m3/s.
    """
    SECONDS_PER_DAY = 24 * 60 * 60
    _, days_in_month = calendar.monthrange(int(year), int(month))
    monthly_volume_m3 = wyield * area_km2 * 1000
    return monthly_volume_m3 / (days_in_month * SECONDS_PER_DAY)

In [5]:
# Index the station table by station number so catchment areas can be looked up per station
css_stations = filtered_css.set_index("station_no")

# Create a dictionary mapping station_no to CATCHMENT_SIZE2 (catchment area in km2)
catchment_size_dict = css_stations["CATCHMENT_SIZE2"].to_dict()

# Fail early and name every offending station, rather than stopping with a bare
# KeyError on the first missing station partway through the calculation.
missing_stations = zonal_df.index.unique().difference(css_stations.index)
if len(missing_stations) > 0:
    raise KeyError(f"No CATCHMENT_SIZE2 available for station_no: {missing_stations.tolist()}")

# Calculate FLOW_CMS. Iterating over the columns calls calculate_streamflow with the
# same arguments a row-wise DataFrame.apply(axis=1) would pass, but avoids building
# one Series object per row, which is slow on a table with this many rows.
zonal_df["FLOW_CMS"] = [
    calculate_streamflow(year, month, wyield, catchment_size_dict[station_no])
    for station_no, year, month, wyield in zip(
        zonal_df.index, zonal_df["YEAR"], zonal_df["MONTH"], zonal_df["MEAN"]
    )
]

zonal_df

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN,FLOW_CMS
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3617110,1958,1,1958-01,29626.0,373.946142,85025.836548
3617110,1958,2,1958-02,29555.0,228.121757,57426.484006
3617110,1958,3,1958-03,29509.0,200.029459,45481.608598
3617110,1958,4,1958-04,29483.0,100.426328,23595.537717
3617110,1958,5,1958-05,29476.0,51.678385,11750.349726
...,...,...,...,...,...,...
6870640,2023,8,2023-08,222.0,4.873334,3.613516
6870640,2023,9,2023-09,222.0,70.793220,54.242027
6870640,2023,10,2023-10,222.0,35.212370,26.109531
6870640,2023,11,2023-11,222.0,28.056604,21.497074


In [6]:
# Summary statistics of the numeric columns, as a quick sanity check of the computed flows
zonal_df.describe()

Unnamed: 0,YEAR,MONTH,COUNT,MEAN,FLOW_CMS
count,624096.0,624096.0,624096.0,624096.0,624096.0
mean,1990.5,6.5,2579.229885,42.209484,1203.571048
std,19.050387,3.452055,13627.223387,71.404474,10475.61801
min,1958.0,1.0,2.0,0.0,0.0
25%,1974.0,3.75,44.0,2.927794,1.937285
50%,1990.5,6.5,191.0,8.609237,11.33982
75%,2007.0,9.25,696.0,54.345415,73.2773
max,2023.0,12.0,219376.0,1759.14,710465.915347


In [7]:
# Check for calculations with only one pixel (rows whose COUNT equals 1)
zonal_df.loc[zonal_df["COUNT"].eq(1)]

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN,FLOW_CMS
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [8]:
# Locations of the input datasets
root_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs"
drain_areas_folder = root_folder + "\\Streamflow_Sts_Drainage_Areas"
watersheds_dbf_path = drain_areas_folder + "\\GRDC_Watersheds\\CSS-WATERSHEDS_FINAL_SELECTION.dbf"

# Open the attribute table (.dbf) that accompanies the watersheds shapefile
drain_areas_tb = DBF(watersheds_dbf_path)

# Load its records into a DataFrame keyed by GRDC station number
drain_areas_df = pd.DataFrame(iter(drain_areas_tb)).set_index("grdc_no")

# Cast the keys to int -- presumably grdc_no is not stored as an integer in the
# .dbf and must match the integer station numbers used elsewhere; TODO confirm
drain_areas_df.index = drain_areas_df.index.astype(int)
drain_areas_df

Unnamed: 0_level_0,river,station,area,altitude,lat_org,long_org,lat_pp,long_pp,dist_km,area_calc,quality,type,comment,source,Priority,k_recessio,grdcno_int,KM2_MOLWEI,Area_Diffe
grdc_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
3617110,RIO MAMORE,GUAJARA-MIRIM,609000.0,109.29,-10.7925,-65.3478,-10.7938,-65.3479,0.1,612073.0,High,Automatic,Area difference <= 5% and distance <= 5 km,hydrosheds,16,0.982211,3617110,615599.067935,-6599.070000
3617811,RIO GUAPORE,PRINCIPE DA BEIRA,341000.0,-999.00,-12.4267,-64.4253,-12.4271,-64.4271,0.2,341647.4,High,Automatic,Area difference <= 5% and distance <= 5 km,hydrosheds,20,0.984667,3617811,343627.165175,-2627.170000
3617812,RIO GUAPORE,PIMENTEIRAS,54200.0,-999.00,-13.4858,-61.0500,-13.4854,-61.0479,0.2,55743.9,High,Automatic,Area difference <= 5% and distance <= 5 km,hydrosheds,71,0.985912,3617812,56071.101113,-1871.100000
3617814,RIO GUAPORE,PONTES E LACERDA,2990.0,230.00,-15.2153,-59.3539,-15.2104,-59.3604,0.9,3020.5,High,Automatic,Area difference <= 5% and distance <= 5 km,hydrosheds,400,0.959591,3617814,3038.065668,-48.065700
3618051,RIO NEGRO,SERRINHA,279945.0,-999.00,-0.4817,-64.8272,-0.4938,-64.8146,1.9,292404.2,High,Automatic,Area difference <= 5% and distance <= 5 km,hydrosheds,26,0.977063,3618051,294372.508181,-14427.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6854601,IIJOKI,SUOLIJARVI -OUTLET,1313.0,150.00,65.1446,28.0690,65.1438,28.0646,0.2,1290.9,High,Automatic,Area difference <= 5% and distance <= 5 km,hydrosheds,515,0.913624,6854601,1285.299326,27.700700
6854713,KEMIJOKI,OUNASJARVI - OUTLET,363.0,290.00,68.3961,23.7524,68.3960,23.7430,0.4,364.6,High,Automatic,Area difference <= 5% and distance <= 5 km,merit,639,0.919590,6854713,362.852993,0.147007
6855411,VUOKSI,KAJOONJARVI - OUTLET,125.0,167.00,63.1519,28.8992,63.1440,28.9050,0.9,126.5,High,Automatic,Area difference <= 5% and distance <= 5 km,merit,741,0.945775,6855411,126.027335,-1.027330
6855412,VUOKSI,LOHNAJARVI - OUTLET,788.0,80.00,61.8351,28.3028,61.8320,28.2940,0.6,778.2,High,Automatic,Area difference <= 5% and distance <= 5 km,merit,568,0.949615,6855412,775.312161,12.687800


In [9]:
# Summary statistics of the numeric watershed attributes, including the area difference column (Area_Diffe)
drain_areas_df.describe()

Unnamed: 0,area,altitude,lat_org,long_org,lat_pp,long_pp,dist_km,area_calc,Priority,k_recessio,grdcno_int,KM2_MOLWEI,Area_Diffe
count,788.0,788.0,788.0,788.0,788.0,788.0,788.0,788.0,788.0,788.0,788.0,788.0,788.0
mean,48125.06,141.704768,32.658165,-51.780211,32.658476,-51.780142,0.326015,48215.5,394.5,0.874847,4654035.0,48424.74,-299.684357
std,267880.3,562.336346,25.753348,67.907675,25.754196,67.90781,0.409693,268362.8,227.620298,0.106813,961294.9,269771.6,4568.981479
min,49.0,-999.0,-43.1406,-162.8829,-43.1396,-162.8688,0.0,48.5,1.0,0.17755,3617110.0,48.459,-85131.4
25%,644.25,46.895,32.25895,-96.27425,32.2583,-96.27395,0.2,644.125,197.75,0.846919,4119284.0,643.3601,-38.80435
50%,3150.85,185.46,42.62875,-76.15405,42.62915,-76.15415,0.2,3117.95,394.5,0.904512,4148930.0,3121.883,-2.89569
75%,11156.4,353.9525,47.42565,0.62275,47.4292,0.62495,0.3,11233.73,591.25,0.943077,5607118.0,11257.0,11.3965
max,4680000.0,2380.61,68.8502,153.1587,68.8604,153.1604,7.0,4671462.0,788.0,0.99674,6870640.0,4701547.0,42730.0


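The area differences (`Area_Diffe`, i.e. the GRDC-reported `area` minus the Mollweide-based `KM2_MOLWEI`) range from -85131.4 to 42730 km<sup>2</sup>, with a mean value of approximately -300 km<sup>2</sup>. Because the mean is negative, we can conclude that the areas determined with the Mollweide projection tend, on average, to be larger than the default areas from the GRDC dataset. Note, however, that the median difference is only about -2.9 km<sup>2</sup>, so the mean is driven largely by a small number of catchments with very large differences.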

In [9]:
# Create a dictionary mapping grdc_no to KM2_MOLWEI.
# Area in km2 is now in the column "KM2_MOLWEI", which was calculated from the
# shapefile using the projection "Mollweide (World)".
catchment_size_dict2 = drain_areas_df["KM2_MOLWEI"].to_dict()

# Fail early and name every offending station, rather than stopping with a bare
# KeyError on the first missing station partway through the calculation.
missing_stations2 = zonal_df.index.unique().difference(drain_areas_df.index)
if len(missing_stations2) > 0:
    raise KeyError(f"No KM2_MOLWEI available for station_no: {missing_stations2.tolist()}")

# Calculate FLOW_CMS2. Iterating over the columns calls calculate_streamflow with the
# same arguments a row-wise DataFrame.apply(axis=1) would pass, but avoids building
# one Series object per row, which is slow on a table with this many rows.
zonal_df["FLOW_CMS2"] = [
    calculate_streamflow(year, month, wyield, catchment_size_dict2[station_no])
    for station_no, year, month, wyield in zip(
        zonal_df.index, zonal_df["YEAR"], zonal_df["MONTH"], zonal_df["MEAN"]
    )
]

# Save the combined table (all years, both flow estimates) next to the yearly inputs
zonal_df.to_csv(wyield_folder + f"\\wyield_zonal_statistics_{years[0]}-{years[-1]}.csv")
zonal_df

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN,FLOW_CMS,FLOW_CMS2
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3617110,1958,1,1958-01,29626.0,373.946142,85025.836548,85947.168686
3617110,1958,2,1958-02,29555.0,228.121757,57426.484006,58048.752099
3617110,1958,3,1958-03,29509.0,200.029459,45481.608598,45974.443122
3617110,1958,4,1958-04,29483.0,100.426328,23595.537717,23851.216791
3617110,1958,5,1958-05,29476.0,51.678385,11750.349726,11877.675434
...,...,...,...,...,...,...,...
6870640,2023,8,2023-08,222.0,4.873334,3.613516,3.539242
6870640,2023,9,2023-09,222.0,70.793220,54.242027,53.127115
6870640,2023,10,2023-10,222.0,35.212370,26.109531,25.572865
6870640,2023,11,2023-11,222.0,28.056604,21.497074,21.055215
