In [1]:
import pandas as pd, glob, calendar
from dbfread import DBF

In [2]:
css_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs\Streamflow_Stations\Climate_Sensitive_Stations-GRDC"

# Load the CSV file listing all CSS stations; the path is built separately so it
# can be inspected on its own if the read fails.
css_csv_path = css_folder + "//CSS_FINAL_SELECTION-MERGE_WITH_ALL.csv"
filtered_css = pd.read_csv(css_csv_path)
filtered_css

Unnamed: 0,station_name,station_no,station_id,station_latitude,station_longitude,station_status,river_name,station_elevation,CATCHMENT_SIZE,NAT_STA_ID,GRDCCOUNTRY,Continent,has_monthly_k,has_daily_k,monthly_k_recession,daily_k_recession,Next_Downstream_Station,CATCHMENT_SIZE2,Priority,Val_Analysis_Monthly_k
0,ABERCROMBIE,5204121,1075206,-33.954800,149.325200,Active,ABERCROMBIE RIVER,426.55,"2631,00 km²",412028,AU - AUSTRALIA,Oceania,Yes,No,0.420650,,5204302,2631.00,432,Yes
1,ABOVE BOULEAU CREEK,4215070,1069050,50.212223,-119.538610,Active,WHITEMAN CREEK,630.00,"112,00 km²",08NM174,CA - CANADA,North America,Yes,Yes,0.350868,0.910436,4215103,112.00,773,Yes
2,"ABOVE CHENEY RESERVOIRE, KS",4125110,1069620,37.863600,-98.014800,Active,NORTH FORK NINNESCAH RIVER,443.80,"2038,30 km²",07144780,US - UNITED STATES,North America,Yes,Yes,0.417039,0.541509,4125801,2038.30,478,No
3,ABOVE FORT MCPHERSON,4208040,1068040,67.248886,-134.883060,Active,PEEL RIVER,,"70600,00 km²",10MC002,CA - CANADA,North America,Yes,Yes,0.625278,0.942386,,70600.00,59,No
4,"ABOVE HAMBURG, IA",4122160,1068596,40.632500,-95.625800,Active,NISHNABOTNA RIVER,272.54,"7267,50 km²",06810000,US - UNITED STATES,North America,Yes,Yes,0.549697,0.912838,4122903,7267.50,264,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
804,"WOODFORDS, CA (WEST FORK)",4118100,1068421,38.769600,-119.833800,Active,CARSON RIVER,1753.97,"169,38 km²",10310000,US - UNITED STATES,North America,Yes,Yes,0.593689,0.900646,,169.38,732,Yes
805,XAMBIOA,3649418,1071023,-6.409700,-48.542200,Active,RIO ARAGUAIA,118.00,"364500,00 km²",28300000,BR - BRAZIL,South America,Yes,Yes,0.627802,0.974853,3649419,364500.00,20,No
806,XAVANTINA,3649617,1074892,-14.672500,-52.355000,Active,RIO DAS MORTES,266.00,"24950,00 km²",26100000,BR - BRAZIL,South America,Yes,Yes,0.786505,0.977071,3649618,24950.00,123,No
807,"YADKIN COLLEGE, NC",4148321,1069484,35.856700,-80.386900,Active,YADKIN RIVER,194.60,"5905,00 km²",02116500,US - UNITED STATES,North America,Yes,Yes,0.718625,0.896280,4148320,5905.00,294,Yes


In [3]:
q_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs\TerraClimate\GeoTIFF"

years = range(1958, 2023 + 1)

# Collect the per-year zonal statistics files. The pattern contains no wildcard,
# so glob acts as an existence check: years without a file contribute nothing.
zonal_files = []
for year in years:
    zonal_files += glob.glob(q_folder + f"\\q_zonal_statistics_{year}.csv")
zonal_files.sort()

# Stack all years into one frame indexed by station number
yearly_frames = [pd.read_csv(path) for path in zonal_files]
zonal_df = (
    pd.concat(yearly_frames, ignore_index=True)
    .rename(columns={"grdcno_int": "station_no"})
    .set_index("station_no")
)

# Build a "YYYY-MM" label from YEAR and MONTH; the round trip through
# to_datetime makes sure every year/month pair parses as a date.
year_month = zonal_df["YEAR"].astype(str) + "-" + zonal_df["MONTH"].astype(str).str.zfill(2)
zonal_df["DATE"] = pd.to_datetime(year_month, format="%Y-%m").dt.strftime("%Y-%m")

# Keep only the columns used downstream, in a fixed order
zonal_df = zonal_df[["YEAR", "MONTH", "DATE", "COUNT", "MEAN"]]

zonal_df

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3617110,1958,1,1958-01,29790.0,133.782209
3617110,1958,2,1958-02,29790.0,94.737076
3617110,1958,3,1958-03,29790.0,108.573941
3617110,1958,4,1958-04,29790.0,46.757264
3617110,1958,5,1958-05,29790.0,17.740483
...,...,...,...,...,...
6870640,2023,8,2023-08,228.0,4.524123
6870640,2023,9,2023-09,228.0,69.341228
6870640,2023,10,2023-10,228.0,0.139035
6870640,2023,11,2023-11,228.0,0.000000


In [4]:
def calculate_streamflow(year, month, q, area_km2):
    """Convert a monthly runoff total over a catchment into a mean flow in m3/s.

    Parameters
    ----------
    year, month : values convertible with ``int()``
        Calendar year and month (1-12); used to get the number of days in the
        month, so leap years are accounted for.
    q : number
        Monthly runoff value. The factor 1000 below implies a depth in
        millimetres when the area is in km2 -- TODO confirm the unit of the
        input raster.
    area_km2 : number
        Catchment area in km2.

    Returns
    -------
    float
        ``q * area_km2 * 1000`` divided by the number of seconds in the month.
    """
    _, days_in_month = calendar.monthrange(int(year), int(month))
    seconds_in_month = days_in_month * 24 * 60 * 60
    monthly_volume = q * area_km2 * 1000  # m3, if q is in mm and area in km2
    return monthly_volume / seconds_in_month

In [5]:
# Index the station metadata by station_no so catchment areas can be looked up
# with the same key that indexes zonal_df.
css_stations = filtered_css.set_index("station_no")

# Create a dictionary mapping station_no to CATCHMENT_SIZE2
catchment_size_dict = css_stations["CATCHMENT_SIZE2"].to_dict()

# Fail early, naming every station that has zonal statistics but no catchment
# area, instead of a bare KeyError raised from the middle of the computation.
missing_stations = sorted(set(zonal_df.index) - set(catchment_size_dict))
if missing_stations:
    raise KeyError(
        f"station_no values in zonal_df without a CATCHMENT_SIZE2 entry: {missing_stations}"
    )

# Calculate FLOW_CMS column-wise. This is the same conversion as
# calculate_streamflow (MEAN * area * 1000 / seconds in the month), but done on
# whole arrays rather than with a Python call per row.
area_km2 = zonal_df.index.map(catchment_size_dict).to_numpy(dtype=float)
month_start = pd.to_datetime(
    pd.DataFrame(
        {
            "year": zonal_df["YEAR"].to_numpy(),
            "month": zonal_df["MONTH"].to_numpy(),
            "day": 1,
        }
    )
)
# days_in_month accounts for leap years, like calendar.monthrange does
seconds_in_month = month_start.dt.days_in_month.to_numpy() * 24 * 60 * 60
zonal_df["FLOW_CMS"] = (zonal_df["MEAN"].to_numpy() * area_km2 * 1000) / seconds_in_month

zonal_df.to_csv(q_folder + f"\\q_zonal_statistics_{years[0]}-{years[-1]}.csv")
zonal_df

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN,FLOW_CMS
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3617110,1958,1,1958-01,29790.0,133.782209,30418.669786
3617110,1958,2,1958-02,29790.0,94.737076,23848.743141
3617110,1958,3,1958-03,29790.0,108.573941,24686.951172
3617110,1958,4,1958-04,29790.0,46.757264,10985.792395
3617110,1958,5,1958-05,29790.0,17.740483,4033.734461
...,...,...,...,...,...,...
6870640,2023,8,2023-08,228.0,4.524123,3.354580
6870640,2023,9,2023-09,228.0,69.341228,53.129506
6870640,2023,10,2023-10,228.0,0.139035,0.103093
6870640,2023,11,2023-11,228.0,0.000000,0.000000


In [6]:
# Summary statistics of the numeric columns, as a sanity check on the ranges of
# COUNT, MEAN and the derived FLOW_CMS.
zonal_df.describe()

Unnamed: 0,YEAR,MONTH,COUNT,MEAN,FLOW_CMS
count,639936.0,639936.0,639936.0,639936.0,639936.0
mean,1990.5,6.5,2674.314219,31.962993,1112.127875
std,19.050387,3.452055,13839.520289,67.028768,10086.173576
min,1958.0,1.0,3.0,0.0,0.0
25%,1974.0,3.75,45.0,1.446087,0.439985
50%,1990.5,6.5,197.0,4.198039,4.684244
75%,2007.0,9.25,726.0,27.730058,33.818157
max,2023.0,12.0,221298.0,1759.14,404117.611274


In [7]:
# Check for calculations with only one pixel
single_pixel_mask = zonal_df["COUNT"] == 1
zonal_df[single_pixel_mask]

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN,FLOW_CMS
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
