In [1]:
import pandas as pd, glob
import numpy as np

In [2]:
# Build one long-format table of monthly zonal statistics from the per-year CSV files.
# Point root_folder / prefix at the variable to process (e.g. the bflow2 or perc outputs).
root_folder = r"Z:\PhD_Datasets&Analysis\Outputs\T&M_WBM\bflow2"
prefix = "bflow"

years = range(1958, 2023 + 1)  # 1958..2023 inclusive

# Collect the per-year files. Years without a file are recorded so they can be
# reported instead of being skipped silently (a gap would leave holes in the series).
zonal_files = []
missing_years = []
for year in years:
    year_matches = glob.glob(root_folder + f"\\{prefix}_zonal_statistics_{year}.csv")
    if year_matches:
        zonal_files.extend(year_matches)
    else:
        missing_years.append(year)
zonal_files.sort()

# Fail with a clear message rather than pd.concat's "No objects to concatenate".
if not zonal_files:
    raise FileNotFoundError(
        f"No '{prefix}_zonal_statistics_<year>.csv' files found in {root_folder}"
    )
if missing_years:
    print(f"Warning: no zonal statistics file found for year(s): {missing_years}")

# Stack all years, then index the rows by station number
# ("grdcno_int" in the input files is renamed to "station_no").
zonal_df = (
    pd.concat([pd.read_csv(file) for file in zonal_files], ignore_index=True)
    .rename(columns={"grdcno_int": "station_no"})
    .set_index("station_no")
)

# "YYYY-MM" label built from YEAR and zero-padded MONTH. The round-trip through
# to_datetime keeps the label as a string while checking it parses with "%Y-%m".
zonal_df["DATE"] = zonal_df["YEAR"].astype(str) + "-" + zonal_df["MONTH"].astype(str).str.zfill(2)
zonal_df["DATE"] = pd.to_datetime(zonal_df["DATE"], format="%Y-%m").dt.strftime("%Y-%m")

# Keep only the columns used downstream, in a fixed order.
zonal_df = zonal_df[["YEAR", "MONTH", "DATE", "COUNT", "MEAN"]]

zonal_df

Unnamed: 0_level_0,YEAR,MONTH,DATE,COUNT,MEAN
station_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3617110,1958,1,1958-01,29626.0,7.484559
3617110,1958,2,1958-02,29555.0,5.581120
3617110,1958,3,1958-03,29509.0,4.138263
3617110,1958,4,1958-04,29483.0,3.069340
3617110,1958,5,1958-05,29476.0,2.279233
...,...,...,...,...,...
6870640,2023,8,2023-08,222.0,0.598420
6870640,2023,9,2023-09,222.0,1.420132
6870640,2023,10,2023-10,222.0,30.797106
6870640,2023,11,2023-11,222.0,24.522135


In [3]:
# Reshape the long table to wide format: one row per month, one column per station,
# cell values taken from MEAN. The index is labelled "YYYY-MM" for consistency with
# the observed data.
zonal_df_pivoted = (
    zonal_df
    .reset_index()
    .pivot(index='DATE', columns='station_no', values='MEAN')
    .rename_axis("YYYY-MM")
)

# Station numbers become string column labels (consistent with the observed data).
# Assigning a plain list also leaves the columns axis unnamed.
zonal_df_pivoted.columns = [str(station) for station in zonal_df_pivoted.columns]

# Save the wide table next to the per-year inputs, tagged with the year span.
zonal_df_pivoted.to_csv(root_folder + f"\\{prefix}_zonal_statistics_{years[0]}-{years[-1]}.csv")
zonal_df_pivoted

Unnamed: 0_level_0,3617110,3617811,3617812,3617814,3618051,3618052,3618500,3618700,3618711,3618720,...,6742701,6744200,6744500,6830101,6830103,6854601,6854713,6855411,6855412,6870640
YYYY-MM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1958-01,7.484559,7.428161,7.923967,8.407091,8.145369,8.085726,7.294012,6.424676,6.594522,7.698261,...,5.730342,6.961725,7.043673,6.561404,4.649185,5.229129,5.717774,10.130124,17.335681,4.212825
1958-02,5.581120,5.546272,6.280208,7.067562,6.636699,6.539864,5.335762,4.127646,4.352435,5.935646,...,11.614347,10.672928,10.006815,15.784579,10.391970,18.179496,8.532593,20.505592,18.374847,12.880703
1958-03,4.138263,4.135383,4.978593,5.941465,5.408972,5.291005,3.914872,2.651879,2.875071,4.583528,...,15.326067,13.834860,10.940518,19.490193,13.325337,17.663210,11.546070,20.356808,20.722211,17.975669
1958-04,3.069340,3.080748,3.947721,4.994793,4.421323,4.299501,4.777978,4.221971,5.029361,5.839324,...,9.863177,11.060403,8.184048,19.892824,18.311752,23.122667,14.674276,17.894348,11.700956,25.255883
1958-05,2.279233,2.297407,3.131118,4.198957,3.605134,3.480023,9.621394,2.902642,4.810377,10.551099,...,5.612700,7.604794,5.002173,13.052485,8.513473,9.316946,8.390419,8.630968,6.602756,9.597326
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08,1.089124,1.380586,1.322891,1.564264,0.008064,0.011786,2.443464,0.144626,0.462393,1.069833,...,0.578384,1.983403,0.902939,3.667914,1.049509,1.000018,2.332444,4.259922,11.576157,0.598420
2023-09,0.806723,1.025121,1.048974,1.315025,0.006359,0.009279,1.744973,0.092918,0.311779,0.803218,...,0.329133,1.364025,0.551885,2.579017,0.487936,0.407569,1.333639,2.054685,18.071234,1.420132
2023-10,0.597943,0.761868,0.831987,1.105498,0.004992,0.007271,1.247243,0.059697,0.210324,0.603993,...,0.187295,0.938099,0.337318,13.112894,15.117534,37.972613,11.399232,16.309380,13.304872,30.797106
2023-11,0.443510,0.566764,0.660063,0.929355,0.004356,0.005933,0.892320,0.038353,0.141947,0.454893,...,0.154014,4.071304,4.922911,16.543236,21.316922,42.802481,18.514031,46.762385,37.392349,24.522135


In [4]:
if prefix == "bflow":
    # Value placed in the first row, which has no preceding month to copy from.
    # NOTE(review): the choice of 10 is not explained in this notebook — confirm its origin/units.
    initial_bflow = 10

    # One-row lag of the wide table: every month receives the previous row's values,
    # the first row receives initial_bflow, and the last month's values drop off.
    # The index (months) and columns (stations) are kept unchanged.
    # NOTE(review): the lag is positional, so it presumably assumes the rows are
    # consecutive months with no gaps — verify against the input files.
    bflow_ant = zonal_df_pivoted.shift(1, fill_value=initial_bflow)

    bflow_ant.to_csv(root_folder + f"\\{prefix}_ant_zonal_statistics_{years[0]}-{years[-1]}.csv")
    print(bflow_ant)
else:
    print(f"No additional processing for '{prefix}' prefix.")

           3617110    3617811    3617812    3617814    3618051    3618052  \
YYYY-MM                                                                     
1958-01  10.000000  10.000000  10.000000  10.000000  10.000000  10.000000   
1958-02   7.484559   7.428161   7.923967   8.407091   8.145369   8.085726   
1958-03   5.581120   5.546272   6.280208   7.067562   6.636699   6.539864   
1958-04   4.138263   4.135383   4.978593   5.941465   5.408972   5.291005   
1958-05   3.069340   3.080748   3.947721   4.994793   4.421323   4.299501   
...            ...        ...        ...        ...        ...        ...   
2023-08   1.471291   1.860892   1.668737   1.860742   0.010277   0.015045   
2023-09   1.089124   1.380586   1.322891   1.564264   0.008064   0.011786   
2023-10   0.806723   1.025121   1.048974   1.315025   0.006359   0.009279   
2023-11   0.597943   0.761868   0.831987   1.105498   0.004992   0.007271   
2023-12   0.443510   0.566764   0.660063   0.929355   0.004356   0.005933   