In [34]:
import pandas as pd, os, shutil

In [None]:
# Whole-period daily processing
# Writes one space-delimited <station>.csv per station into Baseflow/all_data and
# rebuilds that folder's file.lst (master header + one "<id>.csv     <id>.out" line per station).
daily_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs\Streamflow_Stations\Climate_Sensitive_Stations-GRDC\2025-02-13_17-18_Daily"

# Check if the exported file can be read correctly.
# The path is built with os.path.join: the previous non-raw literal contained the
# invalid escape sequences "\_" and "\J", which newer Python versions warn about.
df_cleaned = pd.read_csv(
    os.path.join(daily_folder, "_DataFrames", "Joined_Daily_Sts_DFs.csv"),
    index_col="YYYY-MM-DD",
)

# All daily data folder
baseflow_folder = os.path.join(daily_folder, "Baseflow")
ad_folder = os.path.join(baseflow_folder, "all_data")

# Ensure the output directory exists (no error when it is already there)
os.makedirs(ad_folder, exist_ok=True)

# Copy the master list file next to the station files (file content and permission mode)
master_lst = os.path.join(baseflow_folder, "file.lst")
station_lst = os.path.join(ad_folder, "file.lst")
shutil.copy(master_lst, station_lst)

# Reading file content
with open(station_lst, "r") as file:
    content = file.readlines()

position = 6  # number of leading lines of file.lst kept as the header
space = "     "  # separator between the input and output file names in file.lst
header = content[:position]  # Take the header of the baseflow master file
lines_to_write = []

stations = df_cleaned.columns.to_list()

for id_station in stations:

    print("Processing station " + id_station)

    # Setting up files for the following baseflow calculation.
    # The chain builds a fresh frame, so no value is assigned onto a filtered slice
    # (the source of pandas' SettingWithCopyWarning).
    temp_df1 = (
        df_cleaned[id_station]
        .dropna()               # keep only the days on which this station has a value
        .round(2)               # two decimals, following the example file of the baseflow algorithm
        .rename("FlowValue")    # column name required by the baseflow input format
        .rename_axis("Date")
        .reset_index()
    )
    # There must be no separators in the date portion (YYYYMMDD) of the line
    temp_df1["Date"] = temp_df1["Date"].str.replace("-", "", regex=False)
    temp_df1 = temp_df1.set_index("Date")
    # Save file using space delimited format for baseflow calculations
    temp_df1.to_csv(os.path.join(ad_folder, id_station + ".csv"), sep=" ")

    lines_to_write.append(id_station + ".csv" + space + id_station + ".out\n")

new_content = header + lines_to_write

# Writing modified content back
with open(station_lst, "w") as file:
    file.write(''.join(new_content))

Processing station 3617110
Processing station 3617811
Processing station 3617812
Processing station 3617814
Processing station 3618051
Processing station 3618052
Processing station 3618500
Processing station 3618700
Processing station 3618711
Processing station 3618720
Processing station 3618721
Processing station 3618722
Processing station 3618950
Processing station 3618951
Processing station 3621200
Processing station 3623100
Processing station 3625000
Processing station 3626000
Processing station 3627000
Processing station 3628400
Processing station 3628701
Processing station 3628900
Processing station 3629001
Processing station 3630600
Processing station 3631100
Processing station 3632400
Processing station 3633120
Processing station 3633123
Processing station 3633160
Processing station 3633180
Processing station 3633301
Processing station 3634150
Processing station 3634160
Processing station 3634320
Processing station 3634340
Processing station 3634350
Processing station 3634360
P

In [None]:
# Normal and Leap years daily processing
daily_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs\Streamflow_Stations\Climate_Sensitive_Stations-GRDC\2025-02-13_17-18_Daily"

# Check if the exported file can be read correctly.
# os.path.join replaces the previous non-raw literal, whose "\_" and "\J" were
# invalid escape sequences that newer Python versions warn about.
df_cleaned = pd.read_csv(
    os.path.join(daily_folder, "_DataFrames", "Joined_Daily_Sts_DFs.csv"),
    index_col="YYYY-MM-DD",
)

# Leap and normal years daily data folders
ly_folder = os.path.join(daily_folder, "leap_year")
ny_folder = os.path.join(daily_folder, "normal_year")

# Ensure output directories exist (no error when they are already there)
for out_folder in (ly_folder, ny_folder):
    os.makedirs(out_folder, exist_ok=True)

In [26]:
# Work on a copy so that df_cleaned itself is left untouched
temp_df2 = df_cleaned.copy()

# Parse the index labels into datetimes so year / day-of-year can be read from them
temp_df2.index = pd.to_datetime(temp_df2.index, format='%Y-%m-%d')

# Year of every row, computed once and reused below
index_years = temp_df2.index.year

# Gregorian rule: divisible by 4, except century years that are not divisible by 400
is_leap_row = (index_years % 4 == 0) & ((index_years % 100 != 0) | (index_years % 400 == 0))
leap_years = index_years[is_leap_row]
normal_years = index_years[~index_years.isin(leap_years)]

# Day of year (1-based) used as the grouping key
temp_df2['DOY'] = temp_df2.index.dayofyear

# Split the rows by year type
df_leap = temp_df2[index_years.isin(leap_years)]
df_normal = temp_df2[index_years.isin(normal_years)]

# Leap Year Daily Averages: mean of each DOY across all leap years
avg_leap = df_leap.groupby("DOY").mean()

# Normal Year Daily Averages: mean of each DOY across all normal years
avg_normal = df_normal.groupby("DOY").mean()

# 'DOY' is the index after the groupby; drop any leftover column of that name if present
avg_leap = avg_leap.drop(columns=['DOY'], errors='ignore')
avg_normal = avg_normal.drop(columns=['DOY'], errors='ignore')

In [27]:
# Sanity check: prints True when any column's non-null count differs from 366
leap_value_counts = avg_leap.count()
print(any(leap_value_counts != 366))
avg_leap

False


Unnamed: 0_level_0,3617110,3617811,3617812,3617814,3618051,3618052,3618500,3618700,3618711,3618720,...,6742701,6744200,6744500,6830101,6830103,6854601,6854713,6855411,6855412,6870640
DOY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,6299.010286,1442.119700,468.593300,54.250167,12301.288636,9195.073455,1250.966286,41.043000,61.635455,599.1497,...,30.088667,147.48125,98.25000,2.176923,2.565833,9.428000,1.903125,1.703333,7.828333,8.681429
2,6300.810286,1437.866400,468.593300,54.183500,12301.288636,9195.073455,1250.966286,41.043000,61.635455,598.7788,...,29.436667,158.73750,85.83750,2.150000,2.535000,9.202000,1.896875,1.673333,7.895000,8.605714
3,6375.510714,1485.409600,479.832500,54.323000,12404.538909,9138.888636,1208.203500,36.778000,61.131636,583.3891,...,27.593333,158.32500,99.60625,2.121538,2.534167,9.007333,1.850625,1.660000,7.944167,8.507857
4,6452.097786,1525.219400,491.097800,55.108333,12444.701545,9071.549727,1180.848500,39.314222,59.853182,578.7141,...,26.781333,136.65625,99.42500,2.111538,2.522500,8.901333,1.838125,1.630833,7.915000,8.447143
5,6527.338357,1567.317300,498.495200,56.178167,12440.480545,9026.848818,1139.506071,40.153111,62.950818,564.0146,...,26.454000,127.47500,84.97500,2.106923,2.523333,8.693333,1.823125,1.605833,7.838333,8.399286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,6180.223462,1396.555667,512.345444,54.631273,13403.670100,9429.520800,1446.528615,69.900667,79.498273,971.1764,...,32.629333,151.31250,95.75000,1.862308,2.480769,10.531333,1.926250,1.707500,7.480833,9.149286
363,6290.438385,1431.124333,524.525667,55.334727,13490.507700,9556.872000,1467.954385,89.049889,79.911273,986.3294,...,33.280000,137.55625,84.60000,1.846154,2.456923,10.310667,1.937500,1.678333,7.360833,9.085000
364,6426.709923,1458.014222,538.214222,55.772273,13574.429000,9635.599700,1480.100308,90.913222,99.271636,1009.1931,...,32.586667,129.34375,82.25625,1.832308,2.433846,10.022667,1.934375,1.665833,7.241667,8.926429
365,6583.312615,1478.624222,546.290889,56.256818,13642.075500,9711.332700,1514.323923,91.482444,103.382182,1033.7224,...,31.520000,120.01875,87.58750,1.810769,2.428462,9.760667,1.927500,1.623333,7.165833,8.876429


In [28]:
# Sanity check: prints True when any column's non-null count differs from 365
normal_value_counts = avg_normal.count()
print(any(normal_value_counts != 365))
avg_normal

False


Unnamed: 0_level_0,3617110,3617811,3617812,3617814,3618051,3618052,3618500,3618700,3618711,3618720,...,6742701,6744200,6744500,6830101,6830103,6854601,6854713,6855411,6855412,6870640
DOY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,7680.890231,1651.769556,536.396885,60.393556,13006.376969,9042.593188,1379.867026,59.813179,76.234938,889.763063,...,35.873696,145.353061,188.557143,1.795897,2.346410,8.694375,1.8280,1.520811,6.559459,8.194318
2,7673.415667,1651.174556,536.281577,60.384111,13006.376969,9042.593188,1380.232154,59.802964,76.377125,890.820625,...,35.775217,159.457143,188.808163,1.784103,2.334872,8.619167,1.8140,1.517297,6.488108,8.167727
3,7772.126769,1677.156074,542.127577,61.511750,12959.579938,8940.618437,1384.986077,56.579107,74.560250,888.342750,...,36.910217,162.044898,167.312245,1.768205,2.321538,8.541458,1.8188,1.495676,6.515135,8.092273
4,7866.198154,1704.140037,547.396615,62.322194,12911.016156,8848.516219,1390.594641,54.815393,75.621094,867.949813,...,38.837391,168.400000,146.261224,1.754103,2.309744,8.437917,1.8038,1.462973,6.280541,8.022955
5,7961.708000,1736.985593,551.975654,63.259556,12825.626062,8792.488219,1381.774744,52.668250,67.688937,856.468156,...,39.277174,169.267347,133.479592,1.735385,2.294103,8.331667,1.7958,1.437027,6.204324,7.934091
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361,6692.076750,1423.491429,447.034692,55.078571,12593.485424,9138.121606,1325.901487,50.991607,82.305500,740.377484,...,35.271739,127.989796,151.177143,1.975500,2.601026,8.902708,1.8836,1.553243,6.281892,8.557273
362,6818.134400,1452.452286,453.836308,55.909429,12604.912788,9113.525333,1332.152564,50.362963,84.805688,736.205871,...,35.233913,138.516327,148.654082,1.960250,2.565128,8.790208,1.8788,1.543784,6.209459,8.475000
363,6922.226725,1482.776214,460.944269,57.101086,12599.447727,9087.909667,1328.881538,47.700643,84.997625,729.705581,...,34.176957,148.051020,143.073878,1.944750,2.532821,8.696875,1.8590,1.532162,6.190270,8.362045
364,7019.310450,1515.291536,469.417500,58.156114,12580.793879,9034.179515,1317.029564,51.230429,79.235250,716.582258,...,34.479348,149.555102,159.965306,1.924500,2.506154,8.679167,1.8306,1.527838,6.185946,8.316136


In [None]:
# Helper: map a day-of-year number onto a calendar date of a chosen year
def doy_to_date(doy, year):
    """Return the date of day-of-year ``doy`` in ``year`` as a 'YYYY-MM-DD' string.

    ``doy`` is 1-based: 1 maps to January 1st of ``year``.
    """
    january_first = pd.to_datetime(f'{year}-01-01')
    days_after = pd.to_timedelta(doy - 1, unit='D')
    target_day = january_first + days_after
    return target_day.strftime('%Y-%m-%d')

In [30]:
# Attach a 'Date' column to each table by converting every DOY index label
# with doy_to_date, using one representative year per table
avg_leap['Date'] = [doy_to_date(doy, 2024) for doy in avg_leap.index]  # Leap Year Example
avg_normal['Date'] = [doy_to_date(doy, 2023) for doy in avg_normal.index]  # Normal Year Example

In [31]:
# Display avg_leap for visual inspection (the 'Date' column is added by the DOY-to-date conversion cell)
avg_leap

Unnamed: 0_level_0,3617110,3617811,3617812,3617814,3618051,3618052,3618500,3618700,3618711,3618720,...,6744200,6744500,6830101,6830103,6854601,6854713,6855411,6855412,6870640,Date
DOY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,6299.010286,1442.119700,468.593300,54.250167,12301.288636,9195.073455,1250.966286,41.043000,61.635455,599.1497,...,147.48125,98.25000,2.176923,2.565833,9.428000,1.903125,1.703333,7.828333,8.681429,2024-01-01
2,6300.810286,1437.866400,468.593300,54.183500,12301.288636,9195.073455,1250.966286,41.043000,61.635455,598.7788,...,158.73750,85.83750,2.150000,2.535000,9.202000,1.896875,1.673333,7.895000,8.605714,2024-01-02
3,6375.510714,1485.409600,479.832500,54.323000,12404.538909,9138.888636,1208.203500,36.778000,61.131636,583.3891,...,158.32500,99.60625,2.121538,2.534167,9.007333,1.850625,1.660000,7.944167,8.507857,2024-01-03
4,6452.097786,1525.219400,491.097800,55.108333,12444.701545,9071.549727,1180.848500,39.314222,59.853182,578.7141,...,136.65625,99.42500,2.111538,2.522500,8.901333,1.838125,1.630833,7.915000,8.447143,2024-01-04
5,6527.338357,1567.317300,498.495200,56.178167,12440.480545,9026.848818,1139.506071,40.153111,62.950818,564.0146,...,127.47500,84.97500,2.106923,2.523333,8.693333,1.823125,1.605833,7.838333,8.399286,2024-01-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,6180.223462,1396.555667,512.345444,54.631273,13403.670100,9429.520800,1446.528615,69.900667,79.498273,971.1764,...,151.31250,95.75000,1.862308,2.480769,10.531333,1.926250,1.707500,7.480833,9.149286,2024-12-27
363,6290.438385,1431.124333,524.525667,55.334727,13490.507700,9556.872000,1467.954385,89.049889,79.911273,986.3294,...,137.55625,84.60000,1.846154,2.456923,10.310667,1.937500,1.678333,7.360833,9.085000,2024-12-28
364,6426.709923,1458.014222,538.214222,55.772273,13574.429000,9635.599700,1480.100308,90.913222,99.271636,1009.1931,...,129.34375,82.25625,1.832308,2.433846,10.022667,1.934375,1.665833,7.241667,8.926429,2024-12-29
365,6583.312615,1478.624222,546.290889,56.256818,13642.075500,9711.332700,1514.323923,91.482444,103.382182,1033.7224,...,120.01875,87.58750,1.810769,2.428462,9.760667,1.927500,1.623333,7.165833,8.876429,2024-12-30


In [32]:
# Display avg_normal for visual inspection (the 'Date' column is added by the DOY-to-date conversion cell)
avg_normal

Unnamed: 0_level_0,3617110,3617811,3617812,3617814,3618051,3618052,3618500,3618700,3618711,3618720,...,6744200,6744500,6830101,6830103,6854601,6854713,6855411,6855412,6870640,Date
DOY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,7680.890231,1651.769556,536.396885,60.393556,13006.376969,9042.593188,1379.867026,59.813179,76.234938,889.763063,...,145.353061,188.557143,1.795897,2.346410,8.694375,1.8280,1.520811,6.559459,8.194318,2023-01-01
2,7673.415667,1651.174556,536.281577,60.384111,13006.376969,9042.593188,1380.232154,59.802964,76.377125,890.820625,...,159.457143,188.808163,1.784103,2.334872,8.619167,1.8140,1.517297,6.488108,8.167727,2023-01-02
3,7772.126769,1677.156074,542.127577,61.511750,12959.579938,8940.618437,1384.986077,56.579107,74.560250,888.342750,...,162.044898,167.312245,1.768205,2.321538,8.541458,1.8188,1.495676,6.515135,8.092273,2023-01-03
4,7866.198154,1704.140037,547.396615,62.322194,12911.016156,8848.516219,1390.594641,54.815393,75.621094,867.949813,...,168.400000,146.261224,1.754103,2.309744,8.437917,1.8038,1.462973,6.280541,8.022955,2023-01-04
5,7961.708000,1736.985593,551.975654,63.259556,12825.626062,8792.488219,1381.774744,52.668250,67.688937,856.468156,...,169.267347,133.479592,1.735385,2.294103,8.331667,1.7958,1.437027,6.204324,7.934091,2023-01-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361,6692.076750,1423.491429,447.034692,55.078571,12593.485424,9138.121606,1325.901487,50.991607,82.305500,740.377484,...,127.989796,151.177143,1.975500,2.601026,8.902708,1.8836,1.553243,6.281892,8.557273,2023-12-27
362,6818.134400,1452.452286,453.836308,55.909429,12604.912788,9113.525333,1332.152564,50.362963,84.805688,736.205871,...,138.516327,148.654082,1.960250,2.565128,8.790208,1.8788,1.543784,6.209459,8.475000,2023-12-28
363,6922.226725,1482.776214,460.944269,57.101086,12599.447727,9087.909667,1328.881538,47.700643,84.997625,729.705581,...,148.051020,143.073878,1.944750,2.532821,8.696875,1.8590,1.532162,6.190270,8.362045,2023-12-29
364,7019.310450,1515.291536,469.417500,58.156114,12580.793879,9034.179515,1317.029564,51.230429,79.235250,716.582258,...,149.555102,159.965306,1.924500,2.506154,8.679167,1.8306,1.527838,6.185946,8.316136,2023-12-30


In [None]:
def _write_baseflow_inputs(avg_df, out_folder, master_lst, stations, header_lines=6, gap="     "):
    """Write the per-station baseflow input files and the matching file.lst.

    Parameters
    ----------
    avg_df : DataFrame holding a 'Date' column ('YYYY-MM-DD' strings) and one
        column per station id listed in ``stations``.
    out_folder : folder that receives the ``<station>.csv`` files and ``file.lst``.
    master_lst : path of the master ``file.lst`` whose first ``header_lines``
        lines are kept as the header.
    stations : iterable of station ids (column names of ``avg_df``).
    header_lines : number of leading lines of the master file kept as header.
    gap : separator written between the input and output file names.
    """
    lst_path = os.path.join(out_folder, "file.lst")

    # Copy the master list file (file content and permission mode), then keep only its header
    shutil.copy(master_lst, lst_path)
    with open(lst_path, "r") as file:
        header = file.readlines()[:header_lines]

    entries = []
    for id_station in stations:

        print("Processing station " + id_station)

        # Rows with missing values are intentionally kept here (no null filtering),
        # so each file carries every day present in avg_df.
        station_df = (
            avg_df[["Date", id_station]]
            .rename(columns={id_station: "FlowValue"})  # column name required by the baseflow input format
            .assign(
                # There must be no separators in the date portion (YYYYMMDD) of the line
                Date=lambda d: d["Date"].str.replace("-", "", regex=False),
                # Two decimals, following the example file of the baseflow algorithm
                FlowValue=lambda d: d["FlowValue"].round(2),
            )
            .set_index("Date")
        )
        # Save file using space delimited format for baseflow calculations
        station_df.to_csv(os.path.join(out_folder, id_station + ".csv"), sep=" ")

        entries.append(id_station + ".csv" + gap + id_station + ".out\n")

    # Writing modified content back: master header followed by one line per station
    with open(lst_path, "w") as file:
        file.write(''.join(header + entries))


dfs = [avg_leap, avg_normal]
folders = [ly_folder, ny_folder]

# Loop-invariant inputs, computed once instead of on every pass
stations = df_cleaned.columns.to_list()
master_lst = os.path.join(daily_folder, "Baseflow", "file.lst")

for avg_df, out_folder in zip(dfs, folders):
    _write_baseflow_inputs(avg_df, out_folder, master_lst, stations)