In [123]:
import glob
import os
from datetime import date, timedelta

import numpy as np
import pandas as pd

In [122]:
# Input locations: one folder of daily station files and one of monthly
# station files; `ext` is the glob pattern used to pick files inside them.
ext = "*.txt"
daily_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs\Streamflow_Stations\Climate_Sensitive_Stations-GRDC\2025-02-13_17-18_Daily"
monthly_folder = r"Z:\PhD_Datasets&Analysis\Info_Inputs\Streamflow_Stations\Climate_Sensitive_Stations-GRDC\2025-02-13_17-18_Monthly"

# TerraClimate available period: first and last year (both inclusive),
# expanded to the first and last calendar day of that span.
terra_st_yr, terra_ed_yr = 1958, 2023
sdate = date(year=terra_st_yr, month=1, day=1)    # start date
edate = date(year=terra_ed_yr, month=12, day=31)  # end date

In [127]:
# Every calendar day from sdate to edate (both inclusive), rendered as
# 'YYYY-MM-DD' strings.
period_dates = pd.date_range(start=sdate, end=edate, freq="D").strftime('%Y-%m-%d').tolist()

# Column-less frame whose index holds those date strings; station columns
# are attached to it by a later cell.
daily_df_sts = pd.DataFrame({"YYYY-MM-DD": period_dates})
daily_df_sts = daily_df_sts.set_index("YYYY-MM-DD")
daily_df_sts

1958-01-01
1958-01-02
1958-01-03
1958-01-04
1958-01-05
...
2023-12-27
2023-12-28
2023-12-29
2023-12-30
2023-12-31


In [128]:
# Daily processing: read every daily station file, align each station to the
# TerraClimate daily index, and export the joined table as CSV.

def _read_daily_station(path, header_lines=37, no_data=-999.0):
    """Parse one semicolon-delimited daily station file into a Series.

    Parameters
    ----------
    path : str
        File to read; decoded as ISO-8859-1.
    header_lines : int
        Number of leading lines skipped before the data rows start.
    no_data : float
        Sentinel the file uses for missing values; stored as NaN.

    Returns
    -------
    pandas.Series
        float64 values indexed by the first field of each row (the date
        string). Rows whose last field is not a number are reported and
        skipped, so the Series is empty when the file has no usable rows.
    """
    # Specify encoding explicitly
    with open(path, 'r', encoding='ISO-8859-1') as in_file:
        data_lines = in_file.readlines()[header_lines:]

    dates = []
    values = []
    for data_line in data_lines:
        fields = data_line.split(";")
        try:
            value = float(fields[-1])
        except ValueError:
            print(f"Skipping invalid value in {path}: {fields[-1]}")
            continue

        # Compare as floats: truncating with int() would also discard values
        # such as -999.5 and would raise on "nan"/"inf" tokens.
        dates.append(fields[0].strip())
        values.append(np.nan if value == no_data else value)

    return pd.Series(values, index=dates, dtype="float64")


# sorted() makes the column order independent of the directory listing order.
archivos = sorted(glob.glob(os.path.join(daily_folder, ext)))

station_columns = {}
for archivo in archivos:
    file_name = os.path.basename(archivo)
    print("Reading file: " + file_name)

    # The station id is the part of the file name before the first "_".
    id_station = file_name.split("_")[0]
    station_series = _read_daily_station(archivo)

    if station_series.empty:  # This avoids saving files with no station data
        print(f"Skipping station {id_station} due to no data")
        continue

    if id_station in station_columns:
        raise ValueError(f"Station {id_station} appears in more than one file")

    # A repeated date would multiply rows once aligned; keep the first record.
    station_columns[id_station] = station_series[~station_series.index.duplicated(keep="first")]

# Align every station to the TerraClimate dates in a single step. Building the
# frame from the index alone (instead of joining onto the previous result)
# keeps this cell re-runnable and avoids copying the table once per file.
daily_df_sts = pd.DataFrame(station_columns, index=daily_df_sts.index)

# Drop columns with all NaN values
df_cleaned = daily_df_sts.dropna(axis=1, how='all')

# os.path.join avoids the invalid "\_" / "\J" escapes of a hand-built path.
output_folder = os.path.join(daily_folder, "_DataFrames")
os.makedirs(output_folder, exist_ok=True)
df_cleaned.to_csv(os.path.join(output_folder, "Joined_Daily_Sts_DFs.csv"))
df_cleaned

Reading file: 1257100_Q_Day.Cmd.txt
Reading file: 1309620_Q_Day.Cmd.txt
Reading file: 1769100_Q_Day.Cmd.txt
Reading file: 2106100_Q_Day.Cmd.txt
Reading file: 2178200_Q_Day.Cmd.txt
Reading file: 2178951_Q_Day.Cmd.txt
Reading file: 2178960_Q_Day.Cmd.txt
Reading file: 2180400_Q_Day.Cmd.txt
Reading file: 2180600_Q_Day.Cmd.txt
Reading file: 2180711_Q_Day.Cmd.txt
Reading file: 2180712_Q_Day.Cmd.txt
Reading file: 2181100_Q_Day.Cmd.txt
Reading file: 2181200_Q_Day.Cmd.txt
Reading file: 2181300_Q_Day.Cmd.txt
Reading file: 2181500_Q_Day.Cmd.txt
Reading file: 2182050_Q_Day.Cmd.txt
Reading file: 2182150_Q_Day.Cmd.txt
Reading file: 2182250_Q_Day.Cmd.txt
Reading file: 2335950_Q_Day.Cmd.txt
Skipping station 2335950 due to no data
Reading file: 2517500_Q_Day.Cmd.txt
Reading file: 2517550_Q_Day.Cmd.txt
Reading file: 2517600_Q_Day.Cmd.txt
Reading file: 2517610_Q_Day.Cmd.txt
Skipping station 2517610 due to no data
Reading file: 2517700_Q_Day.Cmd.txt
Skipping station 2517700 due to no data
Reading file: 25

Unnamed: 0_level_0,1257100,1309620,1769100,2106100,2178200,2178951,2178960,2180400,2180600,2180711,...,6854593,6854601,6854713,6854714,6854950,6855409,6855411,6855412,6870640,6887300
YYYY-MM-DD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1958-01-01,147.60,,,,,,,,,,...,4.9,,2.20,,,,,,,
1958-01-02,151.55,,,,,,,,,,...,4.7,,2.10,,,,,,,
1958-01-03,155.37,,,,,,,,,,...,4.7,,2.10,,,,,,,
1958-01-04,158.21,,,,,,,,,,...,4.7,,2.00,,,,,,,
1958-01-05,160.89,,,,,,,,,,...,4.5,,2.00,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-27,,,,,,,,,,,...,2.9,7.99,2.45,2.12,13.14,4.63,1.09,7.16,10.34,
2023-12-28,,,,,,,,,,,...,2.7,7.91,2.44,2.09,11.94,4.53,1.08,7.08,10.23,
2023-12-29,,,,,,,,,,,...,2.7,7.84,2.41,2.06,11.74,4.46,1.07,7.01,10.15,
2023-12-30,,,,,,,,,,,...,2.7,7.76,2.40,2.03,11.58,4.35,1.06,6.93,9.99,


In [100]:
# All (year, month) pairs of the TerraClimate period, rendered as 'YYYY-MM'
# strings with a zero-padded month.
years = range(terra_st_yr, terra_ed_yr + 1)
months = range(1, 13)
yr_mth = [f"{year}-{month:02d}" for year in years for month in months]

# Column-less frame whose index holds the TerraClimate monthly dates
monthly_df_sts = pd.DataFrame({"YYYY-MM": yr_mth})
monthly_df_sts = monthly_df_sts.set_index("YYYY-MM")
monthly_df_sts

1958-01
1958-02
1958-03
1958-04
1958-05
...
2023-08
2023-09
2023-10
2023-11
2023-12
