In [None]:
#Loading the library 
import pandas as pd
import numpy as np
import os
import math
from scipy.optimize import curve_fit

In [None]:
## This part is for calculating the CN values and ranking them based on precipitation events
# Function using the NRCS-CN method
def quickflow_scs(CN, P, a):
    """
    Calculate quickflow (runoff) using the NRCS curve-number method.

    Parameters:
    - CN: Curve number; converted internally to the retention
          S = 25400 / CN - 254
    - P: Total precipitation (scalar or array-like)
    - a: Initial abstraction ratio (Ia = a * S)
    Returns:
    - Q: Quickflow (runoff) as a NumPy array, 0 wherever P <= Ia and
         (P - Ia)^2 / (P - Ia + S) elsewhere
    """
    # Accept lists / Series as well as arrays.
    P = np.asarray(P, dtype=float)

    S = (25400 / CN) - 254
    Ia = a * S
    effective = P - Ia
    no_runoff = P <= Ia

    # np.where evaluates both branches. For entries without runoff the
    # quotient is discarded anyway, so give them a harmless denominator;
    # otherwise e.g. CN = 100 with P = 0 computes 0/0 and emits a
    # RuntimeWarning although the result is 0.
    denominator = np.where(no_runoff, 1.0, effective + S)
    return np.where(no_runoff, 0.0, effective ** 2 / denominator)

def calculate_values_and_curves(file_path):
    """
    Read an event series from CSV and add back-calculated retention / CN columns.

    The CSV must contain the columns 'total_precipitation_sum' (P) and
    'Quickflow_eckhardt' (Q).

    Parameters:
    - file_path: Path of the CSV file to read
    Returns:
    - DataFrame sorted by P (then Q) in descending order with the added
      columns 'original_index', 'S_02', 'CN_02', 'S_005', 'CN_005', 'CN0'
    """
    df = pd.read_csv(file_path)
    # Keep the row position from the file so the original order can be restored.
    df['original_index'] = df.index

    # Rows are sorted jointly: precipitation first, quickflow as tie-breaker,
    # so every P stays paired with its own Q.
    # NOTE(review): the previous comment said the two columns were sorted
    # "independently" (rank matching); confirm which ordering is intended.
    df = df.sort_values(by=['total_precipitation_sum', 'Quickflow_eckhardt'], ascending=[False, False])

    print("Após ordenar os valores:", df.shape)
    print(df[['total_precipitation_sum', 'Quickflow_eckhardt']].head())

    P = df['total_precipitation_sum']
    Q = df['Quickflow_eckhardt']

    # Vectorised instead of row-wise apply/math.sqrt: a negative discriminant
    # (e.g. negative Q) now yields NaN for that row instead of raising
    # ValueError and aborting the whole file.

    # Retention S and CN for initial abstraction ratio 0.2
    # (closed-form solution of Q = (P - 0.2 S)^2 / (P + 0.8 S) for S).
    df['S_02'] = 5 * ((P + 2 * Q) - np.sqrt(4 * Q ** 2 + 5 * P * Q))
    df['CN_02'] = 25400 / (254 + df['S_02'])

    # Retention S and CN for initial abstraction ratio 0.05
    # (solution of Q = (P - 0.05 S)^2 / (P + 0.95 S); the columns are named
    # '_005' but the coefficients correspond to 0.05, not 0.005).
    df['S_005'] = 10 * ((2 * P + 19 * Q) - np.sqrt(361 * Q ** 2 + 80 * P * Q))
    df['CN_005'] = 25400 / (254 + df['S_005'])

    # CN0: the curve number at which runoff just starts, i.e. P = 0.2 * S.
    # Written in the same 25400 / (254 + S) form as the columns above with
    # S = 5 P. The previous 100 / (1 + P / 2) is the inch-based form of this
    # threshold and was inconsistent with the 25400/254 constants used here.
    df['CN0'] = 25400 / (254 + 5 * P)

    return df


# Root folder that process_files() walks recursively for input *.csv series.
# Relative path written with backslashes (raw string), so presumably meant
# to be resolved from the working directory on Windows.
folder_path = r'Times_series\ASY\Group1' # Data series location
# Directory the processed files are written to. While it is empty,
# os.path.join(results_path, name) is just `name`, i.e. the files land in
# the current working directory.
results_path = r''  # Output directory for results


def process_files(folder_path):
    """
    Back-calculate CN values for every CSV found under ``folder_path``.

    Each file is passed to calculate_values_and_curves() and the resulting
    DataFrame is written, under the same base name, into the module-level
    ``results_path`` directory.

    Parameters:
    - folder_path: Root folder searched recursively for files ending in ".csv"
    """
    # Collect the inputs up front so files written during the loop are never
    # picked up by the directory walk.
    csv_files = [
        os.path.join(root, name)
        for root, dirs, files in os.walk(folder_path)
        for name in files
        if name.endswith(".csv")
    ]

    # to_csv does not create missing directories; make sure the target exists
    # (an empty results_path means the current working directory).
    if results_path:
        os.makedirs(results_path, exist_ok=True)

    for file in csv_files:
        df = calculate_values_and_curves(file)

        # Save the modified DataFrame
        output_file_path = os.path.join(results_path, os.path.basename(file))
        df.to_csv(output_file_path, index=False)

# Run part one: add the S / CN columns to every series under folder_path.
process_files(folder_path)

In [None]:
## Part two: fit the standard (asymptotic) behavior curve and find the CN value
# Error metrics for model evaluation
def error_metrics(observed, estimated):
    """
    Return (MSE, RMSE, MAE) of the residuals ``observed - estimated``.
    """
    diff = observed - estimated
    mean_squared = np.mean(diff ** 2)
    mean_absolute = np.mean(np.abs(diff))
    return mean_squared, np.sqrt(mean_squared), mean_absolute

def r2(y_true, y_pred):
    """
    Return the squared Pearson correlation coefficient of the two series.
    """
    # Off-diagonal entry of the 2x2 correlation matrix is the correlation
    # between y_true and y_pred.
    pearson = np.corrcoef(y_true, y_pred)[0, 1]
    return pearson ** 2

def PBIAS(observed, simulated):
    """
    Percent bias: 100 * sum(observed - simulated) / sum(observed).

    Accepts any array-like input. Returns NaN when the observations sum to
    zero, mirroring the explicit zero-denominator guard used by RSE instead
    of dividing by zero.
    """
    observed = np.asarray(observed, dtype=float)
    simulated = np.asarray(simulated, dtype=float)

    observed_total = np.sum(observed)
    if observed_total == 0:
        return np.nan

    return (np.sum(observed - simulated) / observed_total) * 100

def NSE(observed, simulated):
    """
    Nash-Sutcliffe efficiency:
    1 - sum((observed - simulated)^2) / sum((observed - mean(observed))^2).

    Accepts any array-like input. Returns NaN when the observations have no
    variance (zero denominator), mirroring the guard used by RSE.
    """
    observed = np.asarray(observed, dtype=float)
    simulated = np.asarray(simulated, dtype=float)

    total_sum_squares = np.sum((observed - np.mean(observed)) ** 2)
    if total_sum_squares == 0:
        return np.nan

    return 1 - (np.sum((observed - simulated) ** 2) / total_sum_squares)

def KGE(observed, simulated):
    """
    Kling-Gupta efficiency:
    1 - sqrt((r - 1)^2 + (alpha - 1)^2 + (beta - 1)^2), where r is the
    Pearson correlation, alpha = std(simulated) / std(observed) and
    beta = mean(simulated) / mean(observed).

    Accepts any array-like input. Returns NaN when the score is undefined:
    a constant series (correlation undefined) or observations whose mean is
    zero (beta undefined). This mirrors the explicit guard used by RSE and
    avoids divide-by-zero warnings.
    """
    observed = np.asarray(observed, dtype=float)
    simulated = np.asarray(simulated, dtype=float)

    std_observed = np.std(observed)
    std_simulated = np.std(simulated)
    mean_observed = np.mean(observed)
    if std_observed == 0 or std_simulated == 0 or mean_observed == 0:
        return np.nan

    r = np.corrcoef(observed, simulated)[0, 1]
    alpha = std_simulated / std_observed
    beta = np.mean(simulated) / mean_observed
    return 1 - np.sqrt((r - 1) ** 2 + (alpha - 1) ** 2 + (beta - 1) ** 2)

def RSE(observed, simulated):
    """
    Return sqrt(n * SSE / ((n - 2) * SST)) for the two series, where SSE is
    the sum of squared differences and SST the sum of squared deviations of
    the observations from their mean.

    Returns NaN when there are two or fewer observations or when the
    observations have zero variance.
    """
    obs = np.array(observed)
    sim = np.array(simulated)
    count = len(obs)

    # The (n - 2) term needs more than two points.
    if count <= 2:
        return np.nan

    error_ss = np.sum((obs - sim) ** 2)
    total_ss = np.sum((obs - np.mean(obs)) ** 2)

    # Constant observations would mean dividing by zero.
    if total_ss == 0:
        return np.nan

    ratio = (count * error_ss) / ((count - 2) * total_ss)
    return np.sqrt(ratio)

# Standard curve adjustment
def f_standard(P, k1, CNinf_stand):
    """
    Standard-behavior CN curve: CN(P) = CNinf + (100 - CNinf) * exp(-k1 * P).

    Parameters:
    - P: Precipitation values (array-like)
    - k1: Decay coefficient
    - CNinf_stand: Value the curve approaches as exp(-k1 * P) goes to 0
    Returns:
    - Array of CN values clipped to the range [0, 100]
    """
    P = np.asarray(P, dtype=float)
    calculated_values = CNinf_stand + (100 - CNinf_stand) * np.exp(-k1 * P)
    # No debug printing here: curve_fit calls this function on every
    # iteration, so printing the full arrays floods the output and slows
    # the fit down.
    return np.clip(calculated_values, 0, 100)  # Ensure CN values are within logical bounds

# Curve fitting and metrics calculation

def _fit_standard_curve(P, CN):
    """Fit f_standard to one (P, CN) series and return parameters plus fit metrics."""
    # curve_fit rejects NaN/inf input, so fit only on rows where both
    # values are finite.
    valid = np.isfinite(P) & np.isfinite(CN)
    P, CN = P[valid], CN[valid]

    params, _ = curve_fit(f_standard, P, CN, p0=[0.1, 50], bounds=([-10, 0], [10, 100]))
    k1_fit, CNinf_stand_fit = params
    CN_estimated = f_standard(P, k1_fit, CNinf_stand_fit)

    mse, rmse, mae = error_metrics(CN, CN_estimated)
    # Key order is kept as before because it determines the column order of
    # the results CSV built by the caller.
    return {
        'k1': k1_fit,
        'CNinf_stand': CNinf_stand_fit,
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'pbias': PBIAS(CN, CN_estimated),
        'nse': NSE(CN, CN_estimated),
        'kge': KGE(CN, CN_estimated),
        'r2': r2(CN, CN_estimated),
        'rse': RSE(CN, CN_estimated),
    }


def fit_curve_and_calculate_metrics(file_path):
    """
    Fit the standard CN-P curve to the 'CN_02' and 'CN_005' columns of a CSV.

    The file must contain 'total_precipitation_sum', 'CN_02' and 'CN_005'.
    Rows with a non-finite precipitation or CN value are left out of the
    respective fit.

    Parameters:
    - file_path: Path of the CSV file to read
    Returns:
    - dict with keys 'CN_02' and 'CN_005'; each value is a dict holding the
      fitted 'k1' and 'CNinf_stand' and the metrics 'mse', 'rmse', 'mae',
      'pbias', 'nse', 'kge', 'r2', 'rse' of the fitted curve against the data
    """
    df = pd.read_csv(file_path)

    # Extract precipitation (P) once; it is shared by both fits.
    P = df["total_precipitation_sum"].to_numpy(dtype=float)

    return {
        column: _fit_standard_curve(P, df[column].to_numpy(dtype=float))
        for column in ("CN_02", "CN_005")
    }

# Main function to process files within a folder and compile results into a CSV file
def process_files(folder_path, results_file_path):
    """
    Fit the CN curves for every CSV under ``folder_path`` and save a summary.

    Parameters:
    - folder_path: Root folder searched recursively for files ending in ".csv"
    - results_file_path: CSV file that receives one row per processed file,
      with columns named "<scenario>_<metric>" plus a 'file' column holding
      the base name of the series file (extension included)
    """
    csv_files = [
        os.path.join(root, name)
        for root, dirs, files in os.walk(folder_path)
        for name in files
        if name.endswith(".csv")
    ]

    # If the results file lives under folder_path, a second run would try to
    # fit it as if it were a series and fail on the missing columns.
    results_abs = os.path.normcase(os.path.abspath(results_file_path))

    all_results = []

    for file_path in csv_files:
        if os.path.normcase(os.path.abspath(file_path)) == results_abs:
            continue

        # Compute the curve-fitting results
        metrics = fit_curve_and_calculate_metrics(file_path)

        # Flatten {scenario: {metric: value}} into one row of the summary
        results_dict = {
            f"{scenario}_{key}": value
            for scenario, scenario_metrics in metrics.items()
            for key, value in scenario_metrics.items()
        }

        results_dict['file'] = os.path.basename(file_path)
        all_results.append(results_dict)

    # Compile all results into a DataFrame and save to CSV
    results_df = pd.DataFrame(all_results)
    results_df.to_csv(results_file_path, index=False)

# Placeholder: set this to the folder holding the series that already carry
# the 'CN_02' / 'CN_005' columns before running this cell.
folder_path = r"" # Specify the folder containing the series
# Summary CSV written by process_files().
# NOTE(review): the leading backslash makes this path root-relative rather
# than relative to the working directory, and part three reads
# r"\Times_series\ASY\Group1\CNs_stand.csv"; confirm both point to the same file.
results_file_path = r"\CNs_stand.csv"
# Run part two: fit the curves and write the summary.
process_files(folder_path, results_file_path)

In [None]:
# Part three: calculate the runoff (quickflow) from the fitted CN values

# Summary CSV read below; the loop uses its columns 'file',
# 'CN_02_CNinf_stand' and 'CN_005_CNinf_stand'.
# NOTE(review): leading backslash makes both paths root-relative; confirm.
cn_file_path = r"\Times_series\ASY\Group1\CNs_stand.csv"
# Folder walked recursively for the series files; the per-file outputs and
# 'results_ASY_G1.csv' are also written here.
series_folder_path = r"\Times_series\ASY\Group1"

# Function to compute error metrics
def error_metrics(observed, estimated):
    """
    Return (MSE, RMSE, MAE) of the residuals ``observed - estimated``.
    """
    diff = observed - estimated
    mean_squared = np.mean(diff ** 2)
    mean_absolute = np.mean(np.abs(diff))
    return mean_squared, np.sqrt(mean_squared), mean_absolute

def r2(y_true, y_pred):
    """
    Return the squared Pearson correlation coefficient of the two series.
    """
    # Off-diagonal entry of the 2x2 correlation matrix is the correlation
    # between y_true and y_pred.
    pearson = np.corrcoef(y_true, y_pred)[0, 1]
    return pearson ** 2

def PBIAS(observed, simulated):
    """
    Percent bias: 100 * sum(observed - simulated) / sum(observed).

    Accepts any array-like input. Returns NaN when the observations sum to
    zero, mirroring the explicit zero-denominator guard used by RSE instead
    of dividing by zero.
    """
    observed = np.asarray(observed, dtype=float)
    simulated = np.asarray(simulated, dtype=float)

    observed_total = np.sum(observed)
    if observed_total == 0:
        return np.nan

    return (np.sum(observed - simulated) / observed_total) * 100

def NSE(observed, simulated):
    """
    Nash-Sutcliffe efficiency:
    1 - sum((observed - simulated)^2) / sum((observed - mean(observed))^2).

    Accepts any array-like input. Returns NaN when the observations have no
    variance (zero denominator), mirroring the guard used by RSE.
    """
    observed = np.asarray(observed, dtype=float)
    simulated = np.asarray(simulated, dtype=float)

    total_sum_squares = np.sum((observed - np.mean(observed)) ** 2)
    if total_sum_squares == 0:
        return np.nan

    return 1 - (np.sum((observed - simulated) ** 2) / total_sum_squares)

def KGE(observed, simulated):
    """
    Kling-Gupta efficiency:
    1 - sqrt((r - 1)^2 + (alpha - 1)^2 + (beta - 1)^2), where r is the
    Pearson correlation, alpha = std(simulated) / std(observed) and
    beta = mean(simulated) / mean(observed).

    Accepts any array-like input. Returns NaN when the score is undefined:
    a constant series (correlation undefined) or observations whose mean is
    zero (beta undefined). This mirrors the explicit guard used by RSE and
    avoids divide-by-zero warnings.
    """
    observed = np.asarray(observed, dtype=float)
    simulated = np.asarray(simulated, dtype=float)

    std_observed = np.std(observed)
    std_simulated = np.std(simulated)
    mean_observed = np.mean(observed)
    if std_observed == 0 or std_simulated == 0 or mean_observed == 0:
        return np.nan

    r = np.corrcoef(observed, simulated)[0, 1]
    alpha = std_simulated / std_observed
    beta = np.mean(simulated) / mean_observed
    return 1 - np.sqrt((r - 1) ** 2 + (alpha - 1) ** 2 + (beta - 1) ** 2)

def RSE(observed, simulated):
    """
    Return sqrt(n * SSE / ((n - 2) * SST)) for the two series, where SSE is
    the sum of squared differences and SST the sum of squared deviations of
    the observations from their mean.

    Returns NaN when there are two or fewer observations or when the
    observations have zero variance.
    """
    obs = np.array(observed)
    sim = np.array(simulated)
    count = len(obs)

    # The (n - 2) term needs more than two points.
    if count <= 2:
        return np.nan

    error_ss = np.sum((obs - sim) ** 2)
    total_ss = np.sum((obs - np.mean(obs)) ** 2)

    # Constant observations would mean dividing by zero.
    if total_ss == 0:
        return np.nan

    ratio = (count * error_ss) / ((count - 2) * total_ss)
    return np.sqrt(ratio)

# Quickflow calculation using the NRCS method
def quickflow_scs(CN, P, a):
    """
    Calculate quickflow (runoff) using the NRCS curve-number method.

    Parameters:
    - CN: Curve number; converted internally to the retention
          S = 25400 / CN - 254
    - P: Total precipitation (scalar or array-like)
    - a: Initial abstraction ratio (Ia = a * S)
    Returns:
    - Q: Quickflow (runoff) as a NumPy array, 0 wherever P <= Ia and
         (P - Ia)^2 / (P - Ia + S) elsewhere
    """
    # Accept lists / Series as well as arrays.
    P = np.asarray(P, dtype=float)

    S = (25400 / CN) - 254
    Ia = a * S
    effective = P - Ia
    no_runoff = P <= Ia

    # np.where evaluates both branches. For entries without runoff the
    # quotient is discarded anyway, so give them a harmless denominator;
    # otherwise e.g. CN = 100 with P = 0 computes 0/0 and emits a
    # RuntimeWarning although the result is 0.
    denominator = np.where(no_runoff, 1.0, effective + S)
    return np.where(no_runoff, 0.0, effective ** 2 / denominator)

# Initialize a list to store all metric dictionaries
all_metrics = []
df_cn = pd.read_csv(cn_file_path)

# Map file names to their full paths (the series may sit in sub-folders)
file_locations = {}
for root, dirs, files in os.walk(series_folder_path):
    for file in files:
        file_locations[file] = os.path.join(root, file)

# Iterate over each row in the CN dataframe
for index, row in df_cn.iterrows():
    # Part two stores os.path.basename() of the series, which already ends
    # in ".csv"; only append the extension when it is really missing.
    # Unconditionally appending it produced "name.csv.csv" and no file was
    # ever found.
    file_name = str(row['file'])
    if not file_name.lower().endswith('.csv'):
        file_name += '.csv'
    cn02 = row['CN_02_CNinf_stand']
    cn005 = row['CN_005_CNinf_stand']

    # Locate the corresponding series file
    series_file_path = file_locations.get(file_name)
    if series_file_path is None:
        series_file_path = os.path.join(series_folder_path, file_name)
        print(f"Arquivo não encontrado: {series_file_path}")
        continue

    df_series = pd.read_csv(series_file_path)
    P = df_series["total_precipitation_sum"].values
    Q_observed = df_series["Quickflow_eckhardt"].values

    # Calculate estimated quickflow for the two initial-abstraction scenarios
    Q_estimated_02 = quickflow_scs(cn02, P, 0.2)
    Q_estimated_005 = quickflow_scs(cn005, P, 0.05)

    # Compute the (mse, rmse, mae) triple once per scenario
    mse_02, rmse_02, mae_02 = error_metrics(Q_observed, Q_estimated_02)
    mse_005, rmse_005, mae_005 = error_metrics(Q_observed, Q_estimated_005)

    metrics_dict = {
        'file': file_name,
        # Metrics for the lambda = 0.2 scenario
        'mse_02': mse_02,
        'rmse_02': rmse_02,
        'mae_02': mae_02,
        'r2_02': r2(Q_observed, Q_estimated_02),
        'pbias_02': PBIAS(Q_observed, Q_estimated_02),
        'nse_02': NSE(Q_observed, Q_estimated_02),
        'kge_02': KGE(Q_observed, Q_estimated_02),
        # was computed with KGE by mistake
        'rse_02': RSE(Q_observed, Q_estimated_02),
        # Metrics for the lambda = 0.05 scenario
        'mse_005': mse_005,
        'rmse_005': rmse_005,
        'mae_005': mae_005,
        'r2_005': r2(Q_observed, Q_estimated_005),
        'pbias_005': PBIAS(Q_observed, Q_estimated_005),
        'nse_005': NSE(Q_observed, Q_estimated_005),
        # kge_005 was missing and rse_005 held the KGE value
        'kge_005': KGE(Q_observed, Q_estimated_005),
        'rse_005': RSE(Q_observed, Q_estimated_005),
    }
    all_metrics.append(metrics_dict)

    # Add estimated quickflow values to the original spreadsheet. Write to
    # the path the file was read from, so a series found in a sub-folder is
    # updated in place instead of being copied into the root folder.
    df_series['Q_estimated_02'] = Q_estimated_02
    df_series['Q_estimated_005'] = Q_estimated_005
    df_series.to_csv(series_file_path, index=False)

# Save all collected metrics to a CSV file
metrics_df = pd.DataFrame(all_metrics)
metrics_df.to_csv(os.path.join(series_folder_path, 'results_ASY_G1.csv'), index=False)