In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [2]:
# Read the input CSV, turn +/-infinity into NaN, then drop every row that
# still has a NaN in any column. Written as one chain so re-running the cell
# always rebuilds `data` from the file instead of mutating it in place.
data = (
    pd.read_csv('STEL_2016_GHIc.csv')
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)


In [3]:
# Reference values: the 'ghi' column as a NumPy array.
reference = data['ghi'].values

# One NumPy array per prediction column, keyed by the column name.
# The tuple order below is the order the results are later tabulated in.
predictions = {
    column: data[column].values
    for column in (
        'Mabasa_GHIcs',
        'SS_GHIcs',
        'IP_GHIcs',
        'BD_GHIcs',
        'BigaRosa_GHIcs',
        'Brichambaut_GHIcs',
        'Schulze_GHIcs',
        'Haurwitz_GHIcs',
        'HaurwitzM_GHIcs',
        'BDM_GHIcs',
        'KC_GHIcs',
        'KCM_GHIcs',
        'ABCG_GHIcs',
        'ABCGM_GHIcs',
        'RS_GHIcs',
        'DPP_GHIcs',
        'YangWalsh_GHIcs',
        'Sharma_GHIcs',
        'ElMghouchi_GHIcs',
        'HLJ_GHIcs',
        'FuRich_GHIcs',
        'TJ_GHIcs',
        'ghi_McClear',
    )
}

In [4]:
def calculate_metrics(reference, predicted):
    """Compute absolute and relative error metrics of a prediction.

    Parameters
    ----------
    reference : array-like
        Values the prediction is judged against.
    predicted : array-like
        Predicted values, paired with ``reference`` by position.

    Returns
    -------
    tuple
        ``(mbe, rmbe, rmse, rrmse, r2, mae, rmae)``. ``rmbe``, ``rrmse`` and
        ``rmae`` are ``mbe``, ``rmse`` and ``mae`` expressed as a percentage
        of the mean of ``reference``; they are inf/nan when that mean is zero.
    """
    # Coerce once to float arrays so that
    #  * plain lists/tuples work (list - list raises TypeError),
    #  * unsigned-integer inputs cannot wrap around in the subtraction,
    #  * pandas Series are paired by position, the same way the sklearn
    #    metrics below pair them, instead of being aligned by index label.
    reference = np.asarray(reference, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    # The normaliser for all three relative metrics; computed a single time.
    reference_mean = np.mean(reference)

    mbe = np.mean(predicted - reference)
    rmse = np.sqrt(mean_squared_error(reference, predicted))
    r2 = r2_score(reference, predicted)
    mae = mean_absolute_error(reference, predicted)

    rmbe = mbe / reference_mean * 100
    rrmse = rmse / reference_mean * 100
    rmae = mae / reference_mean * 100
    return mbe, rmbe, rmse, rrmse, r2, mae, rmae

In [5]:
# Compute the metric tuple for every prediction column against the reference.
results = {}
for key, value in predictions.items():
    results[key] = calculate_metrics(reference, value)

# One row per prediction column; the column labels follow the order of the
# tuple returned by calculate_metrics.
df_results = pd.DataFrame.from_dict(
    results,
    orient='index',
    columns=['MBE', 'rMBE', 'RMSE', 'rRMSE', 'r2', 'MAE', 'rMAE'],
)

# Round off the results to 3 decimal digits
df_results = df_results.round(3)

# Save the results to a CSV file. The path is defined once so the status
# message below always names the file that was actually written.
output_path = 'STEL_GHIc_metrics_results.csv'
df_results.to_csv(output_path)

print(f"The results have been tabulated and saved to {output_path}.")

The results have been tabulated and saved to metrics_results.csv.


In [6]:
# Show the rounded metrics table (one row per prediction column) as the cell output.
df_results

Unnamed: 0,MBE,rMBE,RMSE,rRMSE,r2,MAE,rMAE
Mabasa_GHIcs,-0.472,-0.068,18.003,2.589,0.994,13.6,1.956
SS_GHIcs,-9.293,-1.336,21.674,3.117,0.992,17.386,2.5
IP_GHIcs,-22.715,-3.267,29.798,4.286,0.984,25.445,3.659
BD_GHIcs,-67.943,-9.771,87.699,12.613,0.865,70.24,10.102
BigaRosa_GHIcs,-34.914,-5.021,41.316,5.942,0.97,35.946,5.17
Brichambaut_GHIcs,-32.188,-4.629,37.12,5.339,0.976,33.502,4.818
Schulze_GHIcs,7.468,1.074,22.238,3.198,0.991,17.641,2.537
Haurwitz_GHIcs,-26.07,-3.749,39.634,5.7,0.972,30.334,4.363
HaurwitzM_GHIcs,-0.493,-0.071,21.8,3.135,0.992,17.014,2.447
BDM_GHIcs,2.43,0.349,36.739,5.284,0.976,31.533,4.535
