In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset and clean it in a single pass
data = (
    pd.read_csv('ABQ_2017_dni.csv')
    # Turn +/-infinity into NaN so it is removed together with the real NaNs
    .replace([np.inf, -np.inf], np.nan)
    # Keep only the rows that are complete in every column
    .dropna()
)

# Reference series that every model column is scored against
reference = data['dni'].values

# Model output columns to evaluate, one per line for easy editing
prediction_columns = [
    'dni_erbs',
    'dni_erbsD',
    'dni_ohm',
    'dni_louche',
    'dni_boland',
    'dni_Engerer2',
    'dni_Engerer3',
    'dni_Engerer4',
    'dni_Yang5',
    'dni_Yang4',
    'dni_JKT',
    'dni_Mabasa',
]

# One empty list per output column; the key order fixes the column order of
# the DataFrame that is later built from this dictionary
results = {
    name: []
    for name in ('Model', 'rMBE', 'MBE', 'MAE', 'rMAE', 'RMSE', 'rRMSE', 'R2')
}

# Function to calculate MBE
def mean_bias_error(y_true, y_pred):
    """Return the mean bias error, i.e. the mean of ``y_pred - y_true``.

    A positive value means the predictions are on average larger than the
    reference values; a negative value means they are on average smaller.

    Parameters
    ----------
    y_true : array-like
        Reference values.
    y_pred : array-like
        Predicted values, broadcast-compatible with ``y_true``.

    Returns
    -------
    float
        Mean of the element-wise differences ``y_pred - y_true``.
    """
    # Coerce to float arrays so that plain lists/tuples are accepted and
    # unsigned-integer inputs cannot wrap around in the subtraction.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(y_pred - y_true)

# Score every model column against the reference, export the metric table,
# then rank the models on |MBE|, MAE and RMSE and export the ranked table.

# Output files are named once so the final message always matches the file
# that is actually written
metrics_csv = 'ABQ2_dni_metrics_results.csv'
ranking_csv = 'ABQ2_dni_model_comparison_results_with_ranking.csv'

# The reference mean is identical for every model, so compute it only once
reference_mean = np.mean(reference)

# Iterate over each prediction column and calculate the metrics
for col in prediction_columns:
    predictions = data[col].values

    # Absolute metrics
    mbe = mean_bias_error(reference, predictions)
    mae = mean_absolute_error(reference, predictions)
    rmse = np.sqrt(mean_squared_error(reference, predictions))
    r2 = r2_score(reference, predictions)

    # Relative metrics, expressed as a percentage of the mean reference value
    rMBE = mbe / reference_mean * 100
    rMAE = mae / reference_mean * 100
    rRMSE = rmse / reference_mean * 100

    # Append results to the dictionary
    results['Model'].append(col)
    results['rMBE'].append(rMBE)
    results['MBE'].append(mbe)
    results['MAE'].append(mae)
    results['rMAE'].append(rMAE)
    results['RMSE'].append(rmse)
    results['rRMSE'].append(rRMSE)
    results['R2'].append(r2)

# Convert the results dictionary to a DataFrame (kept at full precision)
results_df = pd.DataFrame(results)

# Save the metrics rounded to 3 decimal digits. Only the exported copy is
# rounded, so the ranking below is not distorted by rounding-induced ties.
results_df.round(3).to_csv(metrics_csv, index=False)

# Bias is ranked by magnitude: over- and under-prediction count the same
results_df['abs_MBE'] = results_df['MBE'].abs()

# Rank each criterion separately; the smallest error gets rank 1 and tied
# values share their average rank (pandas default)
results_df['rank_abs_MBE'] = results_df['abs_MBE'].rank()
results_df['rank_MAE'] = results_df['MAE'].rank()
results_df['rank_RMSE'] = results_df['RMSE'].rank()

# Mean linear ranking: average of the three per-criterion ranks
results_df['mean_rank'] = results_df[['rank_abs_MBE', 'rank_MAE', 'rank_RMSE']].mean(axis=1)

# Round once, directly from full precision, to 2 decimal digits for output
results_df = results_df.round(2)

# Overall position starting from 1; models with the same mean_rank share the
# lowest position of their group (method='min')
results_df['Numeric_Rank'] = results_df['mean_rank'].rank(method='min').astype(int)

# Save the final results to a CSV file
results_df.to_csv(ranking_csv, index=False)

print("The model comparison results with ranking have been saved to {}".format(ranking_csv))

The model comparison results with ranking have been saved to model_comparison_results_with_ranking.csv
