In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 


train_short_actual = pd.read_csv("TrainingShort/TrainingActual_Short.csv")
val_short_actual = pd.read_csv("ValidationShort/ValidationActual_Short.csv")
test_short_actual = pd.read_csv("TestingShort/TestingActual_Short.csv")


train_short_normal = pd.read_csv("Baselines/TrainingShort/TrainingPredictionsShort.csv")
val_short_normal = pd.read_csv("Baselines/ValidationShort/ValidationPredictionsShort.csv")
test_short_normal = pd.read_csv("Baselines/TestingShort/TestingPredictionsShort.csv")


train_short_gauss = pd.read_csv("TrainingShort/TrainingPredictionsShort_Gaussian.csv")
val_short_gauss = pd.read_csv("ValidationShort/ValidationPredictionsShort_Gaussian.csv")
test_short_gauss = pd.read_csv("TestingShort/TestingPredictionsShort_Gaussian.csv")

In [2]:
train_long_actual = pd.read_csv("TrainingLong/TrainingActual_Long.csv")
val_long_actual = pd.read_csv("ValidationLong/ValidationActual_Long.csv")
test_long_actual = pd.read_csv("TestingLong/TestingActual_Long.csv")

train_long_preds = pd.read_csv("Baselines/TrainingLong/TrainingPredictionsLong_Gaussian.csv")
val_long_preds = pd.read_csv("Baselines/ValidationLong/ValidationPredictionsLong_Gaussian.csv")
test_long_preds = pd.read_csv("Baselines/TestingLong/TestingPredictionsLong_Gaussian.csv")

In [3]:
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, mean_absolute_percentage_error

ncols_train = train_short_gauss.shape[1]
ncols_val = val_short_gauss.shape[1]
ncols_test = test_short_gauss.shape[1]

def evaluate(output, preds):
    errors_mae = []
    errors_mape = [] 
    errors_rmse = [] 
    ncols = output.shape[1] 
    for i in range(ncols): 
        output_col = output.iloc[:, i]
        preds_col = preds.iloc[:, i]
        
        errors_mae.append(mean_absolute_error(output_col, preds_col))
        errors_mape.append(mean_absolute_percentage_error(output_col, preds_col))
        errors_rmse.append(root_mean_squared_error(output_col, preds_col))
        
    
    errors_mae = np.array(errors_mae)
    errors_mape = np.array(errors_mape)
    errors_rmse = np.array(errors_rmse)


    return np.sum(errors_mae)/ncols, np.sum(errors_rmse)/ncols, np.sum(errors_mape)/ncols,



errors = evaluate(test_short_normal, test_short_actual)
print(errors)    

(253.51909336667146, 300.45656804093346, 0.1342654331852569)


# Calculating maximum and minimum electricity demand differences

In [4]:
import ast
import pandas as pd 
long_train100 = pd.read_csv("TestingLong/hundred_preds_Long.csv")
long_val100 = pd.read_csv("ValidationLong/hundred_preds_Long.csv")
long_test100 = pd.read_csv("TestingLong/hundred_preds_Long.csv")

short_train100 = pd.read_csv("TrainingShort/hundred_preds_Short.csv")
short_val100 = pd.read_csv("ValidationShort/hundred_preds_Short.csv")
short_test100 = pd.read_csv("TestingShort/hundred_preds_Short.csv")



def convert_to_floatdf(df):
    if "Unnamed: 0" in df.columns.values:
        x = df.drop(["Unnamed: 0"], inplace = False, axis =1) # drop that column. 
    else: 
        x = df.copy() 
    for column in x.columns:
        x[column] = x[column].apply(lambda x : list(map(float, ast.literal_eval(x))))
    return x  

long_train100 = convert_to_floatdf(long_train100)   
long_val100 = convert_to_floatdf(long_val100)  
long_test100 = convert_to_floatdf(long_test100) # conversion to floats. 

short_train100 = convert_to_floatdf(short_train100)     
short_val100 = convert_to_floatdf(short_val100)   
short_test100 = convert_to_floatdf(short_test100)    

In [5]:
import numpy as np 
def calculate_encapsulation_frequency(preds_df, actuals_df, lower_percentile = 2.5, upper_percentile = 97.5):
    encapsulation_count = 0
    total_count = 0

    rows_len, cols_len = preds_df.shape[0], preds_df.shape[1]
    
    for i in range(rows_len): ## go by each row and col.
        for j in range(cols_len): 
            actual_val = actuals_df.iloc[i, j] # get the actual val. 
            
            list_val = preds_df.iloc[i,j] # get the list. 
            upper_bound = np.percentile(list_val, upper_percentile)
            lower_bound = np.percentile(list_val, lower_percentile) # get the upper and lower bounds. 
            
            if lower_bound <= actual_val <= upper_bound:
                encapsulation_count +=1 
            total_count+=1 
    
    return encapsulation_count/total_count * 100 
    

actual_test_long = pd.read_csv("TestingLong/TestingActual_Long.csv")
actual_test_short = pd.read_csv("TestingShort/TestingActual_Short.csv")
test_encap = calculate_encapsulation_frequency(short_test100, actual_test_short, 5, upper_percentile=95)

# Bias

In [6]:
def convert_columns(df):
    return pd.to_datetime(df.columns.str.extract(r'(\d{1,2}/\d{1,2}/\d{4})')[0], format='%m/%d/%Y', errors='coerce')

In [137]:
from tenacity import dataclass_kwargs


data = "Testing"


normal_short = pd.read_csv(f"/home/jik19004/FilesToRun/BayesianTimeSeries/Baselines/{data}Short/{data}PredictionsShort.csv")
normal_long = pd.read_csv(f"/home/jik19004/FilesToRun/BayesianTimeSeries/Baselines/{data}Long/{data}PredictionsLong.csv")

bayesian_short_gauss = pd.read_csv(f"{data}Short/{data}PredictionsShort_Gaussian.csv")
bayesian_long_gauss = pd.read_csv(f"/home/jik19004/FilesToRun/BayesianTimeSeries/{data}Long/{data}PredictionsLong_Gaussian.csv")

normal_short.columns = convert_columns(normal_short)
normal_long.columns = convert_columns(normal_long)

bayesian_short_gauss.columns = convert_columns(bayesian_short_gauss)
bayesian_long_gauss.columns = convert_columns(bayesian_long_gauss) 

actual_short = pd.read_csv(f"/home/jik19004/FilesToRun/BayesianTimeSeries/{data}Short/{data}Actual_Short.csv")
actual_short.columns = convert_columns(actual_short) # convert the actual_short. 

actual_long = pd.read_csv(f"/home/jik19004/FilesToRun/BayesianTimeSeries/{data}Long/{data}Actual_Long.csv")
actual_long.columns = convert_columns(actual_long)


def calculate_bias_all(actuals, predictions):
    """
    Calculate the bias between all values in the actuals and predictions datasets.
    Bias is defined as the mean difference between predictions and actuals.
    """
    # Flatten the actuals and predictions into 1D arrays
    actual_values = actuals.values.flatten()
    prediction_values = predictions.values.flatten()
    
    # Remove NaN values
    mask = ~np.isnan(actual_values) & ~np.isnan(prediction_values)
    actual_values = actual_values[mask]
    prediction_values = prediction_values[mask]
    
    # Calculate bias
    bias = np.sum((prediction_values - actual_values)) / len(prediction_values)
    return bias

bias = calculate_bias_all(actual_short, normal_short)
print(f"{data}, {bias}")

Testing, -37.073497919936045


In [138]:
from sklearn.metrics import r2_score
from torch import normal

def calculate_r2(actuals, predictions):
    """
    Calculate the R² coefficient between actuals and predictions.
    R² measures how well the predictions approximate the actual values.
    """
    # Flatten the actuals and predictions into 1D arrays
    actual_values = actuals.values.flatten()
    prediction_values = predictions.values.flatten()
    
    # Remove NaN values
    mask = ~np.isnan(actual_values) & ~np.isnan(prediction_values)
    actual_values = actual_values[mask]
    prediction_values = prediction_values[mask]
    
    # Calculate R² coefficient
    r2 = r2_score(actual_values, prediction_values)
    return r2

r2 = calculate_r2(actual_short, normal_short)
print(f"{data}, {r2}")

Testing, 0.7437180475424854
