In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 


# Actual-value frames for the three splits (files named *Actual_Short.csv).
train_short, val_short, test_short = (
    pd.read_csv(csv_path)
    for csv_path in (
        "TrainingShort/TrainingActual_Short.csv",
        "ValidationShort/ValidationActual_Short.csv",
        "TestingShort/TestingActual_Short.csv",
    )
)

# Prediction frames for the same three splits (files named *PredictionsShort.csv).
train_preds, val_preds, test_preds = (
    pd.read_csv(csv_path)
    for csv_path in (
        "TrainingShort/TrainingPredictionsShort.csv",
        "ValidationShort/ValidationPredictionsShort.csv",
        "TestingShort/TestingPredictionsShort.csv",
    )
)


In [2]:
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, mean_absolute_percentage_error

# Column count of each actual-values frame (second entry of DataFrame.shape).
ncols_train, ncols_val, ncols_test = (
    frame.shape[1] for frame in (train_short, val_short, test_short)
)

def evaluate(output, preds):
    """Score predictions against actual values, one column at a time.

    Columns are paired by position: column ``i`` of ``output`` is compared
    with column ``i`` of ``preds``. For each pair the MAE, MAPE and RMSE are
    computed with the scikit-learn metric functions, and each metric is then
    averaged over the number of columns in ``output``.

    Parameters
    ----------
    output : pandas.DataFrame
        Actual values. Its column count drives the loop.
    preds : pandas.DataFrame
        Predictions; must have at least as many columns as ``output``.

    Returns
    -------
    tuple
        ``(mean MAE, mean RMSE, mean MAPE)`` -- note that RMSE comes before
        MAPE in the returned tuple.
    """
    n_columns = output.shape[1]
    mae_scores, mape_scores, rmse_scores = [], [], []

    for idx in range(n_columns):
        actual, predicted = output.iloc[:, idx], preds.iloc[:, idx]
        mae_scores.append(mean_absolute_error(actual, predicted))
        mape_scores.append(mean_absolute_percentage_error(actual, predicted))
        rmse_scores.append(root_mean_squared_error(actual, predicted))

    def _column_mean(scores):
        # Sum of the per-column scores divided by the number of columns.
        return np.sum(np.array(scores)) / n_columns

    return _column_mean(mae_scores), _column_mean(rmse_scores), _column_mean(mape_scores)



# Score the test-split predictions; evaluate() returns the column-averaged
# metrics in the order (MAE, RMSE, MAPE).
errors = evaluate(test_short, test_preds)
print(errors)    

(281.6339556296296, 334.3240217673921, 0.08984890789625882)


# Calculating maximum and minimum electricity demand differences

In [3]:
import ast
import pandas as pd 
# Prediction sets (files named hundred_preds_*.csv), one frame per split.
# Each split reads from its own directory. The training frame previously
# pointed at "TestingLong/...", which made long_train100 a duplicate of
# long_test100.
# NOTE(review): confirm TrainingLong/hundred_preds_long.csv exists on disk.
long_train100 = pd.read_csv("TrainingLong/hundred_preds_long.csv")
long_val100 = pd.read_csv("ValidationLong/hundred_preds_long.csv")
long_test100 = pd.read_csv("TestingLong/hundred_preds_long.csv")

short_train100 = pd.read_csv("TrainingShort/hundred_preds_short.csv")
short_val100 = pd.read_csv("ValidationShort/hundred_preds_short.csv")
short_test100 = pd.read_csv("TestingShort/hundred_preds_short.csv")



def convert_to_floatdf(df):
    """Return a copy of ``df`` whose cells are lists of floats.

    Each cell is expected to hold a sequence of numbers, either still in its
    CSV text form (e.g. ``"[1, 2.5]"``) or already parsed into a list. Text
    cells are parsed with ``ast.literal_eval``; every element is then cast
    to ``float``. Accepting already-parsed cells makes the function
    idempotent, so it can safely be applied to an already-converted frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified. An ``"Unnamed: 0"`` column, if
        present, is dropped from the result.

    Returns
    -------
    pandas.DataFrame
        New frame with the same remaining columns, each cell a ``list`` of
        ``float``.

    Raises
    ------
    ValueError, SyntaxError
        If a text cell is not a valid Python literal.
    TypeError
        If a cell (after parsing) is not iterable or an element cannot be
        cast to ``float``.
    """
    def _to_float_list(cell):
        # Strings are the serialised form; anything else is treated as an
        # already-parsed sequence and only has its elements cast.
        values = ast.literal_eval(cell) if isinstance(cell, str) else cell
        return [float(value) for value in values]

    if "Unnamed: 0" in df.columns.values:
        converted = df.drop(columns=["Unnamed: 0"])  # returns a new frame
    else:
        converted = df.copy()

    for column in converted.columns:
        converted[column] = converted[column].apply(_to_float_list)
    return converted

# Conversion to floats: replace each raw frame with its list-of-floats version.
long_train100, long_val100, long_test100 = (
    convert_to_floatdf(frame)
    for frame in (long_train100, long_val100, long_test100)
)

short_train100, short_val100, short_test100 = (
    convert_to_floatdf(frame)
    for frame in (short_train100, short_val100, short_test100)
)

In [7]:
import numpy as np 
def calculate_encapsulation_frequency(preds_df, actuals_df, lower_percentile = 2.5, upper_percentile = 97.5):
    """Percentage of actual values that fall inside a percentile band.

    Cells are matched by position. For every ``(row, column)`` of
    ``preds_df`` the cell (a sequence of numbers) is reduced to a lower and
    an upper bound via ``np.percentile``; the cell counts as a hit when the
    value at the same position in ``actuals_df`` lies within those bounds,
    both ends inclusive.

    Parameters
    ----------
    preds_df : pandas.DataFrame
        Frame whose cells are sequences of numbers. Its shape determines
        which positions are visited.
    actuals_df : pandas.DataFrame
        Frame of scalar values, indexed positionally like ``preds_df``.
    lower_percentile, upper_percentile : float
        Percentiles (0-100) defining the band.

    Returns
    -------
    float
        ``hits / visited cells * 100``.

    Raises
    ------
    ZeroDivisionError
        If ``preds_df`` has no cells.
    """
    n_rows, n_cols = preds_df.shape[0], preds_df.shape[1]
    hits = 0

    for row in range(n_rows):
        for col in range(n_cols):
            observed = actuals_df.iloc[row, col]

            samples = preds_df.iloc[row, col]
            band_top = np.percentile(samples, upper_percentile)
            band_bottom = np.percentile(samples, lower_percentile)

            # Inclusive on both ends; a NaN on either side never counts as a hit.
            if band_bottom <= observed <= band_top:
                hits += 1

    # Every visited cell counts once towards the denominator.
    return hits / (n_rows * n_cols) * 100
    

# Actual test values (files named TestingActual_*.csv). The long frame is
# loaded here but not used by the call below.
actual_test_long = pd.read_csv("TestingLong/TestingActual_Long.csv")
actual_test_short = pd.read_csv("TestingShort/TestingActual_Short.csv")

# Share (in %) of actual short test values that lie within the 5th-95th
# percentile band of the corresponding prediction lists.
test_encap = calculate_encapsulation_frequency(
    preds_df=short_test100,
    actuals_df=actual_test_short,
    lower_percentile=5,
    upper_percentile=95,
)

In [8]:
# Percentage of actual short test values inside the 5th-95th percentile band.
print(test_encap)

97.331996945399
