In [12]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 


train_short = pd.read_csv("TrainingShort/TrainingActual_Short.csv")
val_short = pd.read_csv("ValidationShort/ValidationActual_Short.csv")
test_short = pd.read_csv("TestingShort/TestingActual_Short.csv")


train_preds = pd.read_csv("TrainingShort/TrainingPredictionsShort.csv")
val_preds = pd.read_csv("ValidationShort/ValidationPredictionsShort.csv")
test_preds = pd.read_csv("TestingShort/TestingPredictionsShort.csv")

In [13]:
train_long_actual = pd.read_csv("TrainingLong/TrainingActual_Long.csv")
val_long_actual = pd.read_csv("ValidationLong/ValidationActual_Long.csv")
test_long_actual = pd.read_csv("TestingLong/TestingActual_Long.csv")

train_long_preds = pd.read_csv("TrainingLong/TrainingPredictionsLong_Gaussian.csv")
val_long_preds = pd.read_csv("ValidationLong/ValidationPredictionsLong_Gaussian.csv")
test_long_preds = pd.read_csv("TestingLong/TestingPredictionsLong_Gaussian.csv")

In [17]:
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, mean_absolute_percentage_error

ncols_train = train_short.shape[1]
ncols_val = val_short.shape[1]
ncols_test = test_short.shape[1]

def evaluate(output, preds):
    errors_mae = []
    errors_mape = [] 
    errors_rmse = [] 
    ncols = output.shape[1] 
    for i in range(ncols): 
        output_col = output.iloc[:, i]
        preds_col = preds.iloc[:, i]
        
        errors_mae.append(mean_absolute_error(output_col, preds_col))
        errors_mape.append(mean_absolute_percentage_error(output_col, preds_col))
        errors_rmse.append(root_mean_squared_error(output_col, preds_col))
        
    
    errors_mae = np.array(errors_mae)
    errors_mape = np.array(errors_mape)
    errors_rmse = np.array(errors_rmse)


    return np.sum(errors_mae)/ncols, np.sum(errors_rmse)/ncols, np.sum(errors_mape)/ncols,



errors = evaluate(train_long_preds, train_long_actual)
print(errors)    

(104.31314689643347, 124.30283596465956, 0.03126279433770484)


# Calculating maximum and minimum electricity demand differences

In [7]:
import ast
import pandas as pd 
long_train100 = pd.read_csv("TestingLong/hundred_preds_Long.csv")
long_val100 = pd.read_csv("ValidationLong/hundred_preds_Long.csv")
long_test100 = pd.read_csv("TestingLong/hundred_preds_Long.csv")

short_train100 = pd.read_csv("TrainingShort/hundred_preds_Short.csv")
short_val100 = pd.read_csv("ValidationShort/hundred_preds_Short.csv")
short_test100 = pd.read_csv("TestingShort/hundred_preds_Short.csv")



def convert_to_floatdf(df):
    if "Unnamed: 0" in df.columns.values:
        x = df.drop(["Unnamed: 0"], inplace = False, axis =1) # drop that column. 
    else: 
        x = df.copy() 
    for column in x.columns:
        x[column] = x[column].apply(lambda x : list(map(float, ast.literal_eval(x))))
    return x  

long_train100 = convert_to_floatdf(long_train100)   
long_val100 = convert_to_floatdf(long_val100)  
long_test100 = convert_to_floatdf(long_test100) # conversion to floats. 

short_train100 = convert_to_floatdf(short_train100)     
short_val100 = convert_to_floatdf(short_val100)   
short_test100 = convert_to_floatdf(short_test100)    

In [8]:
import numpy as np 
def calculate_encapsulation_frequency(preds_df, actuals_df, lower_percentile = 2.5, upper_percentile = 97.5):
    encapsulation_count = 0
    total_count = 0

    rows_len, cols_len = preds_df.shape[0], preds_df.shape[1]
    
    for i in range(rows_len): ## go by each row and col.
        for j in range(cols_len): 
            actual_val = actuals_df.iloc[i, j] # get the actual val. 
            
            list_val = preds_df.iloc[i,j] # get the list. 
            upper_bound = np.percentile(list_val, upper_percentile)
            lower_bound = np.percentile(list_val, lower_percentile) # get the upper and lower bounds. 
            
            if lower_bound <= actual_val <= upper_bound:
                encapsulation_count +=1 
            total_count+=1 
    
    return encapsulation_count/total_count * 100 
    

actual_test_long = pd.read_csv("TestingLong/TestingActual_Long.csv")
actual_test_short = pd.read_csv("TestingShort/TestingActual_Short.csv")
test_encap = calculate_encapsulation_frequency(short_test100, actual_test_short, 5, upper_percentile=95)

In [9]:
print(test_encap) 

97.331996945399


# Bias

In [10]:
def convert_columns(df):
    return pd.to_datetime(df.columns.str.extract(r'(\d{1,2}/\d{1,2}/\d{4})')[0], format='%m/%d/%Y', errors='coerce')

In [11]:
normal_short = pd.read_csv("/home/jik19004/FilesToRun/BayesianTimeSeries/Baselines/TestingShort/TestingPredictionsShort.csv")
normal_long = pd.read_csv("/home/jik19004/FilesToRun/BayesianTimeSeries/Baselines/TestingLong/TestingPredictionsLong.csv")

bayesian_short = pd.read_csv("/home/jik19004/FilesToRun/BayesianTimeSeries/TestingShort/TestingPredictionsShort.csv")
bayesian_long = pd.read_csv("/home/jik19004/FilesToRun/BayesianTimeSeries/TestingLong/TestingPredictionsLong.csv")
bayesian_long_gauss = pd.read_csv("/home/jik19004/FilesToRun/BayesianTimeSeries/TestingLong/TestingPredictionsLong_Gaussian.csv")

normal_short.columns = convert_columns(normal_short)
normal_long.columns = convert_columns(normal_long)

bayesian_short.columns = convert_columns(bayesian_short)
bayesian_long.columns = convert_columns(bayesian_long) 
bayesian_long_gauss.columns = convert_columns(bayesian_long_gauss) 

actual_test_short = pd.read_csv("/home/jik19004/FilesToRun/BayesianTimeSeries/TestingShort/TestingActual_Short.csv")
actual_test_short.columns = convert_columns(actual_test_short) # convert the actual_short. 

actual_test_long = pd.read_csv("/home/jik19004/FilesToRun/BayesianTimeSeries/TestingLong/TestingActual_Long.csv")
actual_test_long.columns = convert_columns(actual_test_long)


def calculate_bias(actuals, predictions, months, year):
    all_actuals = []
    all_predictions = []
    
    for month in months:
        # Filter data for the specific month and year
        filtered_cols = [col for col in actuals.columns if col.month == month and col.year == year]
        monthly_actuals = actuals[filtered_cols].values.flatten()
        monthly_predictions = predictions[filtered_cols].values.flatten()
        # Remove NaN values
        mask = ~np.isnan(monthly_actuals) & ~np.isnan(monthly_predictions)
        monthly_actuals = monthly_actuals[mask]
        monthly_predictions = monthly_predictions[mask]
        
        # Accumulate data for the overall calculation
        all_actuals.extend(monthly_actuals)
        all_predictions.extend(monthly_predictions)
    all_actuals = np.array(all_actuals)
    all_predictions = np.array(all_predictions)
    
    return np.sum(all_predictions - all_actuals)/(len(all_predictions))


r2_scores = {}

short_years = [2019,2020,2021] 
months = [3,4,5]

for year in short_years: 
    r2_scores[year] = calculate_bias(actual_test_short, bayesian_short, months, year) # calculate the r2 score for each year. 
# Display the R^2 scores for each month
print("Bias values:")
r2_scores


Bias values:


{2019: -73.5386166213768, 2020: -81.6790879402174, 2021: -58.55308632783883}

In [10]:
col = actual.columns[0]

if "2019" in col: 
    print("Yes")

Yes
