In [1]:
from sklearn.linear_model import Lasso
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [2]:
def add_hours_dam(df, start_hour, interval=48):
    for i in range(start_hour, start_hour+interval):
        start = str(i)
        df["DAM LMP_PRC_"+start+"_hours_ago"] = 0.0
    for i in range(len(df)-1, interval-1, -1):
        for j in range(start_hour, start_hour+interval):
            start = str(j)
            v = df['DAM LMP_PRC'].iloc[i-j,] 
            df.at[i, "DAM LMP_PRC_"+start+"_hours_ago"] =  v
    return df

In [3]:
def add_hours_rtm(df, start_hour, interval=48):
    for rtm in range(1, 13):
        for i in range(start_hour, start_hour+interval):
            start = str(i)
            df["RTM_"+str(rtm) +"_LMP_PRC_"+start+"_hours_ago"] = 0.0
        for i in range(len(df)-1, interval-1, -1):
            for j in range(start_hour, start_hour+interval):
                start = str(j)
                v = df[('RTM LMP_PRC', rtm)].iloc[i-j,] 
                df.at[i, "RTM_"+str(rtm) +"_LMP_PRC_"+start+"_hours_ago"] =  v
    return df

In [4]:
def add_hours_gap(df, start_hour, interval=48):
    for i in range(start_hour, start_hour+interval):
        start = str(i)
        df["GAP LMP_PRC_"+start+"_hours_ago"] = 0.0
    for i in range(len(df)-1, interval-1, -1):
        for j in range(start_hour, start_hour+interval):
            start = str(j)
            v = df['Gap'].iloc[i-j,] 
            df.at[i, "GAP LMP_PRC_"+start+"_hours_ago"] =  v
    return df

In [5]:
feature_names = ['DAM LMP_PRC_1_hours_ago',
 'GAP LMP_PRC_24_hours_ago',
 'DAM LMP_PRC_24_hours_ago',
 'DAM LMP_PRC_7_hours_ago',
 'RTM_1_LMP_PRC_24_hours_ago',
 'GAP LMP_PRC_58_hours_ago',
 'DAM LMP_PRC_23_hours_ago',
 'RTM_1_LMP_PRC_33_hours_ago',
 'RTM_1_LMP_PRC_48_hours_ago',
 'GAP LMP_PRC_33_hours_ago',
 'GAP LMP_PRC_48_hours_ago',
 'DAM LMP_PRC_20_hours_ago',
 'Solar Zenith Angle',
 'RTM_1_LMP_PRC_20_hours_ago',
 'RTM_1_LMP_PRC_56_hours_ago',
 'RTM_1_LMP_PRC_25_hours_ago',
 'GAP LMP_PRC_20_hours_ago',
 'GAP LMP_PRC_54_hours_ago',
 'Demand_Forecast_Day_Ahead',
 'GAP LMP_PRC_43_hours_ago',
 'GAP LMP_PRC_34_hours_ago',
 'RTM_1_LMP_PRC_47_hours_ago',
 'DAM LMP_PRC_9_hours_ago',
 'GAP LMP_PRC_12_hours_ago',
 'GAP LMP_PRC_23_hours_ago',
 'DAM LMP_PRC_44_hours_ago',
 'RTM_1_LMP_PRC_34_hours_ago',
 'RTM_1_LMP_PRC_21_hours_ago',
 'GAP LMP_PRC_46_hours_ago',
 'GAP LMP_PRC_47_hours_ago',
 'RTM_1_LMP_PRC_17_hours_ago',
 'RTM_1_LMP_PRC_54_hours_ago',
 'GAP LMP_PRC_50_hours_ago',
 'Relative Humidity',
 'GAP LMP_PRC_56_hours_ago',
 'RTM_1_LMP_PRC_55_hours_ago',
 'GAP LMP_PRC_17_hours_ago',
 'GAP LMP_PRC_42_hours_ago',
 'RTM_1_LMP_PRC_46_hours_ago',
 'RTM_1_LMP_PRC_23_hours_ago',
 'GAP LMP_PRC_59_hours_ago',
 'Precipitable Water',
 'DAM LMP_PRC_22_hours_ago',
 'DAM LMP_PRC_47_hours_ago',
 'relative_humidity_set_1',
 'RTM_1_LMP_PRC_59_hours_ago',
 'RTM_1_LMP_PRC_18_hours_ago',
 'GAP LMP_PRC_18_hours_ago',
 'RTM_1_LMP_PRC_43_hours_ago',
 'DAM LMP_PRC_39_hours_ago',
 'DAM LMP_PRC_5_hours_ago',
 'RTM_1_LMP_PRC_58_hours_ago',
 'RTM_1_LMP_PRC_44_hours_ago',
 'cloud_layer_3_code_set_1',
 'dewpoint',
 'GAP LMP_PRC_57_hours_ago',
 'GAP LMP_PRC_40_hours_ago',
 'DAM LMP_PRC_6_hours_ago',
 'GAP LMP_PRC_55_hours_ago',
 'GAP LMP_PRC_44_hours_ago',
 'GAP LMP_PRC_13_hours_ago',
 'RTM_1_LMP_PRC_12_hours_ago',
 'DAM LMP_PRC_11_hours_ago',
 'GAP LMP_PRC_14_hours_ago',
 'RTM_1_LMP_PRC_27_hours_ago',
 'DAM LMP_PRC_2_hours_ago',
 'Demand_Forecast_7Day_Ahead',
 'GAP LMP_PRC_41_hours_ago',
 'GAP LMP_PRC_21_hours_ago',
 'DAM LMP_PRC_18_hours_ago',
 'windspeed',
 'Wind Direction',
 'Demand_Forecast_2Day_Ahead',
 'DAM LMP_PRC_48_hours_ago',
 'GAP LMP_PRC_26_hours_ago',
 'RTM_1_LMP_PRC_49_hours_ago',
 'RTM_1_LMP_PRC_51_hours_ago',
 ('DAM RT_Forecast_RTPD', 1),
 'DAM LMP_PRC_10_hours_ago',
 'DAM LMP_PRC_15_hours_ago',
 'DAM LMP_PRC_45_hours_ago',
 ('RT_Forecast_RTD', 1),
 'RTM_1_LMP_PRC_29_hours_ago',
 ('DAM RT_Forecast_RTPD', 2),
 'RTM_1_LMP_PRC_15_hours_ago',
 'humidity',
 'RTM_1_LMP_PRC_53_hours_ago',
 'DAM LMP_PRC_3_hours_ago',
 'GAP LMP_PRC_37_hours_ago',
 'RTM_1_LMP_PRC_26_hours_ago',
 'GAP LMP_PRC_36_hours_ago',
 'GAP LMP_PRC_31_hours_ago',
 'GAP LMP_PRC_49_hours_ago',
 'RTM_1_LMP_PRC_38_hours_ago',
 'RTM_1_LMP_PRC_22_hours_ago',
 'DHI',
 'RTM_1_LMP_PRC_42_hours_ago',
 'DAM LMP_PRC_19_hours_ago',
 'DAM LMP_PRC_21_hours_ago',
 'RTM_1_LMP_PRC_16_hours_ago',
 'Day',
 'GAP LMP_PRC_25_hours_ago',
 'cloud_layer_2_code_set_1',
 'DAM LMP_PRC_25_hours_ago',
 'DAM LMP_PRC_13_hours_ago',
 'cloud_layer_1_code_set_1',
 'RTM_1_LMP_PRC_31_hours_ago',
 'RTM_1_LMP_PRC_35_hours_ago',
 'DAM LMP_PRC_38_hours_ago',
 'GAP LMP_PRC_45_hours_ago',
 'RTM_1_LMP_PRC_37_hours_ago',
 'pressure',
 'GAP LMP_PRC_19_hours_ago',
 'GAP LMP_PRC_27_hours_ago',
 'GAP LMP_PRC_32_hours_ago',
 'DAM LMP_PRC_12_hours_ago',
 'RTM_1_LMP_PRC_36_hours_ago',
 'RTM_1_LMP_PRC_45_hours_ago',
 'GAP LMP_PRC_28_hours_ago',
 'GAP LMP_PRC_15_hours_ago',
 'ceiling_set_1',
 'sea_level_pressure_set_1d',
 'DAM LMP_PRC_35_hours_ago',
 'DAM LMP_PRC_37_hours_ago',
 'GAP LMP_PRC_29_hours_ago',
 'RTM_1_LMP_PRC_32_hours_ago',
 'DAM LMP_PRC_46_hours_ago',
 'DAM LMP_PRC_26_hours_ago',
 'altimeter_set_1',
 'RTM_1_LMP_PRC_13_hours_ago',
 'Temperature',
 'DAM LMP_PRC_33_hours_ago',
 'RTM_1_LMP_PRC_28_hours_ago',
 'GHI',
 'DAM LMP_PRC_28_hours_ago',
 'Fill Flag',
 'RTM_1_LMP_PRC_50_hours_ago',
 'DAM LMP_PRC_14_hours_ago',
 'DAM LMP_PRC_30_hours_ago',
 ('RT_Forecast_RTD', 3),
 ('RT_Forecast_RTD', 4),
 'DAM LMP_PRC_16_hours_ago',
 'DAM LMP_PRC_43_hours_ago',
 'GAP LMP_PRC_52_hours_ago',
 'DAM LMP_PRC_8_hours_ago',
 'GAP LMP_PRC_16_hours_ago',
 'GAP LMP_PRC_51_hours_ago',
 'visibility_set_1',
 'RTM_1_LMP_PRC_57_hours_ago',
 ('RT_Forecast_RTD', 2),
 'GAP LMP_PRC_35_hours_ago',
 ('RT_Forecast_RTD', 12),
 'DAM LMP_PRC_34_hours_ago',
 'RTM_1_LMP_PRC_52_hours_ago',
 'RTM_1_LMP_PRC_30_hours_ago',
 'DAM LMP_PRC_32_hours_ago',
 'GAP LMP_PRC_53_hours_ago',
 ('RT_Forecast_RTD', 6),
 'DAM LMP_PRC_42_hours_ago',
 'DAM LMP_PRC_29_hours_ago',
 'RTM_1_LMP_PRC_19_hours_ago',
 'DAM LMP_PRC_17_hours_ago',
 'DAM LMP_PRC_36_hours_ago',
 'DAM LMP_PRC_4_hours_ago',
 ('RT_Forecast_RTD', 10),
 'Clearsky DHI',
 'GAP LMP_PRC_38_hours_ago',
 'RTM_1_LMP_PRC_39_hours_ago',
 ('RT_Forecast_RTD', 8),
 ('RT_Forecast_RTD', 7),
 'DAM LMP_PRC_41_hours_ago',
 'DAM LMP_PRC_40_hours_ago',
 'RTM_1_LMP_PRC_40_hours_ago',
 'Month',
 'GAP LMP_PRC_22_hours_ago',
 'GAP LMP_PRC_30_hours_ago',
 'Hour',
 'DAM LMP_PRC_27_hours_ago',
 ('DAM RT_Forecast_RTPD', 4),
 ('DAM RT_Forecast_RTPD', 3),
 ('RT_Forecast_RTD', 11),
 'Clearsky DNI',
 'RTM_1_LMP_PRC_41_hours_ago',
 'Clearsky GHI',
 'GAP LMP_PRC_39_hours_ago',
 'DNI',
 'hr_index',
 ('RT_Forecast_RTD', 5),
 'RTM_1_LMP_PRC_14_hours_ago',
 'weather_condition_set_1d',
 'Cloud Type',
 'precipitation',
 'wind_gust_set_1',
 ('RT_Forecast_RTD', 9),
 'DAM LMP_PRC_31_hours_ago',
 'IsHoliday',
 'Year',
 'weather_cond_code_set_1',
 'IsWeekday',
 'heat_index_set_1d',
 'wind_chill_set_1d',
 'precipitation_6',
 'IsWeekend',
 'precip_accum_one_hour_set_1']

In [7]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt


dam_MAE_scores = []
dam_rMAE_scores = []
dam_RMSE_scores = []
dam_rRMSE_scores = []

def dam_predictions():
    for hour in range(1, 25):

        Y_cols = ['DAM LMP_PRC'] 
        X_cols = [col for col in list(df) if col not in Y_cols+[
        ('HASP LMP_CONG_PRC', 1),
        ('HASP LMP_CONG_PRC', 2),
        ('HASP LMP_CONG_PRC', 3),
        ('HASP LMP_CONG_PRC', 4),
        ('HASP LMP_ENE_PRC', 1),
        ('HASP LMP_ENE_PRC', 2),
        ('HASP LMP_ENE_PRC', 3),
        ('HASP LMP_ENE_PRC', 4),
        ('HASP LMP_GHG_PRC', 1),
        ('HASP LMP_LOSS_PRC', 1),
        ('HASP LMP_LOSS_PRC', 2),
        ('HASP LMP_LOSS_PRC', 3),
        ('HASP LMP_LOSS_PRC', 4),
        ('HASP LMP_PRC', 1),
        ('HASP LMP_PRC', 2),
        ('HASP LMP_PRC', 3),
        ('HASP LMP_PRC', 4),
        ('RTM LMP_CONG_PRC', 1),
        ('RTM LMP_CONG_PRC', 2),
        ('RTM LMP_CONG_PRC', 3),
        ('RTM LMP_CONG_PRC', 4),
        ('RTM LMP_CONG_PRC', 5),
        ('RTM LMP_CONG_PRC', 6),
        ('RTM LMP_CONG_PRC', 7),
        ('RTM LMP_CONG_PRC', 8),
        ('RTM LMP_CONG_PRC', 9),
        ('RTM LMP_CONG_PRC', 10),
        ('RTM LMP_CONG_PRC', 11),
        ('RTM LMP_CONG_PRC', 12),
        ('RTM LMP_ENE_PRC', 1),
        ('RTM LMP_ENE_PRC', 2),
        ('RTM LMP_ENE_PRC', 3),
        ('RTM LMP_ENE_PRC', 4),
        ('RTM LMP_ENE_PRC', 5),
        ('RTM LMP_ENE_PRC', 6),
        ('RTM LMP_ENE_PRC', 7),
        ('RTM LMP_ENE_PRC', 8),
        ('RTM LMP_ENE_PRC', 9),
        ('RTM LMP_ENE_PRC', 10),
        ('RTM LMP_ENE_PRC', 11),
        ('RTM LMP_ENE_PRC', 12),
        ('RTM LMP_GHG_PRC', 1),
        ('RTM LMP_GHG_PRC', 7),
        ('RTM LMP_LOSS_PRC', 1),
        ('RTM LMP_LOSS_PRC', 2),
        ('RTM LMP_LOSS_PRC', 3),
        ('RTM LMP_LOSS_PRC', 4),
        ('RTM LMP_LOSS_PRC', 5),
        ('RTM LMP_LOSS_PRC', 6),
        ('RTM LMP_LOSS_PRC', 7),
        ('RTM LMP_LOSS_PRC', 8),
        ('RTM LMP_LOSS_PRC', 9),
        ('RTM LMP_LOSS_PRC', 10),
        ('RTM LMP_LOSS_PRC', 11),
        ('RTM LMP_LOSS_PRC', 12),
        ('RTM LMP_PRC', 1),
        ('RTM LMP_PRC', 2),
        ('RTM LMP_PRC', 3),
        ('RTM LMP_PRC', 4),
        ('RTM LMP_PRC', 5),
        ('RTM LMP_PRC', 6),
        ('RTM LMP_PRC', 7),
        ('RTM LMP_PRC', 8),
        ('RTM LMP_PRC', 9),
        ('RTM LMP_PRC', 10),
        ('RTM LMP_PRC', 11),
        ('RTM LMP_PRC', 12),
        'DAM LMP_CONG_PRC',
        'DAM LMP_GHG_PRC',
        ('HASP LMP_CONG_PRC', 1),
        ('HASP LMP_CONG_PRC', 2),
        ('HASP LMP_CONG_PRC', 3),
        ('HASP LMP_CONG_PRC', 4),
        ('HASP LMP_ENE_PRC', 1),
        ('HASP LMP_ENE_PRC', 2),
        ('HASP LMP_ENE_PRC', 3),
        ('HASP LMP_ENE_PRC', 4),
        ('HASP LMP_GHG_PRC', 1),
        ('HASP LMP_LOSS_PRC', 1),
        ('HASP LMP_LOSS_PRC', 2),
        ('HASP LMP_LOSS_PRC', 3),
        ('HASP LMP_LOSS_PRC', 4),
        ('HASP LMP_PRC', 1),
        ('HASP LMP_PRC', 2),
        ('HASP LMP_PRC', 3),
        ('HASP LMP_PRC', 4),
        ('RTM LMP_CONG_PRC', 1),
        ('RTM LMP_CONG_PRC', 2),
        ('RTM LMP_CONG_PRC', 3),
        ('RTM LMP_CONG_PRC', 4),
        ('RTM LMP_CONG_PRC', 5),
        ('RTM LMP_CONG_PRC', 6),
        ('RTM LMP_CONG_PRC', 7),
        ('RTM LMP_CONG_PRC', 8),
        ('RTM LMP_CONG_PRC', 9),
        ('RTM LMP_CONG_PRC', 10),
        ('RTM LMP_CONG_PRC', 11),
        ('RTM LMP_CONG_PRC', 12),
        ('RTM LMP_GHG_PRC', 1),
        ('RTM LMP_GHG_PRC', 7),
        'DAM LMP_ENE_PRC',
        'DAM LMP_LOSS_PRC',
        'Actual_Demand',]]
        Y = df[Y_cols]
        X = df[X_cols]
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, shuffle=True, random_state=42)
        lasso = Lasso()
        lasso.fit(X_train,Y_train)
        predictions= lasso.predict(X_test)
        predicted_sum = 0 
        for i in range(len(predictions)):
            predicted_sum += predictions[i]
        normalized_predicted_sum = predicted_sum/len(predictions)
        dam_MAE_scores.append(mean_absolute_error(Y_test, predictions))
        dam_rMAE_scores.append((mean_absolute_error(Y_test, predictions))/normalized_predicted_sum)
        dam_RMSE_scores.append(sqrt(mean_squared_error(Y_test, predictions)))
        dam_rRMSE_scores.append((sqrt(mean_squared_error(Y_test, predictions))/normalized_predicted_sum))

dam_predictions()
print(dam_MAE_scores)
print(dam_rMAE_scores)
print(dam_RMSE_scores)
print(dam_rRMSE_scores)

NameError: name 'df' is not defined

In [6]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt

rtm_MAE_scores = []
rtm_rMAE_scores = []
rtm_RMSE_scores = []
rtm_rRMSE_scores = []
rtm_r2_scores = []

def rtm_predictions():
    for hour in range(1, 25):
        df = pd.read_pickle('df.pkl')
        df = df.reset_index(drop=True)
        df = add_hours_dam(df, hour, 168)
        df = add_hours_rtm(df, hour+11, 168)
        Y_cols = [('RTM LMP_PRC', 6)] 
        X_cols = [col for col in list(df) if col not in Y_cols+[
        ('HASP LMP_CONG_PRC', 1),
        ('HASP LMP_CONG_PRC', 2),
        ('HASP LMP_CONG_PRC', 3),
        ('HASP LMP_CONG_PRC', 4),
        ('HASP LMP_ENE_PRC', 1),
        ('HASP LMP_ENE_PRC', 2),
        ('HASP LMP_ENE_PRC', 3),
        ('HASP LMP_ENE_PRC', 4),
        ('HASP LMP_GHG_PRC', 1),
        ('HASP LMP_LOSS_PRC', 1),
        ('HASP LMP_LOSS_PRC', 2),
        ('HASP LMP_LOSS_PRC', 3),
        ('HASP LMP_LOSS_PRC', 4),
        ('HASP LMP_PRC', 1),
        ('HASP LMP_PRC', 2),
        ('HASP LMP_PRC', 3),
        ('HASP LMP_PRC', 4),
        ('RTM LMP_CONG_PRC', 1),
        ('RTM LMP_CONG_PRC', 2),
        ('RTM LMP_CONG_PRC', 3),
        ('RTM LMP_CONG_PRC', 4),
        ('RTM LMP_CONG_PRC', 5),
        ('RTM LMP_CONG_PRC', 6),
        ('RTM LMP_CONG_PRC', 7),
        ('RTM LMP_CONG_PRC', 8),
        ('RTM LMP_CONG_PRC', 9),
        ('RTM LMP_CONG_PRC', 10),
        ('RTM LMP_CONG_PRC', 11),
        ('RTM LMP_CONG_PRC', 12),
        ('RTM LMP_ENE_PRC', 1),
        ('RTM LMP_ENE_PRC', 2),
        ('RTM LMP_ENE_PRC', 3),
        ('RTM LMP_ENE_PRC', 4),
        ('RTM LMP_ENE_PRC', 5),
        ('RTM LMP_ENE_PRC', 6),
        ('RTM LMP_ENE_PRC', 7),
        ('RTM LMP_ENE_PRC', 8),
        ('RTM LMP_ENE_PRC', 9),
        ('RTM LMP_ENE_PRC', 10),
        ('RTM LMP_ENE_PRC', 11),
        ('RTM LMP_ENE_PRC', 12),
        ('RTM LMP_GHG_PRC', 1),
        ('RTM LMP_GHG_PRC', 7),
        ('RTM LMP_LOSS_PRC', 1),
        ('RTM LMP_LOSS_PRC', 2),
        ('RTM LMP_LOSS_PRC', 3),
        ('RTM LMP_LOSS_PRC', 4),
        ('RTM LMP_LOSS_PRC', 5),
        ('RTM LMP_LOSS_PRC', 6),
        ('RTM LMP_LOSS_PRC', 7),
        ('RTM LMP_LOSS_PRC', 8),
        ('RTM LMP_LOSS_PRC', 9),
        ('RTM LMP_LOSS_PRC', 10),
        ('RTM LMP_LOSS_PRC', 11),
        ('RTM LMP_LOSS_PRC', 12),
        ('RTM LMP_PRC', 1),
        ('RTM LMP_PRC', 2),
        ('RTM LMP_PRC', 3),
        ('RTM LMP_PRC', 4),
        ('RTM LMP_PRC', 5),
        ('RTM LMP_PRC', 6),
        ('RTM LMP_PRC', 7),
        ('RTM LMP_PRC', 8),
        ('RTM LMP_PRC', 9),
        ('RTM LMP_PRC', 10),
        ('RTM LMP_PRC', 11),
        ('RTM LMP_PRC', 12),
        'DAM LMP_CONG_PRC',
        'DAM LMP_GHG_PRC',
        ('HASP LMP_CONG_PRC', 1),
        ('HASP LMP_CONG_PRC', 2),
        ('HASP LMP_CONG_PRC', 3),
        ('HASP LMP_CONG_PRC', 4),
        ('HASP LMP_ENE_PRC', 1),
        ('HASP LMP_ENE_PRC', 2),
        ('HASP LMP_ENE_PRC', 3),
        ('HASP LMP_ENE_PRC', 4),
        ('HASP LMP_GHG_PRC', 1),
        ('HASP LMP_LOSS_PRC', 1),
        ('HASP LMP_LOSS_PRC', 2),
        ('HASP LMP_LOSS_PRC', 3),
        ('HASP LMP_LOSS_PRC', 4),
        ('HASP LMP_PRC', 1),
        ('HASP LMP_PRC', 2),
        ('HASP LMP_PRC', 3),
        ('HASP LMP_PRC', 4),
        ('RTM LMP_CONG_PRC', 1),
        ('RTM LMP_CONG_PRC', 2),
        ('RTM LMP_CONG_PRC', 3),
        ('RTM LMP_CONG_PRC', 4),
        ('RTM LMP_CONG_PRC', 5),
        ('RTM LMP_CONG_PRC', 6),
        ('RTM LMP_CONG_PRC', 7),
        ('RTM LMP_CONG_PRC', 8),
        ('RTM LMP_CONG_PRC', 9),
        ('RTM LMP_CONG_PRC', 10),
        ('RTM LMP_CONG_PRC', 11),
        ('RTM LMP_CONG_PRC', 12),
        ('RTM LMP_GHG_PRC', 1),
        ('RTM LMP_GHG_PRC', 7),
        'DAM LMP_ENE_PRC',
        'DAM LMP_LOSS_PRC',
        'Actual_Demand',]]
        Y = df[Y_cols]
        X = df[X_cols]
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, shuffle=True, random_state=42)
        lasso = Lasso()
        lasso.fit(X_train,Y_train)
        predictions= lasso.predict(X_test)
        predicted_sum = 0 
        for i in range(len(predictions)):
            predicted_sum += predictions[i]
        normalized_predicted_sum = predicted_sum/len(predictions)
    
        rtm_MAE_scores.append(mean_absolute_error(Y_test, predictions))
        rtm_rMAE_scores.append((mean_absolute_error(Y_test, predictions))/normalized_predicted_sum)
        rtm_RMSE_scores.append(sqrt(mean_squared_error(Y_test, predictions)))
        rtm_rRMSE_scores.append((sqrt(mean_squared_error(Y_test, predictions))/normalized_predicted_sum))
    return
rtm_predictions()
print(rtm_MAE_scores)
print(rtm_rMAE_scores)
print(rtm_RMSE_scores)
print(rtm_rRMSE_scores)

[39.798304986199064, 39.78921369524394, 39.84295539040146, 39.82880584169954, 40.04619127278228, 39.87557694310264, 39.93724123437998, 39.845974496133806, 39.964883497898406, 40.19530253867923, 40.34851683118301, 40.138485113985126, 40.29503552926649, 40.2217024651763, 40.13058707971432, 40.2216976059229, 40.084141819127986, 40.10515935537609, 40.20566761060327, 40.19410447508258, 40.18253866751047, 40.207895932100186, 40.177094497093755, 40.26229605291735]
[1.0550539862942476, 1.0531276641421632, 1.0561713095573724, 1.0595122368601118, 1.069261647713284, 1.0644688856122504, 1.0652175065144844, 1.060647269969483, 1.0649149816479646, 1.0627204413932898, 1.0645797265504273, 1.064566773050954, 1.062828436185558, 1.062877379267093, 1.0662226209807988, 1.0621681633338111, 1.0586099363434451, 1.0572539089913282, 1.057462631741435, 1.0558398951068202, 1.056346012659065, 1.0527451449291045, 1.0509790440228781, 1.052571519572709]
[86.95275694159953, 86.95709591905826, 87.02191615411733, 86.8309

In [10]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt

gap_MAE_scores = []
gap_rMAE_scores = []
gap_RMSE_scores = []
gap_rRMSE_scores = []

f_predictions = []

df = pd.read_pickle('df.pkl')
df = df.reset_index(drop=True)
df['Gap'] = df['DAM LMP_PRC'] - df[('RTM LMP_PRC', 1)]
df = add_hours_dam(df, 1, 121)
df = add_hours_rtm(df, 12, 121)
df = add_hours_gap(df, 12,  121)

def gap_predictions():
    for hour in range(1, 25):
        Y_cols = ['Gap'] 
        X_cols = [col for col in feature_names if col not in Y_cols+[
        ('HASP LMP_CONG_PRC', 1),
        ('HASP LMP_CONG_PRC', 2),
        ('HASP LMP_CONG_PRC', 3),
        ('HASP LMP_CONG_PRC', 4),
        ('HASP LMP_ENE_PRC', 1),
        ('HASP LMP_ENE_PRC', 2),
        ('HASP LMP_ENE_PRC', 3),
        ('HASP LMP_ENE_PRC', 4),
        ('HASP LMP_GHG_PRC', 1),
        ('HASP LMP_LOSS_PRC', 1),
        ('HASP LMP_LOSS_PRC', 2),
        ('HASP LMP_LOSS_PRC', 3),
        ('HASP LMP_LOSS_PRC', 4),
        ('HASP LMP_PRC', 1),
        ('HASP LMP_PRC', 2),
        ('HASP LMP_PRC', 3),
        ('HASP LMP_PRC', 4),
        ('RTM LMP_CONG_PRC', 1),
        ('RTM LMP_CONG_PRC', 2),
        ('RTM LMP_CONG_PRC', 3),
        ('RTM LMP_CONG_PRC', 4),
        ('RTM LMP_CONG_PRC', 5),
        ('RTM LMP_CONG_PRC', 6),
        ('RTM LMP_CONG_PRC', 7),
        ('RTM LMP_CONG_PRC', 8),
        ('RTM LMP_CONG_PRC', 9),
        ('RTM LMP_CONG_PRC', 10),
        ('RTM LMP_CONG_PRC', 11),
        ('RTM LMP_CONG_PRC', 12),
        ('RTM LMP_ENE_PRC', 1),
        ('RTM LMP_ENE_PRC', 2),
        ('RTM LMP_ENE_PRC', 3),
        ('RTM LMP_ENE_PRC', 4),
        ('RTM LMP_ENE_PRC', 5),
        ('RTM LMP_ENE_PRC', 6),
        ('RTM LMP_ENE_PRC', 7),
        ('RTM LMP_ENE_PRC', 8),
        ('RTM LMP_ENE_PRC', 9),
        ('RTM LMP_ENE_PRC', 10),
        ('RTM LMP_ENE_PRC', 11),
        ('RTM LMP_ENE_PRC', 12),
        ('RTM LMP_GHG_PRC', 1),
        ('RTM LMP_GHG_PRC', 7),
        ('RTM LMP_LOSS_PRC', 1),
        ('RTM LMP_LOSS_PRC', 2),
        ('RTM LMP_LOSS_PRC', 3),
        ('RTM LMP_LOSS_PRC', 4),
        ('RTM LMP_LOSS_PRC', 5),
        ('RTM LMP_LOSS_PRC', 6),
        ('RTM LMP_LOSS_PRC', 7),
        ('RTM LMP_LOSS_PRC', 8),
        ('RTM LMP_LOSS_PRC', 9),
        ('RTM LMP_LOSS_PRC', 10),
        ('RTM LMP_LOSS_PRC', 11),
        ('RTM LMP_LOSS_PRC', 12),
        ('RTM LMP_PRC', 1),
        ('RTM LMP_PRC', 2),
        ('RTM LMP_PRC', 3),
        ('RTM LMP_PRC', 4),
        ('RTM LMP_PRC', 5),
        ('RTM LMP_PRC', 6),
        ('RTM LMP_PRC', 7),
        ('RTM LMP_PRC', 8),
        ('RTM LMP_PRC', 9),
        ('RTM LMP_PRC', 10),
        ('RTM LMP_PRC', 11),
        ('RTM LMP_PRC', 12),
        'DAM LMP_CONG_PRC',
        'DAM LMP_GHG_PRC',
        ('HASP LMP_CONG_PRC', 1),
        ('HASP LMP_CONG_PRC', 2),
        ('HASP LMP_CONG_PRC', 3),
        ('HASP LMP_CONG_PRC', 4),
        ('HASP LMP_ENE_PRC', 1),
        ('HASP LMP_ENE_PRC', 2),
        ('HASP LMP_ENE_PRC', 3),
        ('HASP LMP_ENE_PRC', 4),
        ('HASP LMP_GHG_PRC', 1),
        ('HASP LMP_LOSS_PRC', 1),
        ('HASP LMP_LOSS_PRC', 2),
        ('HASP LMP_LOSS_PRC', 3),
        ('HASP LMP_LOSS_PRC', 4),
        ('HASP LMP_PRC', 1),
        ('HASP LMP_PRC', 2),
        ('HASP LMP_PRC', 3),
        ('HASP LMP_PRC', 4),
        ('RTM LMP_CONG_PRC', 1),
        ('RTM LMP_CONG_PRC', 2),
        ('RTM LMP_CONG_PRC', 3),
        ('RTM LMP_CONG_PRC', 4),
        ('RTM LMP_CONG_PRC', 5),
        ('RTM LMP_CONG_PRC', 6),
        ('RTM LMP_CONG_PRC', 7),
        ('RTM LMP_CONG_PRC', 8),
        ('RTM LMP_CONG_PRC', 9),
        ('RTM LMP_CONG_PRC', 10),
        ('RTM LMP_CONG_PRC', 11),
        ('RTM LMP_CONG_PRC', 12),
        ('RTM LMP_GHG_PRC', 1),
        ('RTM LMP_GHG_PRC', 7),
        'DAM LMP_ENE_PRC',
        'DAM LMP_LOSS_PRC',
        'Actual_Demand',]]
        Y = df[Y_cols]
        X = df[X_cols]
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, shuffle=False, random_state=42)
        lasso = Lasso()
        lasso.fit(X_train,Y_train)
        predictions = lasso.predict(X_test)
        f_predictions.append(predictions)
        predicted_sum = 0 
        for i in range(len(predictions)):
            predicted_sum += predictions[i]
        normalized_predicted_sum = predicted_sum/len(predictions)
        gap_MAE_scores.append(mean_absolute_error(Y_test, predictions))
        gap_rMAE_scores.append((mean_absolute_error(Y_test, predictions))/normalized_predicted_sum)
        gap_RMSE_scores.append(sqrt(mean_squared_error(Y_test, predictions)))
        gap_rRMSE_scores.append((sqrt(mean_squared_error(Y_test, predictions)))/normalized_predicted_sum)

gap_predictions()
print(gap_MAE_scores)
print(gap_rMAE_scores)
print(gap_RMSE_scores)
print(gap_rRMSE_scores)

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


[24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873, 24.07496735259873]
[5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603, 5.831640875827603]
[79.16117047954225, 79.16117047954225, 79.16117047954225, 79.16117047954225, 79.161170

  positive)
