## Use trained prediction models to predict future feeder load time series

### Import packages

In [1]:
import numpy as np
import pyarrow.parquet as pq
import joblib
import scipy
import pandas as pd
from pandas import DatetimeIndex
import time
import os
import re
import glob
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import sklearn
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor as MLP
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from datetime import datetime, timedelta
import seaborn as sns
import pytz
import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)
import yaml
import pprint

from src import input_ops
from src import model_ops
from src import aux_ops

### Load config file with scenarios and parameters 

In [44]:
# Load the run configuration from config/<config_file_name>.yaml.
config_file_name = 'config1'
config_path = f"config/{config_file_name}.yaml"
config = input_ops.load_config(config_path)
# pprint.pprint(config, sort_dicts=False)  # uncomment to print the whole config

# Parameters read from the config in this cell.
# Other entries (TGW_scenario, TGW_weather_years, Y_column, CITY_REGIONS_TO_RUN,
# output_data_prediction_path) are not read here.
aggregation_level = config['aggregation_level']
# 'X_columns_set' names the config entry that holds the list of input feature columns.
X_columns = config[config['X_columns_set']]

# Root folder of the weather input data used for prediction.
input_data_prediction_path = config['input_data_prediction_path']

# Short summary of the settings that define which trained models are used.
config_summary = f"prediction model aggregation_level:{aggregation_level} \nsmart_ds_years:{config['smart_ds_years'][0]} \nmonths:{config['start_month']}_{config['end_month']}"
print(config_summary)

prediction model aggregation_level:feeder 
smart_ds_years:2018 
months:1_12


### Option 1: Predict future load using TGW weather data (cooling/heating) 
Note: this cell takes about 10 minutes to run for all regions and scenarios (both cooling and heating).

In [None]:
# Predict future load for every trained feeder model using TGW weather data.
#
# For each output column (cooling / heating) the cell loads the trained models
# and their input/output scalers, then, for each TGW weather year and scenario,
# runs every model on the weather data of its city and stores the resulting
# DataFrame (date_time, input features, prediction) in a dictionary that is
# saved with joblib, one file per (output column, scenario, weather year).
start_time = time.time()

# SMART-DS cities and the regions of each city to run.
CITY_REGIONS_TO_RUN = {
    "GSO": ["rural", "industrial", "urban-suburban"],
    "AUS": ["P1R", "P1U", "P2U"],
    "SFO": ["P1R", "P1U", "P2U"]
}

# TGW weather folder to use for each SMART-DS city.
TGW_LOCATION_BY_CITY = {
    "GSO": "Greensboro",
    "AUS": "Austin",
    "SFO": "SanFrancisco",  # we also have Concord TGW but we trained the SFO models with SanFrancisco resstock
}

# Alternative set of weather years / scenarios:
# TGW_years_scenarios = {
#     "2018": ["historical"],
#     "2058": ["rcp45cooler", "rcp45hotter", "rcp85cooler"],
# }

# Weather year -> list of TGW scenarios to predict for.
TGW_years_scenarios = {
    "2058": ["rcp85hotter"],
}

Y_columns = ["cooling_kw_sum", "heating_kw"]

for Y_column in Y_columns:
    print(f"Starting prediction for {Y_column}")
    # Each output column has its own model architecture name in the config.
    if Y_column == 'cooling_kw_sum':
        prediction_model_str = config['prediction_model_cooling']
    elif Y_column == 'heating_kw':
        prediction_model_str = config['prediction_model_heating']
    else:
        raise ValueError('Model architecture is not defined for model output! (only for cooling_sum and heating)')
    print(f"with prediction model str {prediction_model_str}")

    output_path_prediction_str = f"main_folder/load_prediction/results/data/prediction/output/{config['smart_ds_years'][0]}/months_{config['start_month']}_{config['end_month']}/{Y_column}/{config['X_columns_set']}/{config['aggregation_level']}/"
    output_data_training_path_str = f"main_folder/load_prediction/results/data/training/{config['smart_ds_years'][0]}/months_{config['start_month']}_{config['end_month']}/ml_output_data/{Y_column}/{config['X_columns_set']}/{config['aggregation_level']}/"
    print(output_data_training_path_str)

    # Load dictionaries containing models and scalers for all smart-ds regions.
    # NOTE: joblib.load unpickles the file; only load artifacts from a trusted source.
    mlp_models = joblib.load(os.path.join(output_data_training_path_str, "models", f"{prediction_model_str}_models_dict.joblib"))
    xnorms = joblib.load(os.path.join(output_data_training_path_str, "scalers", f"{prediction_model_str}_xnorm_dict.joblib"))
    ynorms = joblib.load(os.path.join(output_data_training_path_str, "scalers", f"{prediction_model_str}_ynorm_dict.joblib"))

    smart_ds_years, cities, regions, load_models, building_types = aux_ops.extract_unique_dimensions(mlp_models, CITY_REGIONS_TO_RUN)

    for TGW_weather_year, TGW_scenarios in TGW_years_scenarios.items():
        for TGW_scenario in TGW_scenarios:
            predictions_dict = {}  # predictions for this weather year and scenario
            print(f"Running for year {TGW_weather_year} and scenario {TGW_scenario}")
            for smart_ds_year in smart_ds_years:
                for city in cities:
                    # Select the TGW weather location based on the smart-ds city.
                    # Fail loudly on an unmapped city instead of reusing the
                    # location left over from the previous iteration.
                    if city not in TGW_LOCATION_BY_CITY:
                        raise ValueError(f"No TGW weather location is defined for city '{city}'")
                    TGW_location = TGW_LOCATION_BY_CITY[city]

                    # Load TGW weather input data
                    TGW_weather_df_save_path = f"{input_data_prediction_path}/{TGW_location}/{TGW_scenario}/"
                    TGW_weather_df = joblib.load(os.path.join(TGW_weather_df_save_path, f"TGW_weather_{TGW_weather_year}.joblib"))
                    input_df_new = TGW_weather_df

                    # The model inputs are the same for every feeder of this city,
                    # so filter and scan for missing values once per weather file.
                    X_new = input_df_new[X_columns]
                    X_new_has_missing = X_new.isnull().any().any()

                    for region in regions:
                        print(f".....Predicting {Y_column} with model {prediction_model_str} trained with SMART-DS data from {smart_ds_year} {city} {region} .....")
                        for load_model in load_models:
                            for building_type in building_types:
                                key = (smart_ds_year, city, region, load_model, building_type)
                                # Not every combination of dimensions has a trained model.
                                if key not in mlp_models:
                                    continue
                                # print(f"Predicting for {key}") # e.g., Predicting for ('2018', 'AUS', 'P1R', 'p1rhs0_1247--p1rdt6854', 'com')

                                # verify no NaNs in the input features
                                if X_new_has_missing:
                                    raise ValueError(f"Missing values in input data for {key}")

                                # Load feeder-specific model and scalers
                                ML_model = mlp_models[key]
                                xnorm = xnorms[key]
                                ynorm = ynorms[key]

                                ### Normalize, predict and denormalize output ###
                                # Normalize new input data using the scaler that was used for training
                                X_new_norm = xnorm.transform(X_new)
                                # Predict normalized y values using the trained model
                                MLP_y_new_pred_norm = ML_model.predict(X_new_norm).reshape(-1, 1)
                                # Denormalize the predicted y values back to the original scale
                                y_pred = ynorm.inverse_transform(MLP_y_new_pred_norm)

                                ### Save prediction with weather inputs and datetime ###
                                df_output = X_new.copy()  # initialize df with input features
                                # Insert the datetime column at the beginning (index 0)
                                df_output.insert(0, "date_time", input_df_new["date_time"])
                                df_output[f"{Y_column}_predicted"] = y_pred
                                # Keyed by the weather year (not the training year).
                                predictions_dict[(TGW_weather_year, city, region, load_model, building_type)] = df_output

            # Save dictionaries as joblib files
            predictions_dir = os.path.join(output_path_prediction_str, f"{TGW_scenario}/predictions")
            os.makedirs(predictions_dir, exist_ok=True)  # Create directories if they don't exist
            joblib.dump(predictions_dict, os.path.join(predictions_dir, f"{prediction_model_str}_TGW_{TGW_weather_year}_models_dict.joblib"))
            print(f"Saved model with {TGW_weather_year} and {TGW_scenario} at path: \n {predictions_dir} \n with name: {prediction_model_str}_TGW_{TGW_weather_year}_models_dict.joblib")

print("All predictions were saved successfully!")

end_time = time.time()
print(f"Runtime for Prediction: {(end_time - start_time) / 60:.2f} minutes")

### Option 2: Load prediction results

In [None]:
# Weather year and scenario whose saved predictions should be loaded
# (alternative: TGW_weather_year = '2018', TGW_scenario = 'historical').
TGW_weather_year = '2058'
TGW_scenario = 'rcp85hotter'

Y_column = "cooling_kw_sum"  # cooling_kw_sum, heating_kw

# Resolve the config entry that holds the model name for the chosen output.
model_config_key = {
    'cooling_kw_sum': 'prediction_model_cooling',
    'heating_kw': 'prediction_model_heating',
}.get(Y_column)
if model_config_key is None:
    raise ValueError('Model architecture is not defined for model output! (only for cooling_sum and heating)')
prediction_model_str = config[model_config_key]

# Folder the prediction cell writes its results to for this output column.
output_path_prediction_str = f"main_folder/load_prediction/results/data/prediction/output/{config['smart_ds_years'][0]}/months_{config['start_month']}_{config['end_month']}/{Y_column}/{config['X_columns_set']}/{config['aggregation_level']}/" 
predictions_dir = os.path.join(output_path_prediction_str, f"{TGW_scenario}/predictions")

# Load dictionary with predictions
predictions_file_name = f"{prediction_model_str}_TGW_{TGW_weather_year}_models_dict.joblib"
loaded_predictions_dict = joblib.load(os.path.join(predictions_dir, predictions_file_name))
print(f'Loading model from path: {predictions_dir} \n named: {prediction_model_str}_TGW_{TGW_weather_year}_models_dict.joblib \n with keys:')
# Last expression of the cell: displays the available prediction keys.
loaded_predictions_dict.keys()

In [40]:
# Prediction table of one feeder; keys are
# (weather year, city, region, load model, building type).
feeder_key = (TGW_weather_year, 'GSO', 'rural', 'rhs2_1247--rdt1264', 'res')
df = loaded_predictions_dict[feeder_key]

# Keep only the rows whose 'month' column equals 8 and display them.
is_selected_month = df['month'] == 8
df_month = df.loc[is_selected_month]
df_month

Unnamed: 0,date_time,year,month,day,hour,weekday,weekend,Dry Bulb Temperature [°C],Relative Humidity [%],Wind Speed [m/s],...,Minus 1h Temp,Minus 3h Temp,Minus 6h Temp,Minus 12h Temp,Minus 24h Temp,sin hour,cos hour,temp times sin hour,temp times cos hour,cooling_kw_sum_predicted
5088,2058-08-01 00:00:00-05:00,2058,8,1,0,3,0,25.91952,96.667990,1.980850,...,26.29104,26.74322,27.64288,31.03094,24.99430,0.000000,1.000000,0.000000,25.919520,1047.445139
5089,2058-08-01 01:00:00-05:00,2058,8,1,1,3,0,25.46588,97.060760,2.471320,...,25.91952,26.53040,26.88976,29.36030,25.08822,0.258819,0.965926,6.591055,24.598151,899.748314
5090,2058-08-01 02:00:00-05:00,2058,8,1,2,3,0,25.12190,96.852530,2.307980,...,25.46588,26.29104,26.55038,29.51367,25.11900,0.500000,0.866025,12.560950,21.756204,810.464728
5091,2058-08-01 03:00:00-05:00,2058,8,1,3,3,0,24.62783,97.467000,1.864477,...,25.12190,25.91952,26.74322,29.42687,25.18716,0.707107,0.707107,17.414506,17.414506,757.213187
5092,2058-08-01 04:00:00-05:00,2058,8,1,4,3,0,24.25466,99.117540,2.069077,...,24.62783,25.46588,26.53040,29.26150,25.26653,0.866025,0.500000,21.005152,12.127330,750.879845
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5827,2058-08-31 19:00:00-05:00,2058,8,31,19,5,1,29.76562,66.227410,1.418489,...,30.37246,33.87942,34.19717,27.26943,27.23828,-0.965926,0.258819,-28.751381,7.703909,2052.818501
5828,2058-08-31 20:00:00-05:00,2058,8,31,20,5,1,28.93713,68.442360,0.700742,...,29.76562,32.38530,34.76403,29.14535,26.78124,-0.866025,0.500000,-25.060290,14.468565,1820.977611
5829,2058-08-31 21:00:00-05:00,2058,8,31,21,5,1,27.45803,79.872310,1.297132,...,28.93713,30.37246,34.90640,31.04280,25.92867,-0.707107,0.707107,-19.415759,19.415759,1621.734933
5830,2058-08-31 22:00:00-05:00,2058,8,31,22,5,1,26.63010,84.011690,0.663005,...,27.45803,29.76562,33.87942,32.82867,25.52690,-0.500000,0.866025,-13.315050,23.062343,1474.800192
