## Predict feeder electricity demand time series

### Import packages

In [1]:
import numpy as np
import pyarrow.parquet as pq
import joblib
import scipy
import pandas as pd
from pandas import DatetimeIndex
import time
import os
import re
import glob
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import sklearn
from sklearn.model_selection import TimeSeriesSplit
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor as MLP
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from datetime import datetime, timedelta
import seaborn as sns
import pytz
import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)
import yaml
import pprint

from src import input_ops
from src import model_ops
from src import aux_ops

### Load config file with scenarios and parameters 

In [1]:
# Locate and load the YAML configuration for this run.
config_file_name = 'config1'
config_path = f"config/{config_file_name}.yaml"
config = input_ops.load_config(config_path)

# Settings reused by the cells below.
aggregation_level = config['aggregation_level']
# 'X_columns_set' holds the name of the config entry that lists the input feature columns.
X_columns = config[config['X_columns_set']]
input_data_prediction_path = config['input_data_prediction_path']

# Echo the key run parameters as a quick sanity check.
print(f"prediction model aggregation_level:{aggregation_level} \nsmart_ds_years:{config['smart_ds_years'][0]} \nmonths:{config['start_month']}_{config['end_month']}")

### Predict future load using TGW weather data (cooling/heating) 
Note: running all regions and scenarios (both cooling and heating) should take about 10 minutes.

In [None]:
# Predict feeder-level load from TGW weather data.
#
# For each target column the trained per-feeder models and scalers are loaded,
# applied to every configured TGW weather year/scenario, and the resulting
# predictions are written as one joblib dictionary per
# (target column, scenario, weather year).
start_time = time.time()

# SMART-DS cities and regions to run predictions for.
CITY_REGIONS_TO_RUN = {
    "GSO": ["rural"],
}

# TGW weather years mapped to the scenarios to run for each year.
TGW_years_scenarios = {
    "2018": ["historical"],
}

Y_columns = ["cooling_kw_sum", "heating_kw"]

# SMART-DS city code -> TGW weather location folder name.
TGW_LOCATION_BY_CITY = {
    "GSO": "Greensboro",
    "AUS": "Austin",
    "SFO": "SanFrancisco",
}

for Y_column in Y_columns:
    print(f"Starting prediction for {Y_column}")
    if Y_column == 'cooling_kw_sum':
        prediction_model_str = config['prediction_model_cooling']
    elif Y_column == 'heating_kw':
        prediction_model_str = config['prediction_model_heating']
    else:
        raise ValueError('Model architecture is not defined for model output! (only for cooling_sum and heating)')
    print(f"with prediction model str {prediction_model_str}")

    output_path_prediction_str = f"main_folder/load_prediction/results/data/prediction/output/{config['smart_ds_years'][0]}/months_{config['start_month']}_{config['end_month']}/{Y_column}/{config['X_columns_set']}/{config['aggregation_level']}/"
    output_data_training_path_str = f"main_folder/load_prediction/results/data/training/{config['smart_ds_years'][0]}/months_{config['start_month']}_{config['end_month']}/ml_output_data/{Y_column}/{config['X_columns_set']}/{config['aggregation_level']}/"
    print(output_data_training_path_str)

    # Load dictionaries containing models and scalers for all smart-ds regions.
    # NOTE: joblib.load unpickles arbitrary objects; only load files produced by
    # a trusted training run.
    mlp_models = joblib.load(os.path.join(output_data_training_path_str, "models", f"{prediction_model_str}_models_dict.joblib"))
    xnorms = joblib.load(os.path.join(output_data_training_path_str, "scalers", f"{prediction_model_str}_xnorm_dict.joblib"))
    ynorms = joblib.load(os.path.join(output_data_training_path_str, "scalers", f"{prediction_model_str}_ynorm_dict.joblib"))

    smart_ds_years, cities, regions, load_models, building_types = aux_ops.extract_unique_dimensions(mlp_models, CITY_REGIONS_TO_RUN)
    for TGW_weather_year, TGW_scenarios in TGW_years_scenarios.items():
        for TGW_scenario in TGW_scenarios:
            predictions_dict = {}  # results for this (weather year, scenario)
            print(f"Running for year {TGW_weather_year} and scenario {TGW_scenario}")
            for smart_ds_year in smart_ds_years:
                for city in cities:
                    # Select the TGW weather location for this smart-ds city.
                    # Fail loudly on an unmapped city: otherwise the location
                    # from a previous iteration would be silently reused (or a
                    # NameError raised on the very first city).
                    try:
                        TGW_location = TGW_LOCATION_BY_CITY[city]
                    except KeyError:
                        raise ValueError(
                            f"No TGW weather location is defined for SMART-DS city '{city}'"
                        ) from None

                    # Load TGW weather input data
                    TGW_weather_df_save_path = f"{input_data_prediction_path}/{TGW_location}/{TGW_scenario}/"
                    TGW_weather_df = joblib.load(os.path.join(TGW_weather_df_save_path, f"TGW_weather_{TGW_weather_year}.joblib"))
                    input_df_new = TGW_weather_df

                    for region in regions:
                        print(f".....Predicting {Y_column} with model {prediction_model_str} trained with SMART-DS data from {smart_ds_year} {city} {region} .....")
                        for load_model in load_models:
                            for building_type in building_types:
                                key = (smart_ds_year, city, region, load_model, building_type)
                                # Not every combination of dimensions has a trained model.
                                if key not in mlp_models:
                                    continue

                                # Feeder-specific model and the scalers it was trained with
                                ML_model = mlp_models[key]
                                xnorm = xnorms[key]
                                ynorm = ynorms[key]

                                ### Filter, normalize, predict and denormalize output ###
                                # Restrict the weather data to the configured input columns
                                X_new = input_df_new[X_columns]
                                # The model cannot handle missing inputs; abort instead of predicting garbage
                                if X_new.isnull().any().any():
                                    raise ValueError(f"Missing values in input data for {key}")
                                # Normalize new input data using the scaler that was used for training
                                X_new_norm = xnorm.transform(X_new)
                                # Reshape predictions to a single column before passing them to the scaler
                                MLP_y_new_pred_norm = ML_model.predict(X_new_norm).reshape(-1, 1)
                                # Denormalize the predicted y values back to the original scale
                                y_pred = ynorm.inverse_transform(MLP_y_new_pred_norm)

                                ### Save prediction with weather inputs and datetime ###
                                df_output = X_new.copy()  # start from the input features
                                df_output.insert(0, "date_time", input_df_new["date_time"])
                                df_output[f"{Y_column}_predicted"] = y_pred
                                # NOTE(review): the key uses the TGW weather year and omits
                                # smart_ds_year, so with more than one SMART-DS year later
                                # entries overwrite earlier ones - confirm this is intended.
                                predictions_dict[(TGW_weather_year, city, region, load_model, building_type)] = df_output

            # Save the predictions of this (weather year, scenario) as a joblib file
            predictions_dir = os.path.join(output_path_prediction_str, f"{TGW_scenario}/predictions")
            os.makedirs(predictions_dir, exist_ok=True)  # Create directories if they don't exist
            joblib.dump(predictions_dict, os.path.join(predictions_dir, f"{prediction_model_str}_TGW_{TGW_weather_year}_models_dict.joblib"))
            print(f"Saved model with {TGW_weather_year} and {TGW_scenario} at path: \n {predictions_dir} \n with name: {prediction_model_str}_TGW_{TGW_weather_year}_models_dict.joblib")

print("All predictions were saved successfully!")

end_time = time.time()
print(f"Runtime for Prediction: {(end_time - start_time) / 60:.2f} minutes")

### Load prediction results

In [None]:
# Choose which saved prediction set to inspect.
TGW_weather_year = '2018'
TGW_scenario = 'historical'

Y_column = "cooling_kw_sum"  # one of: cooling_kw_sum, heating_kw

# Each supported target column has its own prediction-model entry in the config.
model_config_key_by_target = {
    'cooling_kw_sum': 'prediction_model_cooling',
    'heating_kw': 'prediction_model_heating',
}
if Y_column not in model_config_key_by_target:
    raise ValueError('Model architecture is not defined for model output! (only for cooling_sum and heating)')
prediction_model_str = config[model_config_key_by_target[Y_column]]

# Rebuild the output location that the prediction cell wrote to.
output_path_prediction_str = f"main_folder/load_prediction/results/data/prediction/output/{config['smart_ds_years'][0]}/months_{config['start_month']}_{config['end_month']}/{Y_column}/{config['X_columns_set']}/{config['aggregation_level']}/"
predictions_dir = os.path.join(output_path_prediction_str, f"{TGW_scenario}/predictions")

# Read the stored predictions dictionary back from disk.
predictions_file_path = os.path.join(predictions_dir, f"{prediction_model_str}_TGW_{TGW_weather_year}_models_dict.joblib")
loaded_predictions_dict = joblib.load(predictions_file_path)
print(f'Loading model from path: {predictions_dir} \n named: {prediction_model_str}_TGW_{TGW_weather_year}_models_dict.joblib \n with keys:')
# Last expression of the cell, so the notebook displays the available keys.
loaded_predictions_dict.keys()