## Convert feeder-level ResStock cooling and heating predictions to building-level ResStock total load

### Import packages

In [1]:
import numpy as np
import pyarrow.parquet as pq
import joblib
import scipy
import pandas as pd
from pandas import DatetimeIndex
import time
import os
import re
import glob
import gc
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from datetime import datetime, timedelta
import seaborn as sns
import pytz

import yaml
import pprint

from src import input_ops
from src import file_ops

### Load config file with scenarios and parameters 

In [1]:
# Load the YAML configuration that drives the rest of this notebook.
config_file_name = 'config1'
config_path = f"config/{config_file_name}.yaml"
config = input_ops.load_config(config_path)

# lists of parameters to use for TGW df creation
TGW_scenario = config['TGW_scenario']
TGW_weather_years = config['TGW_weather_years']
TGW_weather_year = TGW_weather_years[0]  # only the first configured year is used below
# Years before 2020 are only accepted together with the 'historical' scenario.
# The message quotes the exact (lower-case) value that the check compares against.
if int(TGW_weather_year) < 2020 and TGW_scenario != 'historical':
    raise ValueError(
        "Error: Check config file. Note that for TGW year < 2020 "
        f"TGW scenario should be 'historical' (got {TGW_scenario!r})"
    )

CITY_REGIONS_TO_RUN = config['CITY_REGIONS_TO_RUN']

# Prediction settings
demand_mode = config['demand_mode']
prediction_model_str_cooling = config['prediction_model_cooling']
prediction_model_str_heating = config['prediction_model_heating']
aggregation_level = config['aggregation_level']
# The value of 'X_columns_set' is itself a key into the config
# (presumably naming the list of model input columns - confirm against the config file).
X_columns = config[config['X_columns_set']]

input_data_dict_name = config['input_data_dict_name']
building_types = config["building_types"]

input_data_training_path = config['input_data_training_path']
start_month = config['start_month']
end_month = config['end_month']

## Initialize parameters for saving paths
Y_column = config['Y_column']
input_data_prediction_path = config['input_data_prediction_path']
output_path_prediction_str = config['output_data_prediction_path']

# Only the first configured SMART-DS year is used; its load data sits in a per-year sub-folder.
smart_ds_year = config['smart_ds_years'][0]
smart_ds_load_path = config['smart_ds_load_path'] + f"/{smart_ds_year}"

# Echo the key run parameters so the cell output records what was loaded.
print(f"TGW_scenario:{config['TGW_scenario']} \nTGW_years:{TGW_weather_years} \nsmart_ds_year: {smart_ds_year} \nlocations:{CITY_REGIONS_TO_RUN}")

## Load data 

### Load measured and predicted feeder cooling and heating (all regions)

In [2]:
## Load input load & weather data of year-city-region-building_type combinations
# NOTE: joblib.load unpickles arbitrary Python objects - only load files from trusted locations.
measured_feeder_cool_n_heat_dict = joblib.load(
    os.path.join(config["input_data_training_path"], f"{input_data_dict_name}.joblib")
)  # Load dictionary with load & weather data

# Load dictionary with heating predictions
# NOTE(review): Y_column and output_path_prediction_str are overridden here with hard-coded
# values instead of the ones read from the config file - confirm this is intended.
Y_column = 'heating_kw'
output_path_prediction_str = f"main_folder/load_prediction/results/data/prediction/output/{config['smart_ds_years'][0]}/months_{config['start_month']}_{config['end_month']}/{Y_column}/{config['X_columns_set']}/{config['aggregation_level']}/"
predictions_dir = os.path.join(output_path_prediction_str, f"{TGW_scenario}/predictions")
heating_loaded_predictions_dict = joblib.load(
    os.path.join(predictions_dir, f"{prediction_model_str_heating}_TGW_{TGW_weather_year}_models_dict.joblib")
)

# Load dictionary with cooling predictions (same folder layout, different Y_column)
Y_column = 'cooling_kw_sum'
cooling_output_path_prediction_str = f"main_folder/load_prediction/results/data/prediction/output/{config['smart_ds_years'][0]}/months_{config['start_month']}_{config['end_month']}/{Y_column}/{config['X_columns_set']}/{config['aggregation_level']}/"
# Reuse the path built above instead of formatting the identical string a second time.
output_path_prediction_str = cooling_output_path_prediction_str
predictions_dir = os.path.join(output_path_prediction_str, f"{TGW_scenario}/predictions")
cooling_loaded_predictions_dict = joblib.load(
    os.path.join(predictions_dir, f"{prediction_model_str_cooling}_TGW_{TGW_weather_year}_models_dict.joblib")
)

# Check that every heating entry has a cooling counterpart BEFORE mutating anything,
# so a mismatch is reported in full rather than failing half-way through the merge.
missing_feeders = [
    feeder_name
    for feeder_name in heating_loaded_predictions_dict
    if feeder_name not in cooling_loaded_predictions_dict
]
if missing_feeders:
    raise KeyError(
        f"{len(missing_feeders)} key(s) present in the heating predictions are missing "
        f"from the cooling predictions, e.g. {missing_feeders[:5]}"
    )

# The combined dictionary is the heating dictionary itself (same object, no copy);
# the cooling prediction column is added to each of its entries in place.
feeder_predict_cool_heat_dict = heating_loaded_predictions_dict

for feeder_name in feeder_predict_cool_heat_dict:
    feeder_predict_cool_heat_dict[feeder_name]['cooling_kw_sum_predicted'] = cooling_loaded_predictions_dict[feeder_name]['cooling_kw_sum_predicted']

# Drop names that are no longer needed. Only the cooling dictionary can actually be
# reclaimed: the heating data stays alive through feeder_predict_cool_heat_dict.
del cooling_loaded_predictions_dict
del heating_loaded_predictions_dict
gc.collect()

## Print dictionary structure

In [3]:
# Quick inspection of the merged feeder-prediction dictionary.
print("Dictionary sample keys and dataframe structure:")
file_ops.print_nested_dict_key_examples_and_dataframe_details(
    feeder_predict_cool_heat_dict
)
# Keep one leaf object around for ad-hoc inspection
# (presumably a DataFrame selected via key_number / n - see file_ops for exact semantics).
df = file_ops.return_leaf_dataframe(
    feeder_predict_cool_heat_dict,
    key_number=2,
    n=2,
)

## Disaggregate feeder prediction to buildings

### Disaggregate feeder prediction to buildings 
Note: run a single region at a time to reduce runtime and avoid memory issues.

In [None]:
# Disaggregate the predicted feeder-level cooling/heating to individual buildings.
#
# For every building, the measured building share of the measured feeder cooling
# (and heating) is applied to the predicted feeder cooling (and heating); the
# measured non-cooling/heating building load is then added to obtain the
# predicted building total. Results are written to two joblib files per region.
start_time = time.time()

print(f"Starting disaggregation process for TGW_weather_year: {TGW_weather_year}  TGW_scenario: {TGW_scenario} \n CITY_REGIONS_TO_RUN:{CITY_REGIONS_TO_RUN}")

# This check does not depend on city/region, so it is done once up front.
if aggregation_level != 'feeder':
    raise ValueError("This code supports feeder-level aggregation only - check aggregation_level value in config file")


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` element-wise with undefined results set to 0.

    ``0 / 0`` and missing values yield NaN and ``x / 0`` yields +/-inf; both are
    mapped to 0 so that a zero feeder value never propagates inf/NaN downstream.
    """
    ratio = numerator.div(denominator)
    return ratio.replace([np.inf, -np.inf], np.nan).fillna(0)


# Loop through all city, region, year, and building type combinations
for city, regions in CITY_REGIONS_TO_RUN.items():
    for region in regions:
        # Per-region result containers: {outer_key: {building_name: Series / DataFrame}}
        building_predicted_total_dict = {}
        building_predicted_cool_n_heat_dict = {}

        # Load measured cooling and heating building data for current region
        # NOTE: joblib.load unpickles arbitrary objects - only load trusted files.
        input_data_region_dir = f'{smart_ds_load_path}/{city}/{region}/buildings'
        measured_buildings_cool_heat_dict = joblib.load(os.path.join(input_data_region_dir, "measured_buildings_cool_heat_dict.joblib"))

        for outer_key, inner_dict in measured_buildings_cool_heat_dict.items():
            print(f"Disaggregating feeder for scenario: {outer_key}") # outer_key = (smart_ds_year, city, region, feeder, building_type)
            # --- For each feeder ---
            # Load measured feeder data
            measured_feeder_data = measured_feeder_cool_n_heat_dict[outer_key]
            feeder_cooling_series = measured_feeder_data['cooling_kw_sum']
            feeder_heating_series = measured_feeder_data['heating_kw']

            # Load predicted feeder data: same key, but with the weather year in first position
            feeder_pred_data = feeder_predict_cool_heat_dict[(TGW_weather_year, outer_key[1], outer_key[2], outer_key[3], outer_key[4])]

            # Re-label the predicted feeder data with the datetime index of the measured
            # data. This is a positional re-labelling, so both must have the same length.
            # A real exception is raised (not an assert) so the check survives `python -O`.
            datetime_index = feeder_cooling_series.index
            if len(feeder_pred_data) != len(datetime_index):
                raise ValueError(
                    f"Mismatch in time series length for {outer_key}: "
                    f"predicted={len(feeder_pred_data)}, measured={len(datetime_index)}"
                )
            feeder_pred_data_aligned = feeder_pred_data.copy()
            feeder_pred_data_aligned.index = datetime_index

            feeder_cooling_predicted = feeder_pred_data_aligned['cooling_kw_sum_predicted']
            feeder_heating_predicted = feeder_pred_data_aligned['heating_kw_predicted']
            feeder_temperature = feeder_pred_data_aligned['Dry Bulb Temperature [°C]']

            # Initialize inner dicts
            feeder_totals = building_predicted_total_dict[outer_key] = {}
            feeder_details = building_predicted_cool_n_heat_dict[outer_key] = {}

            for building_name, building_data in inner_dict.items():
                # --- For each building in feeder ---
                # NOTE(review): the building-level cooling column is 'cooling_sum_kw' while the
                # feeder-level one is 'cooling_kw_sum' - confirm both names are intended.
                building_cooling_series = building_data['cooling_sum_kw']
                building_heating_series = building_data['heating_kw']
                building_non_cooling_heating_series = building_data['non_cool_n_heat_kw']

                # Building share of the measured feeder cooling / heating
                # (0 wherever the share is undefined, e.g. feeder value of 0).
                cooling_ratio = _safe_ratio(building_cooling_series, feeder_cooling_series)
                heating_ratio = _safe_ratio(building_heating_series, feeder_heating_series)

                # Apply the shares to the predicted feeder values (computed once, reused below)
                building_cooling_predicted = cooling_ratio * feeder_cooling_predicted
                building_heating_predicted = heating_ratio * feeder_heating_predicted

                # Compute predicted total
                building_predicted_total = (
                    building_cooling_predicted +
                    building_heating_predicted +
                    building_non_cooling_heating_series
                )

                # Save to nested dicts
                feeder_totals[building_name] = building_predicted_total
                feeder_details[building_name] = pd.DataFrame({
                    'cooling_ratio': cooling_ratio,
                    'cooling_kw_sum_predicted': building_cooling_predicted,
                    'heating_ratio': heating_ratio,
                    'heating_kw_predicted': building_heating_predicted,
                    'total_kw_predicted': building_predicted_total,
                    'Dry Bulb Temperature [°C]': feeder_temperature
                })

        # Save the dictionaries as joblib files
        print(f'saving building prediction joblib for {city} {region}')
        # NOTE(review): absolute, machine-specific output root - consider moving it to the config file.
        feeder_predictions_path = f"/nfs/turbo/seas-mtcraig-climate/Aviad/load_prediction/results/data/prediction/output/{config['smart_ds_years'][0]}/months_{config['start_month']}_{config['end_month']}/cooling_n_heating/{config['X_columns_set']}/{config['aggregation_level']}/"
        predictions_dir = os.path.join(feeder_predictions_path, f"{TGW_scenario}/predictions/{city}/{region}/")
        os.makedirs(predictions_dir, exist_ok=True) # Create directories if they don't exist
        total_dict_path = os.path.join(predictions_dir, f"{demand_mode}_TGW_{TGW_weather_year}_buildings_dict.joblib")
        cool_n_heat_dict_path = os.path.join(predictions_dir, f"{demand_mode}_TGW_{TGW_weather_year}_buildings_cool_n_heat_dict.joblib")
        print("Saving joblib in: ")
        print(total_dict_path)
        joblib.dump(building_predicted_total_dict, total_dict_path)
        joblib.dump(building_predicted_cool_n_heat_dict, cool_n_heat_dict_path)

        # Free memory from unused data objects before loading the next region
        del measured_buildings_cool_heat_dict
        del building_predicted_total_dict
        del building_predicted_cool_n_heat_dict
        gc.collect()

end_time = time.time()
print(f"Runtime for Prediction: {(end_time - start_time) / 60:.2f} minutes")