# BTAP_ML Phase 4 Experiment Template
This template allows the BTAP_ML system to be tested in a notebook setting. The first portion preprocesses the data so that the resulting output files can be reused in any future tests.
Currently, the notebook requires changes to be made to the actual BTAP_ML source files before the experiments are run. These changes are for testing purposes only
and should never be committed.

In [None]:
import sys
import os
# If being run from the notebooks directory, access the src directory
sys.path.append(os.path.abspath('../src'))
import preprocessing as pre
import models.training_model as tr_model
import feature_selection as fs
import predict as pred
import config as config
from datetime import datetime
from pathlib import Path

# One-time preprocessing per test scenario
Run the following blocks of code to preprocess the data and extract the output files. Note that some manual changes may be needed and are commented accordingly.

In [None]:
# Can just use the absolute path here for the input_config.yml file
config_file = ''

# Load the settings once and reuse the same instance for every lookup below
# instead of constructing a new Settings object per value
settings = config.Settings()
app_config = settings.APP_CONFIG

# Must point to where the files are held
DOCKER_INPUT_PATH = app_config.DOCKER_INPUT_PATH
INPUT_CONFIG_FILENAME = "input_config.yml"

# Begin by loading the config file; the remaining inputs are read from it
cfg = config.get_config(config_file)
random_seed = cfg.get(app_config.RANDOM_SEED)

# Index 0 of each list is used for electricity and index 1 for gas
energy_param_files = cfg.get(app_config.ENERGY_PARAM_FILES)
building_param_files = cfg.get(app_config.BUILDING_PARAM_FILES)
hourly_energy_electric_file = energy_param_files[0]
building_params_electric_file = building_param_files[0]
hourly_energy_gas_file = energy_param_files[1]
building_params_gas_file = building_param_files[1]
val_hourly_energy_file = cfg.get(app_config.VAL_ENERGY_PARAM_FILE)
val_building_params_file = cfg.get(app_config.VAL_BUILDING_PARAM_FILE)
estimator_type = cfg.get(app_config.ESTIMATOR_TYPE)

# Set the perform_param_search parameter to 'no', this is hard-coded since we
# want to leave the infrastructure for it in, but remove the ability to use it for now.
# The value is deliberately NOT read back from the config file, which would
# silently undo the hard-coding.
perform_param_search = 'no'

# Identify the training processes to be taken and whether the updated model should
# be used for the specified training (energy and/or costing)
TRAINING_PROCESSES = [[app_config.ENERGY, True],
                      [app_config.COSTING, True]]

# Build the path of the directory meant to hold all data for the run
# (<output root>/<bucket name + timestamp>); the directory is only named here,
# not created.
# With Windows, the colon may cause issues depending on how the
# dependencies work with them, thus they are removed
run_timestamp = str(datetime.now()).replace(":", "-")
output_path_root = Path(app_config.DOCKER_OUTPUT_PATH).joinpath(app_config.TRAIN_BUCKET_NAME + run_timestamp)

In [None]:
# Bundle every input for this run into one TrainingModel instance; the later
# cells read these values back from input_model rather than from the loose
# variables defined above. All three skip_* flags are passed as False and the
# preprocessed/selected-feature file names start out empty.
training_model_kwargs = dict(
    input_prefix=DOCKER_INPUT_PATH,
    config_file='input_config.yml',
    random_seed=random_seed,
    # Electricity file first, gas file second (same order for both lists)
    building_param_files=[building_params_electric_file, building_params_gas_file],
    energy_param_files=[hourly_energy_electric_file, hourly_energy_gas_file],
    val_hourly_energy_file=val_hourly_energy_file,
    val_building_params_file=val_building_params_file,
    skip_file_preprocessing=False,
    preprocessed_data_file='',
    estimator_type=estimator_type,
    skip_feature_selection=False,
    selected_features_file='',
    perform_param_search='no',
    skip_model_training=False,
)
input_model = tr_model.TrainingModel(**training_model_kwargs)

In [None]:
# Root folder (relative path, trailing slash included) under which each
# process gets its own sub-folder of preprocessing outputs
output_path_root = 'preprocessed_outputs/'
# Processes to preprocess in the loop cell below.
# Will need to change one config.py value (i.e. remove one of the two keys and call the below cell twice)
# NOTE(review): "keys" presumably refers to the entries of this list, i.e. keep
# a single entry and run the loop cell once per process - confirm.
training_processes = ['energy', 'costing']
skip_file_preprocessing = True

In [None]:
# For costing, change DOCKER_SRC_PATH: str = 'src/' in src/config.py to DOCKER_SRC_PATH: str = '../src/'
for training_process in training_processes:
    # Each process writes beneath its own sub-folder of output_path_root
    # (i.e. preprocessed_outputs/<process> with the default root)
    output_path = output_path_root + training_process

    config.create_directory(str(output_path))

    # Pass the directory created above rather than re-deriving it from a
    # hard-coded literal, so changing output_path_root cannot make the created
    # directory and the directory handed to preprocessing disagree.
    # preprocessed_data_file is rebound on every iteration, so after the loop it
    # only holds the value returned for the last process.
    preprocessed_data_file = pre.main(config_file=input_model.config_file,
                                      process_type=training_process,
                                      hourly_energy_electric_file=input_model.energy_param_files[0],
                                      building_params_electric_file=input_model.building_param_files[0],
                                      val_hourly_energy_file=input_model.val_hourly_energy_file,
                                      val_building_params_file=input_model.val_building_params_file,
                                      hourly_energy_gas_file=input_model.energy_param_files[1],
                                      building_params_gas_file=input_model.building_param_files[1],
                                      output_path=output_path,
                                      preprocess_only_for_predictions=False,
                                      random_seed=input_model.random_seed,
                                      building_params_folder='',
                                      start_date='',
                                      end_date='',
                                      ohe_file='',
                                      cleaned_columns_file='')

# Feature selection and training

In [None]:
# Can change whether energy or costing is being tested
training_process = 'costing' # energy or costing
output_path = output_path_root + training_process
# Preprocessing file relative path, derived from the selected process so that
# switching training_process also switches the data file, and built with
# pathlib so the separators are correct on every OS (the previous literal used
# Windows-only backslashes and always pointed at the costing output).
# NOTE(review): assumes the energy output follows the same
# <output_path>/preprocessing/preprocessing.json layout as costing - confirm.
input_model.preprocessed_data_file = str(Path(output_path, 'preprocessing', 'preprocessing.json'))

In [None]:
# Feature selection: run it on the preprocessed data for the chosen estimator
# type and record the value it returns on input_model so the training cell
# can pass it along.
selected_features_file = fs.main(
    input_model.config_file,
    input_model.preprocessed_data_file,
    input_model.estimator_type,
    output_path,
)
input_model.selected_features_file = selected_features_file

In [None]:
# Model training: all arguments are positional, so their order must be kept
# exactly as listed here.
training_args = (
    input_model.config_file,
    training_process,
    input_model.preprocessed_data_file,
    input_model.selected_features_file,
    input_model.perform_param_search,
    output_path,
    input_model.random_seed,
    input_model.building_param_files[0],  # first entry: electricity
    input_model.building_param_files[1],  # second entry: gas
    input_model.val_building_params_file,
    # NOTE(review): the meaning of the two trailing flags is not visible in
    # this notebook - confirm against the signature of predict.main.
    True,
    True,
)
model_path, train_results = pred.main(*training_args)