In [None]:
# Extend the module search path so that `outboxml` and the local `config` module
# can be imported when the notebook is run from its own directory.
# NOTE(review): "../../" is presumably the repository root — confirm against the repo layout.
import sys
sys.path.append("../../")
sys.path.append("../../outboxml")

In [None]:
import pandas as pd
import numpy as np

from outboxml.automl_manager import AutoMLManager
from outboxml.extractors import Extractor


import config
from outboxml.metrics.base_metrics import BaseMetric
from outboxml.metrics.business_metrics import BaseCompareBusinessMetric

In [None]:
class EnergyEfficiencyExtractor(Extractor):
    """Extractor that loads a CSV file and derives the features used for the EEI model.

    The CSV is expected to contain the columns read in ``extract_dataset``:
    'Heating Load', 'Cooling Load', 'Surface Area', 'Wall Area', 'Roof Area',
    'Relative Compactness', 'Overall Height', 'Glazing Area Distribution'
    and 'Orientation'.
    """

    def __init__(self, path_to_file: str):
        """Store the CSV location, then run the base ``Extractor`` initialiser.

        Args:
            path_to_file: Path of the CSV file read by ``extract_dataset``.
        """
        self.__path_to_file = path_to_file
        super().__init__()

    def extract_dataset(self) -> pd.DataFrame:
        """Read the CSV and return the engineered feature frame.

        Derived columns:
            EEI                = (Heating Load + Cooling Load) / Surface Area
            Wall_Roof_Ratio    = Wall Area / Roof Area
            Compactness_Height = Relative Compactness * Overall Height

        Returns:
            DataFrame with the columns 'Wall_Roof_Ratio', 'Compactness_Height',
            'Glazing Area Distribution', 'Orientation' and 'EEI', in that order.
        """
        raw = pd.read_csv(self.__path_to_file)

        enriched = raw.assign(
            # Both load targets are folded into one Energy Efficiency Index.
            EEI=(raw['Heating Load'] + raw['Cooling Load']) / raw['Surface Area'],
            # Wall and roof areas are combined into a single ratio.
            Wall_Roof_Ratio=raw['Wall Area'] / raw['Roof Area'],
            # Compactness is combined with building height.
            Compactness_Height=raw['Relative Compactness'] * raw['Overall Height'],
        )

        selected_columns = [
            'Wall_Roof_Ratio',
            'Compactness_Height',
            'Glazing Area Distribution',
            'Orientation',
            'EEI',
        ]
        return enriched[selected_columns]
 
class EnergyEfficiencyMetrics(BaseMetric):
    """Business metric reporting the mean absolute percentage error of the 'EEI' result."""

    def __init__(self):
        """Nothing to set up; the base-class initialiser is not invoked here."""

    def calculate_metric(self, result1: dict, result2: dict = None) -> dict:
        """Compute MAPE (in percent) from the 'EEI' entry of ``result1``.

        Args:
            result1: Mapping whose 'EEI' entry exposes ``y`` (true values) and
                ``y_pred`` (predicted values).
            result2: Unused; accepted to keep the metric signature.

        Returns:
            ``{'MAPE': value}`` where value is mean(|(y - y_pred) / y|) * 100.
        """
        eei_result = result1['EEI']
        actual = eei_result.y
        predicted = eei_result.y_pred

        relative_error = (actual - predicted) / actual
        mape_percent = np.mean(np.abs(relative_error)) * 100
        return {'MAPE': mape_percent}
        
# Paths to the JSON configuration files handed to AutoMLManager below:
# `config_name` is passed as `models_config`, `auto_ml_config` as `auto_ml_config`.
config_name = './configs/config-energy-efficiency-extractor.json'
auto_ml_config = './configs/automl-energy-efficiency-automl.json'

In [None]:
# Build the AutoML manager from the two JSON configs, the custom extractor and the
# custom MAPE business metric defined above, then trigger the model update.
# Temporary saving (`save_temp`) and hyper-parameter tuning (`hp_tune`) are switched off,
# and no e-mail is sent by `update_models`.
# NOTE(review): the CSV path is relative to the notebook's working directory.
auto_ml = AutoMLManager(auto_ml_config=auto_ml_config,
                        models_config=config_name,
                        business_metric=EnergyEfficiencyMetrics(),
                        external_config=config,
                        extractor=EnergyEfficiencyExtractor(path_to_file="data/ENB2012_data.csv"),
                        compare_business_metric=BaseCompareBusinessMetric(),
                        save_temp=False,
                        hp_tune=False,
                        )
auto_ml.update_models(send_mail=False)

In [None]:
# Plot the 'EEI' results for the 'Wall_Roof_Ratio' feature, requesting 3 bins for
# numerical features.
from outboxml.export_results import ResultExport
ResultExport(ds_manager=auto_ml).plots('EEI',  features = ['Wall_Roof_Ratio'], bins_for_numerical_features = 3)

In [None]:
# Plot the 'EEI' results using plot type 0.
# NOTE(review): the meaning of plot_type = 0 is defined by ResultExport.plots — confirm there.
ResultExport(ds_manager=auto_ml).plots('EEI',plot_type = 0)

In [None]:
!pip install seaborn
import matplotlib.pyplot as plt
import seaborn as sns
    
def plot_info(df1, df2=None, model_name=None, features=None, bins=None):
    """Draw a true-vs-predicted scatter plot with a perfect-prediction diagonal.

    Args:
        df1: Frame providing the 'y_true' and 'y_prediction' columns to plot.
        df2, model_name, features, bins: Unused; accepted so the function can be
            passed as ``user_plot_func`` to ``ResultExport.plots``.
    """
    sns.set(style="whitegrid", font_scale=1.2)

    observed = df1['y_true']
    predicted = df1['y_prediction']

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.scatterplot(x=observed, y=predicted, color='royalblue', alpha=0.6, ax=ax)

    # The diagonal spans the combined range of both series so it covers every point.
    lower = min(observed.min(), predicted.min())
    upper = max(observed.max(), predicted.max())
    ax.plot([lower, upper], [lower, upper], color='red', linestyle='--', label='Perfect Prediction')

    ax.set_xlabel("EEI True Values")
    ax.set_ylabel("EEI Predicted Values")
    ax.set_title("True vs Predicted Values")
    ax.legend()
    fig.tight_layout()
    plt.show()
    
# Render the 'EEI' results with the custom plotting function defined above.
ResultExport(ds_manager=auto_ml).plots('EEI', user_plot_func = plot_info  )