In [1]:
import os

In [2]:
%pwd

'/Users/rociocuatecontzi/Desktop/programming/electricity-forecast/research'

In [3]:
# Step up to the project root only while the kernel is still inside the
# research/ folder. An unconditional chdir climbs one more level every time
# the cell is re-run, which breaks the relative paths used further down.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'/Users/rociocuatecontzi/Desktop/programming/electricity-forecast'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of settings for the model-training stage.

    Instances are assembled by ``ConfigurationManager.get_training_config``
    from the config, params and schema YAML files.
    """

    # Directory for this stage's artifacts (created when the config is built).
    root_dir: Path
    # Destination file for the fitted model.
    trained_model_path: Path
    # CSV files read with ``pd.read_csv`` for the train and test splits.
    training_data: Path
    testing_data: Path
    # Values taken from the params file (BASE_SCORE, BOOSTER, N_ESTIMATORS,
    # EARLY_STOPPING_ROUNDS, OBJECTIVE, MAX_DEPTH, LEARNING_RATE).
    params_base_score: float
    params_booster: str
    params_n_estimators: int
    params_early_stopping_rounds: int
    params_objective: str
    params_max_depth: int
    params_learning_rate: float
    # Name of the column used as the regression target.
    target_column: str
    # Mapping from the schema file; its keys are used as the feature column
    # names (the training code calls ``.keys()`` on it), so this is a
    # dict-like object rather than a string.
    columns: dict

In [6]:
from src.eForecaster.constants import *
from eForecaster.utils.common import read_yaml, create_directories

In [7]:
import xgboost as xgb
import pandas as pd

In [8]:
class ConfigurationManager:
    """Reads the project's YAML settings and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Load the three YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the main config file.
            params_filepath: Path of the model-parameters file.
            schema_filepath: Path of the data-schema file.
        """
        # Files are read in the order config -> params -> schema.
        self.config, self.params, self.schema = (
            read_yaml(yaml_path)
            for yaml_path in (config_filepath, params_filepath, schema_filepath)
        )

        create_directories([self.config.artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Build the ``TrainingConfig`` for the training stage.

        Reads the ``training`` section of the config, the ``XGRBoost`` section
        of the params and the schema, creates the stage's root directory, and
        returns the populated config object.
        """
        stage_cfg = self.config.training
        model_params = self.params.XGRBoost
        data_schema = self.schema

        stage_dir = Path(stage_cfg.root_dir)
        create_directories([stage_dir])

        return TrainingConfig(
            root_dir=stage_dir,
            trained_model_path=Path(stage_cfg.trained_model_path),
            training_data=Path(stage_cfg.train_dataset_path),
            testing_data=Path(stage_cfg.test_dataset_path),
            params_base_score=model_params.BASE_SCORE,
            params_booster=model_params.BOOSTER,
            params_n_estimators=model_params.N_ESTIMATORS,
            params_early_stopping_rounds=model_params.EARLY_STOPPING_ROUNDS,
            params_objective=model_params.OBJECTIVE,
            params_max_depth=model_params.MAX_DEPTH,
            params_learning_rate=model_params.LEARNING_RATE,
            target_column=data_schema.TARGET_COLUMN.name,
            columns=data_schema.COLUMNS,
        )



In [9]:
import os
import urllib.request as request
from zipfile import ZipFile
import time
import pandas as pd
import joblib

In [10]:
class Training:
    """Loads the train/test CSVs, fits an XGBoost regressor and saves it."""

    def __init__(self, config: "TrainingConfig"):
        """Keep the training-stage settings for later use.

        Args:
            config: Paths, hyper-parameters and column information for this stage.
        """
        self.config = config

    def get_dataset(self):
        """Read the training and testing CSV files named in the config.

        The two paths are also stored on the instance as ``training_data``
        and ``testing_data``.

        Returns:
            Tuple ``(df_train, df_test)`` of pandas DataFrames.
        """
        self.training_data = self.config.training_data
        self.testing_data = self.config.testing_data
        df_train = pd.read_csv(self.training_data)
        df_test = pd.read_csv(self.testing_data)
        return df_train, df_test

    def get_features_target(self, df_train, df_test):
        """Split both frames into feature columns and the target column.

        Feature names are the keys of ``config.columns``; the target is
        ``config.target_column``.

        Returns:
            Tuple ``(X_train, y_train, X_test, y_test)``.
        """
        feature_names = list(self.config.columns.keys())
        target_name = self.config.target_column
        X_train = df_train[feature_names]
        y_train = df_train[target_name]
        X_test = df_test[feature_names]
        y_test = df_test[target_name]
        return X_train, y_train, X_test, y_test

    @staticmethod
    def save_model(path, model):
        """Serialize ``model`` to ``path`` with joblib."""
        joblib.dump(model, path)

    def train(self, X_train, y_train, X_test, y_test):
        """Fit the regressor, keep it on ``self.model`` and save it to disk.

        Both splits are passed as ``eval_set`` so the metric is reported for
        each of them during boosting.
        """
        self.model = xgb.XGBRegressor(
            base_score=self.config.params_base_score,
            booster=self.config.params_booster,
            n_estimators=self.config.params_n_estimators,
            # Previously this configured value was loaded but never handed to
            # the model, so early stopping was silently disabled. XGBoost
            # monitors the last eval_set entry (the test split here). Passing
            # it to the constructor needs xgboost >= 1.6.
            early_stopping_rounds=self.config.params_early_stopping_rounds,
            objective=self.config.params_objective,
            max_depth=self.config.params_max_depth,
            learning_rate=self.config.params_learning_rate,
        )
        self.model.fit(
            X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)]
        )
        self.save_model(
            path=self.config.trained_model_path,
            model=self.model,
        )

In [11]:
# Run the training stage end to end: build the config, read the CSVs, split
# them into features/target, then fit and save the model. There is no
# try/except here: catching an exception only to re-raise it adds nothing,
# and any error surfaces with its original traceback.
config = ConfigurationManager()

training_config = config.get_training_config()
training = Training(config=training_config)
df_train, df_test = training.get_dataset()
X_train, y_train, X_test, y_test = training.get_features_target(df_train, df_test)
training.train(X_train, y_train, X_test, y_test)

[2024-01-28 14:09:04,964: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-01-28 14:09:04,969: INFO: common: yaml file: params.yaml loaded successfully]
[2024-01-28 14:09:04,976: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-01-28 14:09:04,977: INFO: common: created directory at: artifacts]
[2024-01-28 14:09:04,979: INFO: common: created directory at: artifacts/training]


[0]	validation_0-rmse:4698.15419	validation_1-rmse:5025.81747
[1]	validation_0-rmse:4651.46897	validation_1-rmse:4978.05865
[2]	validation_0-rmse:4605.25380	validation_1-rmse:4930.76761
[3]	validation_0-rmse:4559.50313	validation_1-rmse:4883.93169
[4]	validation_0-rmse:4514.21331	validation_1-rmse:4837.55775
[5]	validation_0-rmse:4469.37881	validation_1-rmse:4791.99840
[6]	validation_0-rmse:4424.99538	validation_1-rmse:4746.51469




[7]	validation_0-rmse:4381.05834	validation_1-rmse:4701.86742
[8]	validation_0-rmse:4337.56361	validation_1-rmse:4657.28971
[9]	validation_0-rmse:4294.50631	validation_1-rmse:4613.53641
[10]	validation_0-rmse:4251.88203	validation_1-rmse:4569.85268
[11]	validation_0-rmse:4209.68686	validation_1-rmse:4526.95651
[12]	validation_0-rmse:4167.91612	validation_1-rmse:4484.14057
[13]	validation_0-rmse:4126.56618	validation_1-rmse:4442.11675
[14]	validation_0-rmse:4085.63167	validation_1-rmse:4400.13160
[15]	validation_0-rmse:4045.11098	validation_1-rmse:4359.05301
[16]	validation_0-rmse:4004.99661	validation_1-rmse:4317.89929
[17]	validation_0-rmse:3965.28781	validation_1-rmse:4277.64541
[18]	validation_0-rmse:3925.97804	validation_1-rmse:4237.35336
[19]	validation_0-rmse:3887.06574	validation_1-rmse:4197.81586
[20]	validation_0-rmse:3848.54405	validation_1-rmse:4158.41782
[21]	validation_0-rmse:3810.41020	validation_1-rmse:4119.34694
[22]	validation_0-rmse:3772.66197	validation_1-rmse:4081.0

[2024-01-27 13:32:52,091: INFO: common: yaml file: config/config.yaml loaded successfully]
[2024-01-27 13:32:52,093: INFO: common: yaml file: params.yaml loaded successfully]
[2024-01-27 13:32:52,095: INFO: common: created directory at: artifacts]
[2024-01-27 13:32:52,096: INFO: common: created directory at: artifacts/training]
[0]	validation_0-rmse:4698.26670	validation_1-rmse:5026.74335
[1]	validation_0-rmse:4651.69420	validation_1-rmse:4979.88995
[2]	validation_0-rmse:4605.59175	validation_1-rmse:4933.51840
[3]	validation_0-rmse:4559.95423	validation_1-rmse:4887.60644
[4]	validation_0-rmse:4514.77739	validation_1-rmse:4842.48284
[5]	validation_0-rmse:4470.05609	validation_1-rmse:4797.49167
[6]	validation_0-rmse:4425.78647	validation_1-rmse:4753.27499


Parameters: { "estimators" } are not used.



[7]	validation_0-rmse:4381.96346	validation_1-rmse:4709.18263
[8]	validation_0-rmse:4338.58790	validation_1-rmse:4665.74835
[9]	validation_0-rmse:4295.64424	validation_1-rmse:4622.84680
[10]	validation_0-rmse:4253.13833	validation_1-rmse:4580.04038
[11]	validation_0-rmse:4211.05685	validation_1-rmse:4538.00244
[12]	validation_0-rmse:4169.40534	validation_1-rmse:4496.29520
[13]	validation_0-rmse:4128.17357	validation_1-rmse:4454.74452
[14]	validation_0-rmse:4087.35787	validation_1-rmse:4412.75039
[15]	validation_0-rmse:4046.94902	validation_1-rmse:4372.39904
[16]	validation_0-rmse:4006.95411	validation_1-rmse:4332.33017
[17]	validation_0-rmse:3967.36255	validation_1-rmse:4291.56657
[18]	validation_0-rmse:3928.16519	validation_1-rmse:4252.42768
[19]	validation_0-rmse:3889.36971	validation_1-rmse:4213.32900
[20]	validation_0-rmse:3850.96429	validation_1-rmse:4174.11301
[21]	validation_0-rmse:3812.94910	validation_1-rmse:4136.04363
[22]	validation_0-rmse:3775.31811	validation_1-rmse:4097.5