In [1]:
import os

# Move the process working directory two levels up from the current one so
# that the relative paths used by later cells resolve from there.
# NOTE(review): the path is relative, so re-running this cell without
# restarting the kernel climbs two MORE levels each time. Presumably the
# intended target is the project root; confirm.
os.chdir("../../")
# IPython magic: display the resulting working directory as the cell output.
%pwd

'c:\\Users\\anfe1\\OneDrive\\Escritorio\\Instaleap\\Instamarket'

In [2]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Settings consumed by the model-training stage.

    The dataclass is frozen, so attributes cannot be reassigned after
    construction (the ``hparams`` dict itself is still a mutable object).

    NOTE(review): the values are taken straight from the loaded YAML files,
    so the path fields may actually be plain strings rather than ``Path``
    objects; confirm against ``read_yaml``.
    """
    # Directory where the fitted model is saved as "model.pkl".
    root_dir: Path
    # Directory that holds the "train.pkl" training data.
    data_path: Path
    # Name of the estimator to train; used as a key into MODELS.
    model: str
    # Keyword arguments forwarded to the estimator's set_params().
    hparams: dict

In [3]:
from instamarket.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from instamarket.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Loads the project's config and params YAML files and builds stage configs."""

    def __init__(self) -> None:
        """Read both YAML files and create the artifacts root directory."""
        self.config = read_yaml(CONFIG_FILE_PATH)
        self.params = read_yaml(PARAMS_FILE_PATH)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the settings for the model-training stage.

        Paths come from the ``model_trainer`` section of the config file; the
        model name and hyperparameters come from the ``training_arguments``
        section of the params file. The stage's output directory is created
        as a side effect.

        Returns:
            ModelTrainerConfig: the populated, frozen settings object.
        """
        # Resolve both sections first so a missing section fails before any
        # directory is created.
        trainer_section = self.config.model_trainer
        training_args = self.params.training_arguments

        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            data_path=trainer_section.data_path,
            model=training_args.model,
            hparams=training_args.hparams,
        )

In [8]:
import os

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor

from instamarket.utils.common import load_object, save_object
from instamarket.logging import logger

# Lookup table from a human-readable model name to a regressor instance.
# Each estimator is constructed exactly once, when this cell runs, using the
# library defaults (CatBoost is additionally silenced with verbose=False), so
# every lookup of a given name returns the same shared object.
MODELS = dict(
    [
        ("Linear Regression", LinearRegression()),
        ("Lasso", Lasso()),
        ("Ridge", Ridge()),
        ("K-Neighbors Regressor", KNeighborsRegressor()),
        ("Decision Tree", DecisionTreeRegressor()),
        ("Random Forest Regressor", RandomForestRegressor()),
        ("Gradient Boosting", GradientBoostingRegressor()),
        ("XGBRegressor", XGBRegressor()),
        ("CatBoosting Regressor", CatBoostRegressor(verbose=False)),
        ("AdaBoost Regressor", AdaBoostRegressor()),
    ]
)

class ModelTrainer:
    """Trains the configured regressor on the pickled training data and saves it."""

    def __init__(self, config: ModelTrainerConfig) -> None:
        """Store the training-stage settings.

        Args:
            config: paths, model name and hyperparameters for this run.
        """
        self.config = config

    def train(self):
        """Fit a multi-output regressor and write it to ``<root_dir>/model.pkl``.

        Loads ``train.pkl`` from ``config.data_path``, treats the last two
        columns as the targets and every other column as a feature, wraps the
        selected estimator in ``MultiOutputRegressor``, fits it and persists
        the fitted wrapper.

        Raises:
            KeyError: if ``config.model`` is not a key of ``MODELS``.
        """
        # Imported here so the fix does not depend on the notebook's import cell.
        from sklearn.base import clone

        logger.info("Loading training data")
        # The loaded object must expose .tocsc() (presumably a SciPy sparse
        # matrix; confirm against the data-transformation stage).
        train_arr = load_object(os.path.join(self.config.data_path, "train.pkl"))

        logger.info("Split data - Independent & dependent variable")
        # Convert once and slice twice instead of converting per slice.
        train_csc = train_arr.tocsc()
        X_train, y_train = train_csc[:, :-2], train_csc[:, -2:]

        logger.info(f"Model: {self.config.model}")
        try:
            prototype = MODELS[self.config.model]
        except KeyError:
            raise KeyError(
                f"Unknown model {self.config.model!r}; "
                f"expected one of {sorted(MODELS)}"
            ) from None
        # Work on an unfitted copy: calling set_params() on the registry entry
        # itself would leak this run's hyperparameters into every later run
        # that looks up the same name in the same kernel.
        model = clone(prototype)

        logger.info(f"Params: {self.config.hparams}")
        # An empty `hparams:` key in YAML may load as None; treat it as "no overrides".
        model.set_params(**(self.config.hparams or {}))
        multi_out_model = MultiOutputRegressor(model)

        logger.info("Start training model")
        # Targets are densified because they are only two columns wide.
        multi_out_model.fit(X_train, y_train.toarray())

        logger.info("Saving model")
        save_object(os.path.join(self.config.root_dir, "model.pkl"), multi_out_model)


In [7]:
# Run the training stage end to end: load the configuration, build the
# trainer and fit/save the model. Errors propagate unchanged; the previous
# `except Exception as e: raise e` wrapper performed no handling and only
# added a frame to the traceback.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.train()

[2024-04-20 15:35:22,311] 29 common - INFO - yaml file config\config.yml loaded successfully
[2024-04-20 15:35:22,315] 29 common - INFO - yaml file params.yml loaded successfully
[2024-04-20 15:35:22,317] 47 common - INFO - Created directory at: artifacts
[2024-04-20 15:35:22,317] 47 common - INFO - Created directory at: artifacts/model_trainer
[2024-04-20 15:35:22,317] 34 725514359 - INFO - Loading training data
[2024-04-20 15:35:22,350] 37 725514359 - INFO - Split data - Independent & dependent variable
[2024-04-20 15:35:22,584] 40 725514359 - INFO - Model: Gradient Boosting
[2024-04-20 15:35:22,585] 43 725514359 - INFO - Params: {}
[2024-04-20 15:37:51,250] 48 725514359 - INFO - Saving model
