In [1]:
import os

In [2]:
%pwd

'c:\\Users\\Yashar\\End-to-End-Employee-Classification-with-MLOPs\\research'

In [3]:
# Step up one level from the notebook's folder (research/) to the project root
# so that relative paths such as 'artifacts/...' used further down resolve.
# NOTE: not idempotent - re-running this cell without restarting the kernel
# moves the working directory up another level.
os.chdir('../')

In [4]:
%pwd

'c:\\Users\\Yashar\\End-to-End-Employee-Classification-with-MLOPs'

## config_entity.py

In [13]:
# Prepare the Entity

from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings for the model-training stage.

    The dataclass does not validate or coerce values; the annotations only
    document the types the training code expects.
    """
    root_dir: Path            # output directory of the model-trainer stage
    data_path: Path           # location of the training data file
    preprocessor_path: Path   # location of the serialized preprocessor
    model_path: Path          # destination of the serialized trained model
    n_estimators: int         # number of boosting stages (defined in params.yaml)
    learning_rate: float      # shrinkage applied per boosting stage (defined in params.yaml)
    max_depth: int            # maximum tree depth (defined in params.yaml)
    target_column: str        # name of the target column (taken from schema.yaml)


## configuration.py

In [14]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories

class ConfigurationManager:
    """Reads the project's YAML files and assembles stage configuration objects."""

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH,
                 schema_filepath = SCHEMA_FILE_PATH):
        """Load the config, params and schema YAML files and create the artifacts root.

        Args:
            config_filepath: path of the main configuration YAML.
            params_filepath: path of the hyper-parameter YAML.
            schema_filepath: path of the schema YAML.
        """
        yaml_sources = (config_filepath, params_filepath, schema_filepath)
        self.config, self.params, self.schema = [
            read_yaml(source) for source in yaml_sources
        ]

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the ModelTrainerConfig and make sure its root directory exists.

        Returns:
            ModelTrainerConfig populated from the ``model_trainer`` section of
            the config, the ``GradientBoostingClassifier`` section of the
            params and the ``TARGET_COLUMN`` entry of the schema.
        """
        trainer_section = self.config.model_trainer
        gbc_params = self.params.GradientBoostingClassifier
        target_spec = self.schema.TARGET_COLUMN

        create_directories([trainer_section.root_dir])

        settings = dict(
            root_dir=trainer_section.root_dir,
            data_path=trainer_section.data_path,
            preprocessor_path=trainer_section.preprocessor_path,
            model_path=trainer_section.model_path,
            n_estimators=gbc_params.n_estimators,
            learning_rate=gbc_params.learning_rate,
            max_depth=gbc_params.max_depth,
            target_column=target_spec.name,
        )
        return ModelTrainerConfig(**settings)

## components/model_trainer.py

In [15]:
# Defining the components

from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier
from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE
from sklearn.metrics import f1_score
import pandas as pd
import joblib
from mlProject import logger

class ModelTrainer:
    """Fits a preprocessing + SMOTE + gradient-boosting pipeline and saves it.

    All file locations, the target column and the classifier hyper-parameters
    are taken from the ``ModelTrainerConfig`` passed to the constructor.
    """

    # Year from which "JoiningYear" is subtracted to derive the "Tenure" feature.
    TENURE_REFERENCE_YEAR = 2025

    def __init__(self,config: ModelTrainerConfig):
        """Store the training configuration.

        Args:
            config: settings providing ``data_path``, ``preprocessor_path``,
                ``model_path``, ``target_column`` and the classifier
                hyper-parameters.
        """
        self.config = config

    def train(self):
        """Train the pipeline on the full dataset and persist it.

        Reads the CSV at ``config.data_path``, adds the derived ``Tenure``
        column, loads the preprocessor from ``config.preprocessor_path``,
        fits preprocessor -> SMOTE -> GradientBoostingClassifier on all rows
        and dumps the fitted pipeline to ``config.model_path``.

        The directory that will hold ``config.model_path`` must already exist.
        """
        cfg = self.config
        target = cfg.target_column

        data = pd.read_csv(cfg.data_path)
        # Derived feature added before the preprocessor sees the data.
        data["Tenure"] = self.TENURE_REFERENCE_YEAR - data["JoiningYear"]
        X = data.drop(target, axis=1)
        y = data[target]

        # NOTE(review): joblib.load unpickles arbitrary objects; only load
        # preprocessor artifacts produced by this project's own pipeline.
        preprocessor = joblib.load(cfg.preprocessor_path)

        # Classifier configured with the hyper-parameters from the config.
        gbc = GradientBoostingClassifier(
            n_estimators=cfg.n_estimators,
            learning_rate=cfg.learning_rate,
            max_depth=cfg.max_depth,
            random_state=42
        )
        # Oversampling sits inside the imblearn pipeline, after preprocessing
        # and before the classifier.
        model = ImbPipeline([
            ('preprocessor',preprocessor),
            ('smote',SMOTE(random_state=42)),
            ('classifier',gbc)
        ])

        model.fit(X,y)

        # Save model
        joblib.dump(model, cfg.model_path)
        logger.info(f"Model saved to {cfg.model_path}")



## pipeline/stage_04_model_trainer.py

In [16]:
# defining the Pipeline

# Run the model-training stage: load the configuration, build the trainer,
# then fit and save the model.
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    # Keep the trainer under its own name instead of rebinding the config variable.
    model_trainer = ModelTrainer(config=model_trainer_config)
    model_trainer.train()
except Exception as e:
    # Record the failure with its traceback, then re-raise the same exception.
    logger.exception(e)
    raise

[2025-07-15 11:59:55,518: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-07-15 11:59:55,541: INFO: common: yaml file: params.yaml loaded successfully]
[2025-07-15 11:59:55,546: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-07-15 11:59:55,549: INFO: common: created directory at: artifacts]
[2025-07-15 11:59:55,550: INFO: common: created directory at: artifacts/model_trainer]
Model saved to artifacts/model_trainer/model_gbc_tuned.joblib
