In [5]:
import os

In [6]:
%pwd

'c:\\Users\\anand\\Desktop\\reume_projet\\AutoPrice-AI\\research'

In [7]:
# This notebook sits in a sub-folder, while the paths used further down
# ("config/config.yaml", "params.yaml", "schema.yaml") are relative to the
# project root. Step up one level only when the config file is not already
# reachable, so re-running this cell does not keep climbing the directory tree.
if not os.path.exists("config/config.yaml"):
    os.chdir("../")

In [8]:
%pwd

'c:\\Users\\anand\\Desktop\\reume_projet\\AutoPrice-AI'

In [None]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings for the model-training stage.

    Instances are frozen: fields cannot be reassigned after construction.
    """
    # Directory the trained model, metrics and feature importances are written to.
    root_dir: Path
    # CSV file read as the training split.
    train_data_path: Path
    # CSV file read as the evaluation split.
    test_data_path: Path
    # File name (joined onto root_dir) the fitted model is dumped to.
    model_name: str
    # Name of the column to predict; it is dropped from the feature frames.
    target_column: str
    # Training parameters. The trainer in this notebook reads the keys
    # 'random_state', 'xgboost', 'cv' -> 'folds' and 'scoring_metric'.
    params: dict

In [10]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories

In [None]:
class ConfigurationManager:
    """Reads the project's YAML files and hands out per-stage config objects."""

    def __init__(
        self,
        config_filepath: Path = Path("config/config.yaml"),
        params_filepath: Path = Path("params.yaml"),
        schema_filepath: Path = Path("schema.yaml")
    ):
        """Load the three YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration file.
            params_filepath: Location of the training-parameters file.
            schema_filepath: Location of the data-schema file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Every stage writes below this directory, so create it up front.
        artifacts_root = self.config['artifacts_root']
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the settings for the model-training stage.

        Creates the stage's root directory as a side effect.

        Returns:
            A ModelTrainerConfig built from the 'model_trainer' section of the
            config file, the 'TARGET_COLUMN' section of the schema file and
            the complete contents of the params file.
        """
        trainer_section = self.config['model_trainer']
        target_section = self.schema['TARGET_COLUMN']

        trainer_root = trainer_section['root_dir']
        create_directories([trainer_root])

        return ModelTrainerConfig(
            root_dir=Path(trainer_root),
            train_data_path=Path(trainer_section['train_data_path']),
            test_data_path=Path(trainer_section['test_data_path']),
            model_name=trainer_section['model_name'],
            target_column=target_section['name'],
            params=self.params
        )



In [None]:
from dataclasses import dataclass
from pathlib import Path
import os
import joblib
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from typing import Dict, Any, Tuple
import json
import yaml

In [None]:
class ModelTrainer:
    """Train, evaluate and persist an XGBoost regression pipeline.

    The estimator is a scikit-learn ``Pipeline`` with a ``StandardScaler`` step
    named ``'scaler'`` followed by an ``XGBRegressor`` step named ``'model'``.
    ``train()`` is the entry point: it reads the train/test CSV files named in
    the config, optionally tunes hyperparameters with ``GridSearchCV``, scores
    the fitted pipeline on the test split and writes the model, the metrics
    and the feature importances under ``config.root_dir``.
    """

    def __init__(self, config: ModelTrainerConfig):
        """Store the config and build the (still unfitted) pipeline.

        Args:
            config: Paths, model file name, target column and the ``params``
                mapping. ``params['random_state']`` is read immediately.
        """
        self.config = config
        self.model = self._initialize_model()
        self.feature_importances = None
        # Training feature names, recorded by _load_data() so that
        # _save_artifacts() can label the importances without relying on a
        # variable from another scope.
        self.feature_names = None

    def _initialize_model(self) -> Pipeline:
        """Initialize XGBoost model with preprocessing pipeline"""
        return Pipeline([
            ('scaler', StandardScaler()),
            ('model', XGBRegressor(
                random_state=self.config.params['random_state'],
                n_jobs=-1,
                enable_categorical=False
            ))
        ])

    @staticmethod
    def _qualify_param_grid(estimator, param_grid) -> Dict[str, Any]:
        """Adapt a configured grid so it addresses parameters of ``estimator``.

        A grid searched over a Pipeline must name step parameters as
        ``<step>__<param>``. Keys the pipeline already recognises are kept
        untouched; any other key is assumed to belong to the ``'model'`` step
        and is prefixed with ``model__``. Scalar values are wrapped in a
        one-element list because a grid entry has to be a sequence of
        candidates.

        Args:
            estimator: The pipeline the grid will be searched over.
            param_grid: Mapping of parameter name to candidate value(s).

        Returns:
            A plain dict usable as ``GridSearchCV(param_grid=...)``.
        """
        known_params = estimator.get_params(deep=True)
        qualified = {}
        for name, candidates in dict(param_grid).items():
            key = name if name in known_params else f"model__{name}"
            if candidates is None or isinstance(candidates, (str, bytes, bool, int, float)):
                candidates = [candidates]
            qualified[key] = candidates
        return qualified

    def _load_data(self) -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]:
        """Load the train/test CSV files and split features from the target.

        Returns:
            ``(X_train, X_test, y_train, y_test)``.

        Raises:
            ValueError: If the target column is missing from either file.
        """
        train_data = pd.read_csv(self.config.train_data_path)
        test_data = pd.read_csv(self.config.test_data_path)

        target = self.config.target_column
        if target not in train_data.columns:
            raise ValueError(f"Target column '{self.config.target_column}' not found")
        # Check the test file as well, so a bad file is reported with the same
        # clear error instead of a KeyError from DataFrame.drop().
        if target not in test_data.columns:
            raise ValueError(f"Target column '{target}' not found in test data")

        X_train = train_data.drop([target], axis=1)
        X_test = test_data.drop([target], axis=1)
        y_train = train_data[target]
        y_test = test_data[target]

        # Remember the column names for the feature-importance report.
        self.feature_names = list(X_train.columns)

        print(f"Data shapes - Train: {X_train.shape}, Test: {X_test.shape}")
        return X_train, X_test, y_train, y_test

    def _evaluate_model(self, model, X_test: pd.DataFrame, y_test: pd.Series) -> Dict[str, float]:
        """Score a fitted model on the test split.

        Returns:
            Dict with keys ``'rmse'``, ``'mae'`` and ``'r2'``. Values are cast
            to built-in ``float`` so the dict can always be written as JSON.
        """
        preds = model.predict(X_test)
        return {
            'rmse': float(np.sqrt(mean_squared_error(y_test, preds))),
            'mae': float(mean_absolute_error(y_test, preds)),
            'r2': float(r2_score(y_test, preds))
        }

    def _save_artifacts(self, model, metrics: Dict[str, float], feature_names=None) -> str:
        """Write the model, metrics and feature importances under ``root_dir``.

        Args:
            model: Fitted pipeline containing a step named ``'model'`` that
                exposes ``feature_importances_``.
            metrics: Metric name to value mapping, written to ``metrics.json``.
            feature_names: Optional labels for the importances. When omitted,
                the names recorded by ``_load_data()`` are used, then the
                pipeline's ``feature_names_in_`` if it has one, and finally
                positional names ``feature_0``, ``feature_1``, ...

        Returns:
            Path of the saved model file.
        """
        model_path = os.path.join(self.config.root_dir, self.config.model_name)
        joblib.dump(model, model_path)

        # Save metrics
        metrics_path = os.path.join(self.config.root_dir, "metrics.json")
        with open(metrics_path, 'w') as f:
            json.dump(metrics, f)

        # Save feature importances
        importance_values = model.named_steps['model'].feature_importances_

        # Resolve the labels with explicit None checks: the truth value of an
        # Index or array is ambiguous, so `or` chaining would be unsafe here.
        if feature_names is None:
            feature_names = getattr(self, 'feature_names', None)
        if feature_names is None:
            feature_names = getattr(model, 'feature_names_in_', None)
        if feature_names is None:
            feature_names = [f"feature_{i}" for i in range(len(importance_values))]

        importances = pd.DataFrame({
            'feature': list(feature_names),
            'importance': importance_values
        }).sort_values('importance', ascending=False)

        importance_path = os.path.join(self.config.root_dir, "feature_importances.csv")
        importances.to_csv(importance_path, index=False)
        self.feature_importances = importances

        return model_path

    def train(self) -> Dict[str, Any]:
        """Run the full training pipeline.

        If ``params['xgboost']`` holds a non-empty grid, the pipeline is tuned
        with ``GridSearchCV`` (folds from ``params['cv']['folds']``, scoring
        from ``params['scoring_metric']``) and the best estimator is kept.
        Otherwise the pipeline is fitted once with its default parameters.

        Returns:
            Dict with keys ``'metrics'``, ``'model_path'`` and
            ``'feature_importances'``.

        Raises:
            Exception: Any failure is reported with ``print`` and re-raised.
        """
        try:
            X_train, X_test, y_train, y_test = self._load_data()

            print("Starting XGBoost training")

            model = self.model
            model_params = self.config.params.get('xgboost', {})

            if model_params:
                print("Performing hyperparameter tuning with GridSearchCV")
                grid = GridSearchCV(
                    estimator=model,
                    # Route bare XGBoost names to the pipeline's 'model' step.
                    param_grid=self._qualify_param_grid(model, model_params),
                    cv=self.config.params['cv']['folds'],
                    scoring=self.config.params['scoring_metric'],
                    n_jobs=-1,
                    verbose=1
                )
                grid.fit(X_train, y_train)
                model = grid.best_estimator_
                print(f"Best parameters: {grid.best_params_}")
            else:
                # Without a grid the pipeline still has to be fitted before it
                # can predict.
                print("No hyperparameter grid configured; fitting with default parameters")
                model.fit(X_train, y_train)

            metrics = self._evaluate_model(model, X_test, y_test)

            print("\nXGBoost performance:")
            for metric, value in metrics.items():
                print(f"{metric.upper()}: {value:.4f}")

            model_path = self._save_artifacts(model, metrics, feature_names=X_train.columns)
            print(f"\nModel saved to {model_path}")

            return {
                'metrics': metrics,
                'model_path': model_path,
                'feature_importances': self.feature_importances
            }

        except Exception as e:
            print(f"XGBoost training failed: {str(e)}")
            raise


In [22]:
# Build the stage configuration, then train. The trainer gets its own name
# instead of overwriting `model_trainer_config`, and the result is bound to a
# variable so it can be inspected in a later cell. Errors propagate unchanged,
# so no try/except wrapper is needed here.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
training_result = model_trainer.train()

[2025-06-15 05:56:11,379: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-06-15 05:56:11,381: INFO: common: yaml file: params.yaml loaded successfully]
[2025-06-15 05:56:11,383: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-06-15 05:56:11,384: INFO: common: created directory at: artifacts]
[2025-06-15 05:56:11,386: INFO: common: created directory at: artifacts/model_trainer]


AttributeError: 'ModelTrainerConfig' object has no attribute 'params'