In [55]:
import os

In [56]:
# Relative paths used later in this notebook (e.g. config/config.yaml) resolve
# against the current working directory. Uncomment the chdir below if the
# kernel starts in a subfolder of the project.
# os.chdir('../')
%pwd

'/workspaces/mlproject_wine_quality'

In [57]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelTrainerConfig:
    """Filesystem locations consumed by the model-training stage.

    Fields are positional in this order: root_dir, train_data_path,
    test_data_path, model_path.
    """

    # Output directory for the training stage.
    root_dir: Path
    # CSV file holding the training split.
    train_data_path: Path
    # CSV file holding the held-out test split.
    test_data_path: Path
    # Destination file for the serialized model.
    model_path: Path

In [58]:
from wine_quality_predictor.constants import *
from wine_quality_predictor.utils.common import read_yaml, make_directory

class ConfigurationManager:
    """Loads the project's YAML settings and builds stage-specific configs."""

    def __init__(
        self,
        config_filepath: Path = CONFIG_FILE_PATH,
        params_filepath: Path = PARAMS_FILE_PATH,
        schema_filepath: Path = SCHEMA_FILE_PATH
    ):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config_filepath = config_filepath
        self.params_filepath = params_filepath
        self.schema_filepath = schema_filepath

        # Files are parsed in the order config -> params -> schema.
        self.config, self.params, self.schema = (
            read_yaml(Path(location))
            for location in (config_filepath, params_filepath, schema_filepath)
        )

        # The loaded config is read via attribute access (artifacts_root).
        artifacts_root = Path(self.config.artifacts_root)
        make_directory(artifacts_root)

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build a ModelTrainerConfig from the ``model_trainer`` config section.

        Every value in that section is wrapped in ``Path`` before being passed
        to the dataclass.
        """
        section = self.config.model_trainer
        field_names = ("root_dir", "train_data_path", "test_data_path", "model_path")

        return ModelTrainerConfig(
            **{name: Path(getattr(section, name)) for name in field_names}
        )


In [59]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import accuracy_score
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, r2_score
from wine_quality_predictor.utils.common import make_directory, save_bin
# from wine_quality_predictor.entity.config_entity import ModelTrainerConfig
from wine_quality_predictor import logger
from sklearn.metrics import accuracy_score

class ModelTrainer:
    """Trains an XGBoost regressor on the prepared splits and saves it to disk."""

    def __init__(self, config: ModelTrainerConfig):
        """Keep the config that says where to read data and write the model."""
        self.config = config

    def train(self, target_column: str = "quality"):
        """Fit the regressor, log test-set MSE and R^2, and save the model.

        Args:
            target_column: Name of the label column expected in both the
                training and the test CSV. Defaults to ``"quality"``.

        Raises:
            KeyError: If ``target_column`` is missing from either split.
        """
        logger.info("Reading training and testing data...")
        train_df = pd.read_csv(self.config.train_data_path)
        test_df = pd.read_csv(self.config.test_data_path)

        # Fail early with a message that names the offending split, instead of
        # the generic KeyError pandas raises from drop()/__getitem__.
        for split_name, frame in (("train", train_df), ("test", test_df)):
            if target_column not in frame.columns:
                raise KeyError(
                    f"Target column {target_column!r} not found in {split_name} data"
                )

        X_train = train_df.drop(columns=target_column)
        y_train = train_df[target_column]
        X_test = test_df.drop(columns=target_column)
        y_test = test_df[target_column]

        # ElasticNet or RandomForestRegressor can be swapped in here for
        # comparison; XGBoost is the model currently trained and saved.
        logger.info("Training XGBoost Regressor model...")
        model = XGBRegressor(
            objective='reg:squarederror',
            n_estimators=100,
            learning_rate=0.1,
            max_depth=6,
            random_state=42,
        )
        model.fit(X_train, y_train)

        logger.info("Evaluating model...")
        predictions = model.predict(X_test)
        mse = mean_squared_error(y_test, predictions)
        r2 = r2_score(y_test, predictions)

        logger.info(f"Model MSE: {mse:.4f}")
        logger.info(f"Model R^2 Score: {r2:.4f}")

        logger.info("Saving model...")
        make_directory(self.config.root_dir)
        save_bin(self.config.model_path, model)
        logger.info(f"Model saved to: {self.config.model_path}")

        # This is a regression model, so the headline figure is R^2 (share of
        # variance explained), not a classification accuracy; it can be negative.
        print(f"R^2 (variance explained): {r2*100:.2f}%")


In [60]:
# from src.<your_project>.config.configuration import ConfigurationManager
# from src.<your_project>.components.model_trainer import ModelTrainer
from wine_quality_predictor import logger

STAGE_NAME = "Model Training"

def main():
    """Run the model-training stage, logging its start, completion and failures.

    Builds the trainer config through ConfigurationManager, trains via
    ModelTrainer, and re-raises any exception after logging it.

    Raises:
        Exception: Whatever the configuration or training step raised.
    """
    try:
        logger.info(f">>>>>> Stage {STAGE_NAME} started <<<<<<")
        config = ConfigurationManager().get_model_trainer_config()
        trainer = ModelTrainer(config)
        trainer.train()
        logger.info(f">>>>>> Stage {STAGE_NAME} completed <<<<<<\n")
    except Exception as e:
        logger.exception(f"Error in stage {STAGE_NAME}: {e}")
        # Bare raise re-raises the active exception as-is.
        raise


In [61]:
# Run the model-training stage defined above.
main()

[2025-04-13 08:10:47,508] INFO - 1569636174 - >>>>>> Stage Model Training started <<<<<<
[2025-04-13 08:10:47,514] INFO - common - Loaded YAML file from: config/config.yaml
[2025-04-13 08:10:47,517] INFO - common - Loaded YAML file from: params.yaml
[2025-04-13 08:10:47,521] INFO - common - Loaded YAML file from: schema.yaml
[2025-04-13 08:10:47,522] INFO - common - Created directory: artifacts
[2025-04-13 08:10:47,523] INFO - 1357590210 - Reading training and testing data...
[2025-04-13 08:10:47,532] INFO - 1357590210 - Training XGBoost Regressor model...
[2025-04-13 08:10:47,676] INFO - 1357590210 - Evaluating model...
[2025-04-13 08:10:47,682] INFO - 1357590210 - Model MSE: 0.0318
[2025-04-13 08:10:47,683] INFO - 1357590210 - Model R^2 Score: 0.9510
[2025-04-13 08:10:47,684] INFO - 1357590210 - Saving model...
[2025-04-13 08:10:47,684] INFO - common - Created directory: artifacts/model_trainer
[2025-04-13 08:10:47,689] INFO - common - Saved binary file at: artifacts/model_trainer/mo

Accuracy: 95.10%


In [62]:

# Reload the transformed splits directly to inspect the target column.
train_data_path = "artifacts/data_transformation/train.csv"
test_data_path = "artifacts/data_transformation/test.csv"

train_df = pd.read_csv(train_data_path)
test_df = pd.read_csv(test_data_path)

# Separate the feature columns from the "quality" label for each split.
X_train = train_df.drop(columns="quality")
y_train = train_df["quality"]
X_test = test_df.drop(columns="quality")
y_test = test_df["quality"]

# Number of missing labels in the training split, then the labels themselves.
print(y_train.isna().sum())
y_train


0


0      5
1      6
2      5
3      6
4      6
      ..
912    5
913    6
914    6
915    6
916    5
Name: quality, Length: 917, dtype: int64