In [76]:
import os

In [77]:
%pwd

'd:\\Python_DataScience\\Wine_Quality_Prediction\\Wine_Quality_Prediction_End_To_End_ML'

In [78]:
os.listdir()

['.git',
 '.gitignore',
 'app.py',
 'artifacts',
 'config',
 'LICENSE',
 'logs',
 'main.py',
 'params.yaml',
 'README.md',
 'requirements.txt',
 'research',
 'schema.yaml',
 'setup.py',
 'src',
 'template.py',
 'templates']

In [96]:
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    model_name: str
    TARGET_COLUMN: str
    max_depth: Optional[int]
    max_features: str
    min_samples_split: int
    n_estimators: 50
    

In [97]:
from wine_pred.constants import *

from wine_pred.utils.common import read_yaml, create_directories

In [92]:
class ConfigurationManager:
    def __init__ (
            self,
            config_filepath = CONFIG_FILE_PATH,
            params_filepath = PARAMS_FILE_PATH,
            schema_filepath = SCHEMA_FILE_PATH):
        
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        print("Config:", self.config)  # Debugging print
        print("Params:", self.params)  # Debugging print
        print("Schema:", self.schema)  # Debugging print

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:

        config = self.config.model_trainer
        params = self.params.Random_Forest_Regressor
        schema =  self.schema.TARGET_COLUMN

        print("Model Trainer Config:", config)  # Debugging print
        print("Params:", params)  # Debugging print
        print("Schema:", schema)  # Debugging print

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path=config.train_data_path,
            test_data_path=config.test_data_path,
            model_name=config.model_name,
            TARGET_COLUMN=schema.name,
            max_depth=params.max_depth,
            max_features=params.max_features,
            min_samples_split=params.min_samples_split,
            n_estimators=params.n_estimators)
        
        return model_trainer_config
    


In [98]:
from wine_pred.logging.logger import logger
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import QuantileTransformer
from sklearn.ensemble import RandomForestRegressor
import joblib

In [101]:
class ModelTrainer:
    def __init__(self, config: ModelTrainerConfig):
        self.config = config
        self.log_columns = ['fixed acidity', 'volatile acidity','residual sugar', 'citric acid', 'chlorides', 'sulphates', 'free sulfur dioxide', 'total sulfur dioxide', 'alcohol']
        logger.info('Constructor Initialized')

    def Train(self):
        train_data = pd.read_csv(self.config.train_data_path)
        logger.info('Train Dataset Read Done')
        preprocessor = ColumnTransformer(
            transformers=[
                ('quantile', QuantileTransformer(output_distribution='normal'), self.log_columns),
            ],
            remainder='passthrough' )
        
        
        X_train = train_data.drop([self.config.TARGET_COLUMN], axis=1)
        y_train = train_data[[self.config.TARGET_COLUMN]]

        pipeline = Pipeline(
        steps=[
            ('Preprocessor', preprocessor),
            ('Random Forest Reg', RandomForestRegressor(random_state=42,
                                                        max_depth=self.config.max_depth,
                                                        max_features=self.config.max_features,
                                                        min_samples_split=self.config.min_samples_split,
                                                        n_estimators=self.config.n_estimators))
        ] )

        pipeline.fit(X_train,y_train)
        logger.info('Object Training Done')

        model_path = os.path.join(self.config.root_dir,self.config.model_name)
        joblib.dump(pipeline, model_path)

        logger.info(f'Model Object saved at {model_path}')
        

In [102]:
try:
    config = ConfigurationManager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer = ModelTrainer(model_trainer_config)
    model_trainer.Train()
except Exception as e:
    logger.info('Problem During Model Training')
    raise e

[ 2024-07-07 22:39:59,025 ] 29 root - INFO - yaml file: config\config.yaml loaded succesfully
[ 2024-07-07 22:39:59,031 ] 29 root - INFO - yaml file: params.yaml loaded succesfully
[ 2024-07-07 22:39:59,033 ] 29 root - INFO - yaml file: schema.yaml loaded succesfully
Config: {'artifacts_root': 'artifacts', 'data_ingestion': {'root_dir': 'artifacts/data_ingestion', 'source_URL': 'https://github.com/fiftybucks101/Datasets/raw/main/wine_quality_dataset.zip', 'local_data_file': 'artifacts/data_ingestion/wine_quality.zip', 'unzip_dir': 'artifacts/data_ingestion'}, 'data_validation': {'root_dir': 'artifacts/data_validation', 'status_file': 'artifacts/data_validation/status.txt', 'unziped_data_dir': 'artifacts/data_ingestion/wine_quality_dataset/WineQT.csv'}, 'data_transformation': {'root_dir': 'artifacts/data_transformation', 'data_path': 'artifacts/data_ingestion/wine_quality_dataset/WineQT.csv'}, 'model_trainer': {'root_dir': 'artifacts/model_training', 'train_data_path': 'artifacts/data_t

  return fit_method(estimator, *args, **kwargs)
