In [1]:
import os

In [2]:
os.getcwd()

'c:\\Users\\Archana\\Desktop\\test\\assignmen_solution\\research'

In [3]:
os.chdir('../')

In [4]:
os.getcwd()

'c:\\Users\\Archana\\Desktop\\test\\assignmen_solution'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen = True)
class ModelTrainerConfig:
    root_dir:Path
    train_data_path:Path
    test_data_path:Path
    model_name:str
    alpha:float
    l1_ratio:float
    target_column:str

In [6]:
import sys
sys.path.append('C:/Users/Archana/Desktop/test/assignmen_solution/src')

In [7]:
from ml_proj.constants import *
from  ml_proj.utils.common import read_yaml, create_directories

In [8]:
class LogisticRegressionConfig:
    def __init__(self, root_dir, train_data_path, test_data_path, model_name, param_grid, cv_folds, scoring_metric, target_column):
        self.root_dir = root_dir
        self.train_data_path = train_data_path
        self.test_data_path = test_data_path
        self.model_name = model_name
        self.param_grid = param_grid
        self.cv_folds = cv_folds
        self.scoring_metric = scoring_metric
        self.target_column = target_column


In [9]:
import os
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import joblib
from ml_proj import logger

# Configuration Manager to manage config files and provide necessary configs
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath  = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_logistic_regression_config(self):
        config = self.config.model_trainer
        params = self.params.LogisticRegression
        schema = self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        logistic_regression_config = LogisticRegressionConfig(
            root_dir=config.root_dir,
            train_data_path=config.train_data_path,
            test_data_path=config.test_data_path,
            model_name=config.model_name,
            param_grid=params.param_grid,
            cv_folds=config.cv_folds,
            scoring_metric=config.scoring_metric,
            target_column=schema.name,
        )

        return logistic_regression_config


In [10]:
import os
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import joblib
import pickle
from ml_proj import logger

In [11]:
class ModelTrainer:
    def __init__(self, config: LogisticRegressionConfig):
        self.config = config

    def train(self):
        # Read training and testing data
        train_df = pd.read_csv(self.config.train_data_path)
        test_df = pd.read_csv(self.config.test_data_path)

        # Separate features (X) and target (y)
        train_x = train_df.drop([self.config.target_column], axis=1)
        test_x = test_df.drop([self.config.target_column], axis=1)
        train_y = train_df[[self.config.target_column]]
        test_y = test_df[[self.config.target_column]]

        # Create Logistic Regression model
        model = LogisticRegression()

        # Set up GridSearchCV for hyperparameter tuning
        grid_search = GridSearchCV(
            model, 
            self.config.param_grid, 
            cv=self.config.cv_folds, 
            scoring=self.config.scoring_metric
        )

        # Fit the model to the training data
        grid_search.fit(train_x, train_y)

        # Get the best model from GridSearchCV
        best_model = grid_search.best_estimator_

        # # Make predictions on the test data
        # y_pred = best_model.predict(test_x)

        # # Calculate accuracy on the test set
        # accuracy = accuracy_score(test_y, y_pred)
        # logger.info(f"Best Parameters: {grid_search.best_params_}")
        # logger.info(f"Accuracy: {accuracy:.4f}")

        # Save the trained model
        model_path = os.path.join(self.config.root_dir, self.config.model_name)
        # joblib.dump(best_model, model_path)
        with open('artifacts/model_trainer/logistic_regression_model.pkl','wb') as f:
            pickle.dump(best_model, f)
        logger.info(f"Model saved at: {model_path}")

In [12]:
try:
    # Load the Configuration Manager
    config = ConfigurationManager()
    
    # Get the logistic regression configuration
    logistic_regression_config = config.get_logistic_regression_config()

    # Create the ModelTrainer instance
    model_trainer = ModelTrainer(config=logistic_regression_config)
    
    # Train the Logistic Regression model
    model_trainer.train()

except Exception as e:
    raise e


[2025-03-27 18:28:05,863: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-03-27 18:28:05,879: INFO: common: yaml file: params.yaml loaded successfully]
[2025-03-27 18:28:05,886: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-03-27 18:28:05,889: INFO: common: created directory at: artifacts]
[2025-03-27 18:28:05,893: INFO: common: created directory at: artifacts/model_trainer]


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[2025-03-27 18:28:06,527: INFO: 4219896723: Model saved at: artifacts/model_trainer\logistic_regression_model.pkl]


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
