In [19]:
import os

In [20]:
# Show the current working directory (IPython magic) before any directory change.
%pwd

'c:\\Users\\SIR\\Credit-Card-Default-Prediction-Project-Pwskills'

In [12]:
# Step up one level only when we are not already in the project root.
# An unconditional relative chdir is not idempotent: every re-run of this cell
# would climb one more directory. params.yaml is loaded through a
# cwd-relative path further down, so its presence is used as the root marker.
if not os.path.exists("params.yaml"):
    os.chdir("../")

In [21]:
# Show the working directory again to confirm where the remaining cells will run.
%pwd

'c:\\Users\\SIR\\Credit-Card-Default-Prediction-Project-Pwskills'

In [22]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainingConfig:
    """Immutable bundle of filesystem paths consumed by the model-training stage."""
    # Directory created by ConfigurationManager.get_model_training_config.
    model_dir: Path
    # File the trained model is pickled to by ModelTraining.save_model.
    model_file: Path
    # CSV file read as the training set by ModelTraining.load_data.
    train_file: Path
    # CSV file read as the test set by ModelTraining.load_data.
    test_file: Path

In [23]:
from ccdp.constants import *
from ccdp.utils.common import read_yaml, create_directories

In [24]:
class ConfigurationManager:
    """Load the YAML config/params files and build per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_training_config(self) -> ModelTrainingConfig:
        """Build the ModelTrainingConfig from the ``model_training`` config section.

        The model directory named in that section is created as a side effect.

        Returns:
            ModelTrainingConfig whose four fields are the section's entries
            wrapped in ``Path``.
        """
        section = self.config.model_training

        create_directories([section.model_dir])

        # Same four entries, in the same order, each converted to a Path.
        field_names = ("model_dir", "model_file", "train_file", "test_file")
        path_kwargs = {name: Path(getattr(section, name)) for name in field_names}
        return ModelTrainingConfig(**path_kwargs)


In [25]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from pathlib import Path
from ccdp.logging import logger
import pickle

class ModelTraining:
    """Train, evaluate and persist a RandomForestClassifier.

    The train/test CSV locations and the output model path come from the
    supplied ``ModelTrainingConfig``.
    """

    # Name of the label column expected in both the train and the test CSV.
    TARGET_COLUMN = "default"

    def __init__(self, config: ModelTrainingConfig, random_state=42):
        """
        Args:
            config: Paths used by the training stage.
            random_state: Seed forwarded to ``RandomForestClassifier`` so that
                repeated runs on the same data are reproducible. Pass ``None``
                to get the previous unseeded behaviour.
        """
        self.config = config
        self.model = RandomForestClassifier(random_state=random_state)

    def _split_features_target(self, frame):
        """Return ``(features, labels)`` by separating TARGET_COLUMN from ``frame``."""
        features = frame.drop(columns=[self.TARGET_COLUMN])
        labels = frame[self.TARGET_COLUMN]
        return features, labels

    def load_data(self) -> tuple:
        """
        Load the transformed training and testing data.

        Returns:
            ``(X_train, y_train, X_test, y_test)`` where the ``y_*`` values are
            the TARGET_COLUMN series and the ``X_*`` values are the remaining
            columns.

        Raises:
            KeyError: If TARGET_COLUMN is missing from either CSV.
        """
        train_df = pd.read_csv(self.config.train_file)
        test_df = pd.read_csv(self.config.test_file)

        X_train, y_train = self._split_features_target(train_df)
        X_test, y_test = self._split_features_target(test_df)

        return X_train, y_train, X_test, y_test

    def train_model(self, X_train, y_train):
        """
        Train the model on the training data.
        """
        logger.info("Training the model...")
        self.model.fit(X_train, y_train)
        logger.info("Model training completed.")

    def evaluate_model(self, X_test, y_test):
        """
        Evaluate the trained model on the test data.

        Accuracy and the classification report are written to the log;
        nothing is returned.
        """
        logger.info("Evaluating the model...")
        y_pred = self.model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        report = classification_report(y_test, y_pred)

        logger.info(f"Model accuracy: {accuracy}")
        logger.info(f"Classification report:\n{report}")

    def save_model(self):
        """
        Save the trained model to disk.

        The parent directory of ``config.model_file`` is created when missing.
        """
        model_path = Path(self.config.model_file)
        # Path.parent is "." for a bare filename, so this also works when
        # model_file has no directory component; os.path.dirname would return
        # "" in that case and os.makedirs("") raises FileNotFoundError.
        model_path.parent.mkdir(parents=True, exist_ok=True)
        with open(model_path, "wb") as model_file:
            pickle.dump(self.model, model_file)
        logger.info(f"Trained model saved to {self.config.model_file}")

    def run_training(self):
        """
        Complete the training pipeline: load data, train model, evaluate, and save the model.
        """
        X_train, y_train, X_test, y_test = self.load_data()
        self.train_model(X_train, y_train)
        self.evaluate_model(X_test, y_test)
        self.save_model()

In [26]:
#from ccdp.config.configuration import ConfigurationManager
#from ccdp.components.model_training import ModelTraining
#from ccdp.logging import logger

try:
    # Load the configuration for model training
    config = ConfigurationManager()
    model_training_config = config.get_model_training_config()

    # Initialize the ModelTraining component
    model_training = ModelTraining(config=model_training_config)

    # Run the model training process
    model_training.run_training()

    logger.info("Model training pipeline completed successfully.")

except Exception as e:
    # logger.exception records the traceback along with the message, and the
    # re-raise keeps a failed run from looking like a cleanly finished cell.
    logger.exception(f"Pipeline failed due to: {e}")
    raise


[2024-08-20 23:26:59,957: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-08-20 23:26:59,962: INFO: common: yaml file: params.yaml loaded successfully]
[2024-08-20 23:26:59,964: INFO: common: created directory at: artifacts]
[2024-08-20 23:26:59,967: INFO: common: created directory at: artifacts/model_training/models]
[2024-08-20 23:27:00,345: INFO: 3057392489: Training the model...]
[2024-08-20 23:27:11,744: INFO: 3057392489: Model training completed.]
[2024-08-20 23:27:11,745: INFO: 3057392489: Evaluating the model...]
[2024-08-20 23:27:11,915: INFO: 3057392489: Model accuracy: 0.818]
[2024-08-20 23:27:11,916: INFO: 3057392489: Classification report:
              precision    recall  f1-score   support

           0       0.84      0.95      0.89      3505
           1       0.66      0.36      0.47       995

    accuracy                           0.82      4500
   macro avg       0.75      0.65      0.68      4500
weighted avg       0.80      0.82      0.80 