In [1]:
import os

# Step one directory up exactly once per kernel session. An unguarded
# os.chdir("../") climbs one more level every time this cell is re-run,
# which silently breaks every relative path used further down.
if "_CWD_MOVED_TO_PARENT" not in globals():
    os.chdir("../")
    _CWD_MOVED_TO_PARENT = True

# Last expression of the cell: show the resulting working directory.
os.getcwd()

'/home/izam/coding/Loan-Prediction-System'

In [2]:
from dataclasses import dataclass
from pathlib import Path

# entity
@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the model-training step.

    Instances are built by ``ConfigurationManager.get_model_trainer_config``
    from the ``model_trainer`` section of the loaded configuration plus the
    loaded parameters object.

    NOTE(review): the values are passed through unchanged from the loaded
    YAML objects, so the path fields may actually hold plain strings rather
    than ``Path`` instances -- confirm against ``read_yaml``.
    """
    # Directory under which trained models are written.
    root_dir: Path
    # CSV file read with pandas to obtain the training rows.
    train_data_path: Path
    # CSV file read with pandas to obtain the held-out test rows.
    test_data_path: Path
    # File name (joined onto root_dir) used when saving the selected model.
    model_name: str
    # Hyper-parameter search spaces; the trainer reads them by attribute
    # (e.g. ``params.Decision_Tree``), so an attribute-access mapping is
    # expected here rather than a bare ``dict``.
    params: dict

In [3]:
# Project-local helpers. These come from the same `loanPrediction` package the
# rest of this notebook imports from (logger, save_bin); the previous
# `sentimentAnalysis` package name belongs to a different project and only
# resolves if that project happens to be installed in the same environment.
# Only the names this notebook actually uses are imported (no wildcard).
from loanPrediction.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH, SCHEMA_FILE_PATH
from loanPrediction.utils.common import read_yaml, create_directories

In [4]:
class ConfigurationManager:
    """Load the project's YAML settings and hand out per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            schema_filepath: Location of the schema YAML.
        """
        # The files are read in this fixed order: config, params, schema.
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        # Every stage writes below this directory, so create it up front.
        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the settings object for the model-training stage.

        Creates the stage's output directory as a side effect.

        Returns:
            A ``ModelTrainerConfig`` filled from the ``model_trainer`` section
            of the configuration, carrying the whole parameters object as
            ``params``.
        """
        trainer_section = self.config.model_trainer

        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            train_data_path=trainer_section.train_data_path,
            test_data_path=trainer_section.test_data_path,
            model_name=trainer_section.model_name,
            params=self.params,
        )

In [5]:
from sklearn.model_selection import RandomizedSearchCV
import pandas as pd
from loanPrediction import logger
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from box import ConfigBox
from sklearn.svm import SVC
from loanPrediction.utils.common import save_bin

In [6]:
class ModelTrainer:
    """Tune, evaluate and persist a set of candidate classifiers.

    The constructor loads the train/test CSV files named in the config and
    splits each into a feature frame and a target series. ``train`` then runs
    a randomized hyper-parameter search per candidate, saves every tuned
    model, and additionally saves the one with the highest test accuracy
    under ``config.model_name``.
    """

    # Name of the label column expected in both CSV files.
    TARGET_COLUMN = "Loan_Status"

    # One seed for the hyper-parameter search and for the estimators that
    # have internal randomness, so repeated runs give the same models.
    RANDOM_STATE = 2

    def __init__(self, config: ModelTrainerConfig):
        """Load the datasets referenced by ``config``.

        Args:
            config: Settings for this stage (data paths, output directory,
                output model file name, hyper-parameter search spaces).
        """
        self.config = config
        train = pd.read_csv(self.config.train_data_path)
        test = pd.read_csv(self.config.test_data_path)

        self.X_train, self.y_train = self._split_features_target(train)
        self.X_test, self.y_test = self._split_features_target(test)

    @classmethod
    def _split_features_target(cls, frame):
        """Return ``(features, target)`` with the target column removed from features."""
        return frame.drop(columns=[cls.TARGET_COLUMN]), frame[cls.TARGET_COLUMN]

    def _randomized_search(self, name, clf, params, runs=20):
        """Tune ``clf`` with a randomized search and score it on the test set.

        Args:
            name: Label used in the log messages.
            clf: Unfitted estimator to tune.
            params: Parameter distributions handed to ``RandomizedSearchCV``.
            runs: Number of sampled parameter settings (``n_iter``).

        Returns:
            ``(best_model, accuracy)``: the best estimator found by the
            search and its accuracy on the held-out test set.
        """
        search = RandomizedSearchCV(
            clf,
            params,
            n_iter=runs,
            cv=5,
            n_jobs=-1,
            random_state=self.RANDOM_STATE,
        )
        search.fit(self.X_train, self.y_train)
        best_model = search.best_estimator_

        # Mean cross-validated score of the winning parameter setting.
        logger.info("Trained with {} with score: {:.3f}".format(name, search.best_score_))

        # Accuracy on data the search never saw.
        y_pred = best_model.predict(self.X_test)
        accuracy = accuracy_score(self.y_test, y_pred)
        logger.info('Predicted with {} ; Test score : {:.3f}'.format(name, accuracy))

        return best_model, accuracy

    def train(self):
        """Tune every candidate, save each one, and save the best as the main model.

        Each tuned model is written to ``<root_dir>/models/<name>.joblib``.
        The candidate with the highest test accuracy is written again to
        ``<root_dir>/<model_name>``; on a tie the earlier candidate wins.
        """
        model_params = self.config.params

        # name -> (estimator, search space). The tree-based estimators are
        # seeded for reproducibility; a `random_state` entry in the search
        # space, if any, still takes precedence during the search.
        candidates = {
            "Decision_Tree": (
                DecisionTreeClassifier(random_state=self.RANDOM_STATE),
                model_params.Decision_Tree,
            ),
            "Random_Forest": (
                RandomForestClassifier(random_state=self.RANDOM_STATE),
                model_params.Random_Forest,
            ),
            "SVC": (SVC(), model_params.SVC),
        }

        models_dir = os.path.join(self.config.root_dir, "models")
        create_directories([models_dir])

        best_model, best_score = None, float("-inf")
        for name, (clf, params) in candidates.items():
            fitted, score = self._randomized_search(name=name, clf=clf, params=params)
            save_bin(data=fitted, path=Path(os.path.join(models_dir, f"{name}.joblib")))

            # Strict comparison keeps the first candidate on ties, matching a
            # stable descending sort on the score.
            if score > best_score:
                best_model, best_score = fitted, score

        save_bin(data=best_model, path=Path(os.path.join(self.config.root_dir, self.config.model_name)))

        # Use the class name directly instead of slicing the estimator's repr.
        best_model_name = type(best_model).__name__
        best_model_score = round(best_score, 3)
        logger.info(f"Saved main model as {best_model_name}, with score - {best_model_score}")



In [7]:
# Build the stage configuration, then train and persist the models.
# The trainer gets its own name so `model_trainer_config` keeps referring to
# the config object. Exceptions propagate unchanged: the earlier
# `except Exception as e: raise e` wrapper added nothing.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.train()

[2023-12-11 16:59:14,652: INFO: common: yaml file: config/config.yaml loaded successfully]
[2023-12-11 16:59:14,657: INFO: common: yaml file: params.yaml loaded successfully]
[2023-12-11 16:59:14,660: INFO: common: yaml file: schema.yaml loaded successfully]
[2023-12-11 16:59:14,660: INFO: common: created directory at: artifacts]
[2023-12-11 16:59:14,661: INFO: common: created directory at: artifacts/model_trainer]
[2023-12-11 16:59:14,666: INFO: common: created directory at: artifacts/model_trainer/models]
[2023-12-11 16:59:16,574: INFO: 4220509917: Trained with Decision_Tree with score: 0.819]
[2023-12-11 16:59:16,577: INFO: 4220509917: Predicted with Decision_Tree ; Test score : 0.772]
[2023-12-11 16:59:16,579: INFO: common: binary file saved at: artifacts/model_trainer/models/Decision_Tree.joblib]
[2023-12-11 16:59:18,025: INFO: 4220509917: Trained with Random_Forest with score: 0.819]
[2023-12-11 16:59:18,032: INFO: 4220509917: Predicted with Random_Forest ; Test score : 0.756]
[2



[2023-12-11 16:59:18,453: INFO: 4220509917: Trained with SVC with score: 0.688]
[2023-12-11 16:59:18,456: INFO: 4220509917: Predicted with SVC ; Test score : 0.683]
[2023-12-11 16:59:18,457: INFO: common: binary file saved at: artifacts/model_trainer/models/SVC.joblib]
[2023-12-11 16:59:18,458: INFO: common: binary file saved at: artifacts/model_trainer/model.joblib]
[2023-12-11 16:59:18,459: INFO: 4220509917: Saved main model as DecisionTreeClassifier, with score - 0.772]
