In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pickle

In [None]:
class DataSplitter:
    def __init__(self, data, target_column):
        self.data = data
        self.target_column = target_column
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None

    def preprocess_data(self):
        """Data Cleaning and Encoding"""
        print("\nStarting Data Preprocessing...")

        # Remove unnecessary columns
        if 'Loan_ID' in self.data.columns:
            self.data.drop(columns=['Loan_ID'], inplace=True)

        # Fill missing numerical values with mean
        num_cols = self.data.select_dtypes(include=np.number).columns
        for col in num_cols:
            self.data[col].fillna(self.data[col].mean(), inplace=True)

        # Fill missing categorical values with mode
        cat_cols = self.data.select_dtypes(include='object').columns
        for col in cat_cols:
            self.data[col].fillna(self.data[col].mode()[0], inplace=True)

        # Encode categorical columns
        label_enc = LabelEncoder()
        for col in cat_cols:
            self.data[col] = label_enc.fit_transform(self.data[col])

        print("Data Preprocessing Completed!")
        return self.data

    def split_data(self, test_size=0.2, random_state=42):
        print("\nSplitting Data into Train and Test Sets...")
        X = self.data.drop(columns=[self.target_column])
        y = self.data[self.target_column]

        # Standardize features
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X_scaled, y, test_size=test_size, random_state=random_state
        )
        print("Data Split Completed!")
        print(f"Training Samples: {self.X_train.shape[0]}, Testing Samples: {self.X_test.shape[0]}")
        return self.X_train, self.X_test, self.y_train, self.y_test
data = pd.read_csv("Loan.csv")

    
splitter = DataSplitter(data, target_column="Loan_Status")
data = splitter.preprocess_data()
X_train, X_test, y_train, y_test = splitter.split_data()


Starting Data Preprocessing...
Data Preprocessing Completed!

Splitting Data into Train and Test Sets...
Data Split Completed!
Training Samples: 491, Testing Samples: 123


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  self.data[col].fillna(self.data[col].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  self.data[col].fillna(self.data[col].mode()[0], inplace=True)


In [10]:
class ModelTrainer:
    def __init__(self, model_name="Logistic Regression"):
        self.model_name = model_name
        self.model = None

    def train_model(self, X_train, y_train):
        print(f"\nTraining Model: {self.model_name} ...")
        self.model = LogisticRegression(max_iter=500)
        self.model.fit(X_train, y_train)
        print("Model Training Completed Successfully!")
        return self.model
    
trainer = ModelTrainer()
model = trainer.train_model(X_train, y_train)



Training Model: Logistic Regression ...
Model Training Completed Successfully!


In [15]:
class ModelEvaluator:
    def __init__(self, model, X_test, y_test):
        self.model = model
        self.X_test = X_test
        self.y_test = y_test

    def evaluate(self):
        print("\nEvaluating Model Performance...")
        y_pred = self.model.predict(self.X_test)

        acc = accuracy_score(self.y_test, y_pred)
        cm = confusion_matrix(self.y_test, y_pred)
        cr = classification_report(self.y_test, y_pred)

        print(f"Model Accuracy: {acc:.4f}")
        print("\nConfusion Matrix:\n", cm)
        print("\nClassification Report:\n", cr)
        return acc, cm, cr

evaluator = ModelEvaluator(model, X_test, y_test)
evaluator.evaluate()


Evaluating Model Performance...
Model Accuracy: 0.7886

Confusion Matrix:
 [[18 25]
 [ 1 79]]

Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.42      0.58        43
           1       0.76      0.99      0.86        80

    accuracy                           0.79       123
   macro avg       0.85      0.70      0.72       123
weighted avg       0.83      0.79      0.76       123



(0.7886178861788617,
 array([[18, 25],
        [ 1, 79]]),
 '              precision    recall  f1-score   support\n\n           0       0.95      0.42      0.58        43\n           1       0.76      0.99      0.86        80\n\n    accuracy                           0.79       123\n   macro avg       0.85      0.70      0.72       123\nweighted avg       0.83      0.79      0.76       123\n')

In [13]:
class ModelSaver:
    def __init__(self, model, filename="loan_model.pkl"):
        self.model = model
        self.filename = filename

    def save_model(self):
        with open(self.filename, 'wb') as file:
            pickle.dump(self.model, file)
        print(f"Model saved successfully as '{self.filename}'")

    def load_model(self):
        with open(self.filename, 'rb') as file:
            loaded_model = pickle.load(file)
        print(f"Model loaded successfully from '{self.filename}'")
        return loaded_model
    
saver = ModelSaver(model)
saver.save_model()
loaded_model = saver.load_model()

Model saved successfully as 'loan_model.pkl'
Model loaded successfully from 'loan_model.pkl'
