In [2]:
import os
import tomllib
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import torch
import torch.nn as nn
from pandas import DataFrame
from sklearn.compose import ColumnTransformer
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

from common.utils import load_dataset, optimize_memory

### Exploratory Data Analysis

In [None]:
# Seaborn's current default colour palette, bound to `color`.
# Not referenced by any of the cells visible in this notebook section.
color = sns.color_palette()

In [None]:
# Load the dataset through the project helper and preview its first rows.
# NOTE(review): `load_dataset` lives in common.utils; what `index=False`
# controls (presumably whether an index column is set) should be confirmed there.
train_df: DataFrame = load_dataset("loan-approval-prediction-clone", index=False)
train_df.head()

In [None]:
# Column names, non-null counts and dtypes: a quick check for missing values.
train_df.info()

In [None]:
# Summary statistics (count, mean, std, quartiles) using describe()'s defaults.
train_df.describe()

In [None]:
# Per-column dtypes as loaded, i.e. before optimize_memory is called on the frame.
train_df.dtypes

In [None]:
# Run the project's memory optimiser over the training frame.
# NOTE(review): the return value is not assigned. This only has an effect if
# optimize_memory mutates `train_df` in place — confirm in common.utils; if it
# returns a new frame instead, the result is discarded here.
optimize_memory(train_df, deep=True)

In [None]:
# Dtypes again, for comparison with the listing taken before optimize_memory ran.
train_df.dtypes

In [None]:
# Frequency of each home-ownership category.
# Pass the frame and column by keyword: a Series given positionally is read as
# `x` (with a deprecation warning) by seaborn 0.11 but as `data` from 0.12 on,
# so the positional form does not behave the same across versions.
sns.countplot(data=train_df, x='person_home_ownership')

In [None]:
# Frequency of each loan-intent category.
# Pass the frame and column by keyword: a Series given positionally is read as
# `x` (with a deprecation warning) by seaborn 0.11 but as `data` from 0.12 on,
# so the positional form does not behave the same across versions.
sns.countplot(data=train_df, x='loan_intent')

In [None]:
# Frequency of each loan grade.
# Pass the frame and column by keyword: a Series given positionally is read as
# `x` (with a deprecation warning) by seaborn 0.11 but as `data` from 0.12 on,
# so the positional form does not behave the same across versions.
sns.countplot(data=train_df, x='loan_grade')

In [None]:
# Frequency of each value of the default-on-file flag.
# Pass the frame and column by keyword: a Series given positionally is read as
# `x` (with a deprecation warning) by seaborn 0.11 but as `data` from 0.12 on,
# so the positional form does not behave the same across versions.
sns.countplot(data=train_df, x='cb_person_default_on_file')

In [None]:
# One-hot encode the categorical features, dropping the first level of each
# (drop_first=True) so the dummy columns are not perfectly collinear.
categorical_cols = ['person_home_ownership', 'loan_intent', 'loan_grade',
                    'cb_person_default_on_file']

# This cell rebinds `train_df`, so after one run the source columns are gone.
# Encoding only the columns that are still present makes a second run a no-op
# instead of a KeyError from get_dummies.
pending_cols = [col for col in categorical_cols if col in train_df.columns]

if pending_cols:
    train_df = pd.get_dummies(train_df,
                              columns=pending_cols,
                              drop_first=True)

# Dummy columns inherit the category value's casing; normalise all names to lower case.
train_df.columns = [col.lower() for col in train_df.columns]

train_df.head()

In [None]:
# Distribution of `person_age`: histogram with a KDE curve overlaid.
ax = sns.histplot(train_df['person_age'], kde=True)
plt.xticks(rotation='vertical')
# histplot drew on the current axes and returned them, so label that object directly.
ax.set_xlabel('Person Age', fontsize=12)
ax.set_ylabel('Number of Occurrences', fontsize=12)
plt.show()

In [None]:
# Distribution of `person_income`: histogram with a KDE curve overlaid.
ax = sns.histplot(train_df['person_income'], kde=True)
plt.xticks(rotation='vertical')
# histplot drew on the current axes and returned them, so label that object directly.
ax.set_xlabel('Person Income', fontsize=12)
ax.set_ylabel('Number of Occurrences', fontsize=12)
plt.show()

In [None]:
# Distribution of `person_emp_length`: histogram with a KDE curve overlaid.
ax = sns.histplot(train_df['person_emp_length'], kde=True)
plt.xticks(rotation='vertical')
# histplot drew on the current axes and returned them, so label that object directly.
ax.set_xlabel('Person Employment length', fontsize=12)
ax.set_ylabel('Number of Occurrences', fontsize=12)
plt.show()

In [None]:
# Distribution of `loan_amnt`: histogram with a KDE curve overlaid.
sns.histplot(train_df['loan_amnt'], kde=True)
plt.xticks(rotation='vertical')
# The x label previously read 'Person Employment length' (copied from the
# preceding cell); it now names the column that is actually plotted.
plt.xlabel('Loan Amount', fontsize=12)
plt.ylabel('Number of Occurrences', fontsize=12)
plt.show()

In [None]:
# Distribution of `loan_int_rate`: histogram with a KDE curve overlaid.
sns.histplot(train_df['loan_int_rate'], kde=True)
plt.xticks(rotation='vertical')
# The x label previously read 'Person Income' (copied from an earlier cell);
# it now names the column that is actually plotted.
plt.xlabel('Loan Interest Rate', fontsize=12)
plt.ylabel('Number of Occurrences', fontsize=12)
plt.show()

In [None]:
# Distribution of `loan_percent_income`: histogram with a KDE curve overlaid.
sns.histplot(train_df['loan_percent_income'], kde=True)
plt.xticks(rotation='vertical')
# The x label previously read 'Person Income' (copied from an earlier cell);
# it now names the column that is actually plotted.
plt.xlabel('Loan Percent of Income', fontsize=12)
plt.ylabel('Number of Occurrences', fontsize=12)
plt.show()

In [None]:
# Distribution of `cb_person_cred_hist_length`: histogram with a KDE curve overlaid.
sns.histplot(train_df['cb_person_cred_hist_length'], kde=True)
plt.xticks(rotation='vertical')
# The x label previously read 'Person Income' (copied from an earlier cell);
# it now names the column that is actually plotted.
plt.xlabel('Credit History Length', fontsize=12)
plt.ylabel('Number of Occurrences', fontsize=12)
plt.show()

In [None]:
# Distribution of the target column `loan_status`.
# NOTE(review): if loan_status is a two-valued label (presumably 0/1 — confirm),
# the KDE overlay carries no information and a countplot would show the class
# balance more directly.
sns.histplot(train_df['loan_status'], kde=True)
plt.xticks(rotation='vertical')
# The x label previously read 'Person Income' (copied from an earlier cell);
# it now names the column that is actually plotted.
plt.xlabel('Loan Status', fontsize=12)
plt.ylabel('Number of Occurrences', fontsize=12)
plt.show()

In [None]:
# Target is the `loan_status` column; the feature matrix is every other column.
# NOTE(review): nothing besides the target is dropped, so any identifier-like
# column present in train_df would be used as a feature — verify the schema.
y = train_df['loan_status']
X = train_df.drop(columns='loan_status')

X.head()

In [None]:
# Preview the first rows of the target series.
y.head()

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts.
# NOTE(review): the split is not stratified on y; if the classes are
# imbalanced, consider stratify=y so both splits keep the same class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train.head()

In [None]:
# Standardise the features and fit a logistic regression as ONE estimator.
#
# Previously this cell overwrote X_train / X_test with their scaled versions,
# which had two consequences:
#   * re-running the cell re-fit the scaler on already-scaled data and scaled
#     X_test a second time;
#   * the object saved later was the bare classifier, so the persisted artifact
#     could not score raw feature rows without the separately held scaler.
# With a Pipeline the scaler is fitted on the training split only (inside
# `fit`), applied automatically inside `predict`, and pickled together with
# the classifier. X_train / X_test are left untouched (unscaled).
scaler = StandardScaler()
model = Pipeline(
    steps=[
        ("scaler", scaler),
        ("classifier", LogisticRegression()),
    ]
)
model.fit(X_train, y_train)

In [None]:
# Score the fitted model on the held-out split.
y_pred = model.predict(X_test)

# Headline accuracy first, then the per-class breakdown.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Confusion Matrix:', confusion_matrix(y_test, y_pred), sep='\n')
print('\nClassification Report:', classification_report(y_test, y_pred), sep='\n')

In [None]:
# Persist the fitted model under models/, creating the directory on first use.
model_dir = "models/"
model_path = os.path.join(model_dir, "logistic_regression_model.pkl")

os.makedirs(model_dir, exist_ok=True)
joblib.dump(model, model_path)

print(f"Model saved to: {model_path}")

In [None]:
# Round-trip check: read the artifact back from the path it was written to.
# joblib files are pickles, so only load artifacts from a trusted source.
loaded_model = joblib.load(filename=model_path)
print("Model loaded successfully!")

In [None]:
# Re-score the held-out split with the model read back from disk; the figures
# should match the evaluation of the in-memory model.
y_pred = loaded_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Confusion Matrix:', confusion_matrix(y_test, y_pred), sep='\n')
print('\nClassification Report:', classification_report(y_test, y_pred), sep='\n')