In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Explore Data

In [None]:

# Load the raw loan data and shuffle the rows.
# The shuffle is seeded so the row order -- and therefore the positional
# train/test split performed later in this notebook -- is identical on every run.
org_df = pd.read_csv("Data/loan_data.csv").sample(frac=1, random_state=45)
org_df.head(3)

In [None]:
# Narrow the frame to the two predictor columns used by the model plus the target.
loan_df = org_df.loc[:, [
    'previous_loan_defaults_on_file',
    'person_home_ownership',
    'loan_status',
]]
loan_df.info()

In [None]:
# Class balance of the target. A notebook cell only renders its final
# expression, so both class counts are returned in one Series instead of as two
# separate expressions (the first of which would be silently discarded).
loan_df['loan_status'].value_counts()

In [None]:
# include='all' so the non-numeric columns are summarised as well
# (count / unique / top / freq); by default describe() on a mixed frame
# reports only the numeric columns.
loan_df.describe(include='all').T

In [None]:
# Partition the column labels by dtype: object-typed columns are treated as
# categorical, every other dtype as numeric.
cat_cols_names = [label for label, dtype in loan_df.dtypes.items() if dtype == 'object']
num_cols_names = [label for label, dtype in loan_df.dtypes.items() if dtype != 'object']
print('cat_cols_names= ', cat_cols_names, 'num_cols_names= ', num_cols_names)

In [None]:
# Same dtype partition as for the column names, but expressed as integer
# column positions within loan_df.
cat_cols = [position for position, dtype in enumerate(loan_df.dtypes) if dtype == 'object']
num_cols = [position for position, dtype in enumerate(loan_df.dtypes) if dtype != 'object']
print('cat_cols_ids= ', cat_cols, 'num_cols_ids= ', num_cols)

# Split Data

In [None]:
# Separate the predictors from the target, then hold out 30% of the rows for testing.
X = loan_df.drop(columns='loan_status').values
y = loan_df['loan_status'].values.reshape(-1, 1)  # column vector, shape (n_rows, 1)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=45,
)

# Preprocess Data

In [None]:
from sklearn.compose import  ColumnTransformer 
from sklearn.pipeline import  Pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.naive_bayes import GaussianNB


In [None]:
# Column indices refer to X, i.e. loan_df with the target column dropped:
#   0 -> previous_loan_defaults_on_file : one-hot encoded, first category dropped
#   1 -> person_home_ownership          : mapped to ordinal integer codes
transform = ColumnTransformer(
    transformers=[
        ('loan_fault_encoder', OneHotEncoder(drop='first'), [0]),
        ('home_ownership_encoder', OrdinalEncoder(), [1]),
    ],
    remainder='drop',
)

In [None]:
# Sanity check: the first 20 raw training rows, followed by the same rows after
# encoding. Note that this call fits `transform` on the training data.
print(X_train[:20], transform.fit_transform(X_train)[:20], sep='\n')

# Transform and Process Data

In [None]:
# Chain the column encoders with a Gaussian Naive Bayes classifier so raw rows
# can be passed straight to fit / predict.
pipe = Pipeline([
    ('preprocessing', transform),
    ('model', GaussianNB()),
])

In [None]:
# Fit the encoders and the classifier on the training split; the target is
# flattened to 1-D before being handed to the estimator.
pipe.fit(X_train, np.ravel(y_train))

# Model Evaluation

In [None]:
from sklearn.metrics import accuracy_score, f1_score

In [None]:
# Predict loan_status for the held-out rows; the pipeline runs its
# preprocessing step on X_test before calling the classifier.
y_pred = pipe.predict(X_test)

In [None]:
# Score the held-out predictions.
accuracy = accuracy_score(y_test, y_pred)
# For a single-label target, micro-averaged F1 is numerically identical to
# accuracy, so it carried no extra information. Report the F1 of the positive
# class (loan_status == 1) instead.
f1 = f1_score(y_test, y_pred, average='binary')
print(accuracy, f1)

In [None]:
# Persist the two metrics as plain text so they can be read outside the notebook.
with open('Results/metrics.txt', mode='w') as metrics_file:
    print(f'Accuracy = {accuracy}\nF1-score= {f1}', end='', file=metrics_file)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

# Draw the confusion matrix for the test split and write that figure to disk.
matrix_graph = ConfusionMatrixDisplay.from_predictions(y_true=y_test, y_pred=y_pred)
matrix_graph.figure_.savefig("Results/model_conf_matrix.png", dpi=120)

# Save Model

In [None]:
import skops.io as skio

In [None]:
# Serialise the fitted pipeline (preprocessing step + classifier) to disk via skops.
skio.dump(pipe, 'Models/loan_approval_pipeline.skops')