In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import joblib

In [None]:
# Read the loan CSV from the working directory into `df`, then preview
# the first five rows as the cell's output.
df = pd.read_csv(filepath_or_buffer='loan.csv')
df.head(n=5)

In [None]:
# Print the frame summary (column names, dtypes, non-null counts); the
# non-null counts reveal which columns contain missing values.
df.info()

In [None]:
# Remove the 'Loan_ID' column (presumably a per-row identifier with no
# predictive value -- confirm against the data dictionary).
# Reassigning instead of using inplace=True, together with errors='ignore',
# makes this cell safe to re-run: a second execution no longer raises KeyError
# because the column is already gone.
df = df.drop(columns='Loan_ID', errors='ignore')
df.head()

In [None]:
# Columns that are checked for gaps, grouped by how they are imputed.
numerical_cols = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Loan_Amount_Term']
categorical_cols = ['Gender', 'Married', 'Dependents', 'Self_Employed', 'Credit_History']

# Record missingness BEFORE imputing: once the gaps are filled, isna() is False
# everywhere and every '<col>_Missing' flag would be a constant 0.
# The existence check keeps the real flags if this cell is re-run after the
# fill step has already replaced the NaNs.
for col in categorical_cols + numerical_cols:
    flag_col = col + '_Missing'
    if flag_col not in df.columns:
        df[flag_col] = df[col].isna().astype(int)

# Numeric gaps -> per-column median. The result is assigned back to `df`
# rather than calling fillna(inplace=True) on `df[col]`: that chained form
# operates on a temporary object, emits a FutureWarning on pandas 2.x and
# leaves `df` unchanged under Copy-on-Write.
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].median())

# Categorical gaps -> explicit 'Unknown' category (a numeric column in this
# list becomes object dtype once the string placeholder is inserted).
for col in categorical_cols:
    df[col] = df[col].fillna('Unknown')

In [None]:
# Show the current column labels as a sanity check on the frame's layout.
print(df.columns)

In [None]:
# Label-encode only the non-numeric columns.
# Numeric columns are left untouched: casting them to str and label-encoding
# them would replace each value with its lexicographic rank ("1000" < "150"),
# destroying the numeric ordering the model relies on.
# A separate encoder is fitted per column and stored by column name so the
# string -> integer mapping can be reused or inverted later.
label_encoders = {}

for col in df.select_dtypes(exclude='number').columns:
    label_encoder = LabelEncoder()
    # astype(str) gives columns holding a mix of numbers and strings a single
    # sortable type before encoding.
    df[col] = label_encoder.fit_transform(df[col].astype(str))
    label_encoders[col] = label_encoder

In [None]:
# Separate the features (X) from the target (y).
# No cell in this notebook creates a 'Loan_Status_Y' column, so fall back to
# 'Loan_Status' when the suffixed name is absent instead of failing with
# KeyError on a fresh Restart & Run All.
target_col = "Loan_Status_Y" if "Loan_Status_Y" in df.columns else "Loan_Status"
X = df.drop(columns=[target_col])
y = df[target_col]

In [None]:
# Split first, then scale. Fitting the scaler on the full data set before the
# split lets the held-out rows influence the min/max used for training
# (data leakage), which makes the reported accuracy optimistic.
# Only 7.8% of the rows are held out, so the accuracy below rests on a small
# sample.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.078, random_state=0)

# Learn the feature ranges from the training rows only, then apply the same
# transformation to the test rows.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

LogReg = LogisticRegression(solver='saga', max_iter=500, random_state=1)
# fit() returns the estimator itself, so `History` is the fitted model.
History = LogReg.fit(X_train, y_train)

print(History)

y_pred = LogReg.predict(X_test)

# accuracy_score expects (y_true, y_pred); the value is the same either way,
# but the documented order avoids mistakes if the metric is swapped later.
LRAcc = accuracy_score(y_test, y_pred)
print('LR accuracy: {:.2f}%'.format(LRAcc * 100))

In [None]:
# Persist the fitted scaler next to the model: the classifier was trained on
# MinMax-scaled features, so new inputs must go through the same scaler before
# predict() is called.
# NOTE(review): inputs also need the same imputation and label encoding used
# above; that state is not saved here.
joblib.dump(scaler, 'loanscaler.pkl')
# Model artefact: same file name and contents as before, kept last so the
# cell's displayed output is unchanged.
joblib.dump(LogReg, 'loanmodel.pkl')