In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [2]:
df = pd.read_csv('train.csv')
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [3]:
df.shape

(614, 13)

In [4]:
df.describe()

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
count,614.0,614.0,592.0,600.0,564.0
mean,5403.459283,1621.245798,146.412162,342.0,0.842199
std,6109.041673,2926.248369,85.587325,65.12041,0.364878
min,150.0,0.0,9.0,12.0,0.0
25%,2877.5,0.0,100.0,360.0,1.0
50%,3812.5,1188.5,128.0,360.0,1.0
75%,5795.0,2297.25,168.0,360.0,1.0
max,81000.0,41667.0,700.0,480.0,1.0


In [5]:
df.isnull().sum()

Loan_ID               0
Gender               13
Married               3
Dependents           15
Education             0
Self_Employed        32
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [6]:
column = ['Gender', 'Married', 'Dependents', 'Education',
       'Self_Employed', 'Property_Area', 'Loan_Status']
values = {}
for col in column:
    values[col] = df[col].unique()
values

{'Gender': array(['Male', 'Female', nan], dtype=object),
 'Married': array(['No', 'Yes', nan], dtype=object),
 'Dependents': array(['0', '1', '2', '3+', nan], dtype=object),
 'Education': array(['Graduate', 'Not Graduate'], dtype=object),
 'Self_Employed': array(['No', 'Yes', nan], dtype=object),
 'Property_Area': array(['Urban', 'Rural', 'Semiurban'], dtype=object),
 'Loan_Status': array(['Y', 'N'], dtype=object)}

In [7]:
for key, value in values.items():
    m=0
    df[key].fillna(df[key].mode()[0], inplace=True)
    value = list(value)
    for i in value:
        df.replace({key:{i: m}}, inplace = True)
        m += 1

In [8]:
column = ['Gender', 'Married', 'Dependents', 'Education',
       'Self_Employed', 'Property_Area', 'Loan_Status']
values = {}
for col in column:
    values[col] = df[col].unique()
values

{'Gender': array([0, 1], dtype=int64),
 'Married': array([0, 1], dtype=int64),
 'Dependents': array([0, 1, 2, 3], dtype=int64),
 'Education': array([0, 1], dtype=int64),
 'Self_Employed': array([0, 1], dtype=int64),
 'Property_Area': array([0, 1, 2], dtype=int64),
 'Loan_Status': array([0, 1], dtype=int64)}

In [9]:
df.isnull().sum()

Loan_ID               0
Gender                0
Married               0
Dependents            0
Education             0
Self_Employed         0
ApplicantIncome       0
CoapplicantIncome     0
LoanAmount           22
Loan_Amount_Term     14
Credit_History       50
Property_Area         0
Loan_Status           0
dtype: int64

In [10]:
df["Credit_History"].fillna(df["Credit_History"].mode(), inplace = True)

In [11]:
df.shape

(614, 13)

In [12]:
df.dropna(inplace = True)

In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 529 entries, 1 to 613
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Loan_ID            529 non-null    object 
 1   Gender             529 non-null    int64  
 2   Married            529 non-null    int64  
 3   Dependents         529 non-null    int64  
 4   Education          529 non-null    int64  
 5   Self_Employed      529 non-null    int64  
 6   ApplicantIncome    529 non-null    int64  
 7   CoapplicantIncome  529 non-null    float64
 8   LoanAmount         529 non-null    float64
 9   Loan_Amount_Term   529 non-null    float64
 10  Credit_History     529 non-null    float64
 11  Property_Area      529 non-null    int64  
 12  Loan_Status        529 non-null    int64  
dtypes: float64(4), int64(8), object(1)
memory usage: 57.9+ KB


In [14]:
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,LP001003,0,1,1,0,0,4583,1508.0,128.0,360.0,1.0,1,1
2,LP001005,0,1,0,0,1,3000,0.0,66.0,360.0,1.0,0,0
3,LP001006,0,1,0,1,0,2583,2358.0,120.0,360.0,1.0,0,0
4,LP001008,0,0,0,0,0,6000,0.0,141.0,360.0,1.0,0,0
5,LP001011,0,1,2,0,1,5417,4196.0,267.0,360.0,1.0,0,0


In [15]:
X_train, X_test, y_train, y_test = train_test_split(df.drop(columns = ['Loan_ID','Loan_Status'], axis = 1), df.Loan_Status,test_size = 0.1,stratify = df.Loan_Status, random_state = 2023)

## Using Sklearn

In [16]:
classifier = SVC(kernel = 'linear').fit(X_train, y_train)

In [17]:
print('Accuracy score on train data:', accuracy_score(classifier.predict(X_train), y_train))

Accuracy score on train data: 0.7857142857142857


In [18]:
print('Accuracy score on test data:', accuracy_score(classifier.predict(X_test), y_test))

Accuracy score on test data: 0.8113207547169812


## Using tensorflow

In [19]:
import tensorflow as tf
import keras
from keras import layers

In [20]:
normalizer = layers.Normalization()
normalizer.adapt(X_train)

# Create a model that include the normalization layer
inputs = keras.Input(shape=11)
x = normalizer(inputs)
x = layers.Dense(100, activation="relu")(x)
outputs = layers.Dense(1, activation = "sigmoid")(x)
model = keras.Model(inputs, outputs)


model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x293a16ebfa0>

In [21]:
loss, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (accuracy*100))

Accuracy: 83.02
