# Loan Status Prediction

### Importing all dependencies

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

### Data Collection and Processing

In [2]:
loan_dataset = pd.read_csv("data/loan.csv")

FileNotFoundError: ignored

In [None]:
loan_dataset.head()

In [None]:
# number of rows and columns
loan_dataset.shape

In [None]:
# statistical measures
loan_dataset.describe()

In [None]:
# missing values
loan_dataset.isnull().sum()

In [None]:
# dropping the missing values
loan_dataset = loan_dataset.dropna()

In [None]:
# check again
loan_dataset.isnull().sum()

In [None]:
# label encoding
loan_dataset.replace({"Loan_Status": {"N":0, "Y":1}}, inplace = True)

In [None]:
loan_dataset.head()

In [None]:
# dependent columns values
loan_dataset["Dependents"].value_counts()

In [None]:
# replacing the value of 3+ to 4
loan_dataset = loan_dataset.replace(to_replace='3+', value = 4)

In [None]:
loan_dataset["Dependents"].value_counts()

### Data Visualization

In [None]:
import matplotlib.pyplot as plt

In [None]:
sns.set_palette("tab10")

In [None]:
# education and load status
sns.countplot(x = 'Education', hue = "Loan_Status", data = loan_dataset)
plt.show()

In [None]:
# Marital status and loan status
sns.countplot(x = 'Married', hue = "Loan_Status", data = loan_dataset)
plt.show()

In [None]:
# marital Gender and loan status
sns.countplot(x = 'Gender', hue = "Loan_Status", data = loan_dataset)
plt.show()

In [None]:
# marital Self_Employed and loan status
sns.countplot(x = 'Self_Employed', hue = "Loan_Status", data = loan_dataset)
plt.show()

### Feature Engineering

In [None]:
# convert categorical columns to numerical values
loan_dataset.replace({"Married":{"No":0, "Yes":1},
                      "Gender":{"Male":1, "Female": 0},
                      "Self_Employed": {"No": 0, "Yes": 1},
                      "Property_Area": {"Rural": 0, "Semiurban": 1, "Urban": 2},
                      "Education": {"Graduate": 1, "Not Graduate": 0},
                      "Dependents": {"0": 0, "1": 1}
                     }, inplace = True)

In [None]:
loan_dataset.head()

In [None]:
# data and label separation
X = loan_dataset.drop(columns = ["Loan_ID", "Loan_Status"], axis = 1).values
Y = loan_dataset["Loan_Status"].values

In [None]:
print(X)
print(Y)

### Split Training and Test data

In [None]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.1, stratify = Y, random_state = 2)

In [None]:
print("Total data: ", X.shape)
print("Train data: ", X_train.shape)
print("Test data: ", X_test.shape)

In [None]:
print(X_train)
print(Y_train)

### Model Training

In [None]:
classifier = svm.SVC(kernel = "linear")

In [None]:
# training svc
classifier.fit(X_train, Y_train)

### Model Evaluation

In [None]:
# accuracy score on the training data
X_train_prediciton = classifier.predict(X_train)
training_data_accuracy = accuracy_score(X_train_prediciton, Y_train)
print("Accuracy on training data: ", training_data_accuracy)

In [None]:
# accuracy score on the test data
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(X_test_prediction, Y_test)
print("Accuracy on test data: ", test_data_accuracy)

# Predictive System

In [None]:
Y_test

array([1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 1, 1], dtype=int64)

In [None]:
Y_test[19]

0

In [None]:
#Y[6]

In [None]:
X_new = X_test[19]
#X_new = X[6]
X_new

array([1, 0, 0, 0, 0, 2333, 1451.0, 102.0, 480.0, 0.0, 2], dtype=object)

In [None]:
X_new = X_new.reshape(1, -1)
X_new

array([[1, 0, 0, 0, 0, 2333, 1451.0, 102.0, 480.0, 0.0, 2]], dtype=object)

In [None]:
prediction = classifier.predict(X_new)
print(prediction)

[0]


In [None]:
if(prediction[0] == 0):
    print("Loan not approved.")
else:
    print("Loan approved")

Loan not approved.
