In [10]:
# ------------------------------------------------------------------
# Build the Support Vector Classifier Model
# Predict the loan approval status based on 
# Gender, Marital Status, Credit History, Income and Loan Amount
# ------------------------------------------------------------------

# Import Libraries and read csv file
import statistics as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
from mpl_toolkits import mplot3d
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

"dataset_Exercise = pd.read_csv('01Exercise1.csv', #names = names)\ndataset_Exercise"

In [12]:
dataset_Exercise = pd.read_csv('01Exercise1.csv',)
dataset_Exercise

Unnamed: 0,gender,married,ch,income,loanamt,status
0,Male,No,1.0,5849,,Y
1,Male,Yes,1.0,4583,128.0,N
2,Male,Yes,1.0,3000,66.0,Y
3,Male,Yes,1.0,2583,120.0,Y
4,Male,No,1.0,6000,141.0,Y
...,...,...,...,...,...,...
609,Female,No,1.0,2900,71.0,Y
610,Male,Yes,1.0,4106,40.0,Y
611,Male,Yes,1.0,8072,253.0,Y
612,Male,Yes,1.0,7583,187.0,Y


In [13]:
dataset_Exercise.dtypes

gender      object
married     object
ch         float64
income       int64
loanamt    float64
status      object
dtype: object

In [14]:
#find out columns with missing values
dataset_Exercise.isnull().sum()

gender     13
married     3
ch         50
income      0
loanamt    22
status      0
dtype: int64

In [15]:
# Replace Missing Values. Drop the rows.
dataset_Exercise.fillna(dataset_Exercise.gender.mode()[0], inplace = True)
dataset_Exercise.fillna(dataset_Exercise.married.mode()[0], inplace = True)
dataset_Exercise.fillna(dataset_Exercise.loanamt.mode()[0], inplace = True)

In [17]:
dataset_Exercise.isnull().sum()


gender     0
married    0
ch         0
income     0
loanamt    0
status     0
dtype: int64

In [18]:
# Drop irrelevant columns based on business sense
dataset_Exercise = dataset_Exercise.drop('ch', axis = 1)

In [19]:
#dataset_Exercise = pd.get_dummies(dataset_Exercise)

In [20]:
dataset_Exercise

Unnamed: 0,gender,married,income,loanamt,status
0,Male,No,5849,Male,Y
1,Male,Yes,4583,128,N
2,Male,Yes,3000,66,Y
3,Male,Yes,2583,120,Y
4,Male,No,6000,141,Y
...,...,...,...,...,...
609,Female,No,2900,71,Y
610,Male,Yes,4106,40,Y
611,Male,Yes,8072,253,Y
612,Male,Yes,7583,187,Y


In [21]:
# Create Dummy variables
X_exercise = dataset_Exercise.drop('status', 1)
Y_exercise = dataset_Exercise.status
X_exercise

Unnamed: 0,gender,married,income,loanamt
0,Male,No,5849,Male
1,Male,Yes,4583,128
2,Male,Yes,3000,66
3,Male,Yes,2583,120
4,Male,No,6000,141
...,...,...,...,...
609,Female,No,2900,71
610,Male,Yes,4106,40
611,Male,Yes,8072,253
612,Male,Yes,7583,187


In [22]:
Y_exercise

0      Y
1      N
2      Y
3      Y
4      Y
      ..
609    Y
610    Y
611    Y
612    Y
613    N
Name: status, Length: 614, dtype: object

In [23]:
X_exercise = pd.get_dummies(X_exercise)
Y_exercise = pd.get_dummies(Y_exercise)
X_exercise

Unnamed: 0,income,gender_Female,gender_Male,married_Male,married_No,married_Yes,loanamt_9.0,loanamt_17.0,loanamt_25.0,loanamt_26.0,...,loanamt_480.0,loanamt_490.0,loanamt_495.0,loanamt_496.0,loanamt_500.0,loanamt_570.0,loanamt_600.0,loanamt_650.0,loanamt_700.0,loanamt_Male
0,5849,0,1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,4583,0,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3000,0,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2583,0,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,6000,0,1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
609,2900,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
610,4106,0,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
611,8072,0,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
612,7583,0,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [24]:
Y_exercise

Unnamed: 0,N,Y
0,0,1
1,1,0
2,0,1
3,0,1
4,0,1
...,...,...
609,0,1
610,0,1
611,0,1
612,0,1


In [30]:
# Normalize the data (Income and Loan Amount) Using StandardScaler
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X_exercise)
Rescaler_X_exercise = scaler.transform(X_exercise)

In [31]:
Rescaler_X_exercise

array([[ 0.07299082, -0.47234264,  0.47234264, ..., -0.04038962,
        -0.04038962,  5.18739732],
       [-0.13441195, -0.47234264,  0.47234264, ..., -0.04038962,
        -0.04038962, -0.1927749 ],
       [-0.39374734, -0.47234264,  0.47234264, ..., -0.04038962,
        -0.04038962, -0.1927749 ],
       ...,
       [ 0.43717437, -0.47234264,  0.47234264, ..., -0.04038962,
        -0.04038962, -0.1927749 ],
       [ 0.35706382, -0.47234264,  0.47234264, ..., -0.04038962,
        -0.04038962, -0.1927749 ],
       [-0.13441195,  2.11710719, -2.11710719, ..., -0.04038962,
        -0.04038962, -0.1927749 ]])

In [34]:
# Create the X (Independent) and Y (Dependent) dataframes
x = Rescaler_X_exercise
y = Y_exercise
x

array([[ 0.07299082, -0.47234264,  0.47234264, ..., -0.04038962,
        -0.04038962,  5.18739732],
       [-0.13441195, -0.47234264,  0.47234264, ..., -0.04038962,
        -0.04038962, -0.1927749 ],
       [-0.39374734, -0.47234264,  0.47234264, ..., -0.04038962,
        -0.04038962, -0.1927749 ],
       ...,
       [ 0.43717437, -0.47234264,  0.47234264, ..., -0.04038962,
        -0.04038962, -0.1927749 ],
       [ 0.35706382, -0.47234264,  0.47234264, ..., -0.04038962,
        -0.04038962, -0.1927749 ],
       [-0.13441195,  2.11710719, -2.11710719, ..., -0.04038962,
        -0.04038962, -0.1927749 ]])

In [35]:
y

Unnamed: 0,N,Y
0,0,1
1,1,0
2,0,1
3,0,1
4,0,1
...,...,...
609,0,1
610,0,1
611,0,1
612,0,1


In [36]:
# Split the X and Y dataset into training and testing set
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.3)

In [38]:
# Import and build Support Vector Classifier
"""from sklearn import svm
svm_model = svm.SVC()
svm_model.fit(x_train, y_train)"""

ValueError: bad input shape (429, 2)

In [41]:
from sklearn import svm
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC

svm_model = OneVsRestClassifier(LinearSVC())
svm_model.fit(x_train, y_train)



OneVsRestClassifier(estimator=LinearSVC(C=1.0, class_weight=None, dual=True,
                                        fit_intercept=True, intercept_scaling=1,
                                        loss='squared_hinge', max_iter=1000,
                                        multi_class='ovr', penalty='l2',
                                        random_state=None, tol=0.0001,
                                        verbose=0),
                    n_jobs=None)

In [43]:
# Predict the outcome using Test data
pred_svm_exercise = svm_model.predict(x_test)
pred_svm_exercise

array([[0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [1, 0],
       [0, 1],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 0],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [1,

In [45]:
# Build the conufsion matrix and get the accuracy/score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import multilabel_confusion_matrix
acc_svm = accuracy_score(y_test, pred_svm_exercise)
matrix_svm = multilabel_confusion_matrix(y_test, pred_svm_exercise)
acc_svm

0.5621621621621622

In [46]:
matrix_svm

array([[[88, 32],
        [47, 18]],

       [[19, 46],
        [33, 87]]], dtype=int64)

# Behbud_Hamzayev