## Import Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Read the data

In [None]:
# Upload 01Exercise1.csv from the "Classification" folder of the course.
# The upload widget only exists inside Google Colab, so it is requested only
# when the CSV is not already in the working directory. This lets the cell be
# re-run without prompting again, and lets the notebook run outside Colab once
# the file has been placed next to it.
import os

uploaded = {}  # stays empty when no upload was needed
if not os.path.exists('01Exercise1.csv'):
    from google.colab import files
    uploaded = files.upload()

Saving 01Exercise1.csv to 01Exercise1.csv


In [None]:
# Load the CSV once and keep that frame pristine; all later preprocessing
# happens on an independent copy.
data_original = pd.read_csv('01Exercise1.csv')
data = data_original.copy(deep=True)
data.head(n=5)

Unnamed: 0,gender,married,ch,income,loanamt,status
0,Male,No,1.0,5849,,Y
1,Male,Yes,1.0,4583,128.0,N
2,Male,Yes,1.0,3000,66.0,Y
3,Male,Yes,1.0,2583,120.0,Y
4,Male,No,1.0,6000,141.0,Y


## Remove irrelevant variables

In [None]:
# The bank cannot discriminate based on gender, so that column is excluded.
data = data.drop(columns=['gender'])

## Null values

In [None]:
# Number of missing values in each column
data.isna().sum()

married     3
ch         50
income      0
loanamt    22
status      0
dtype: int64

In [None]:
# (rows, columns) before the rows with missing values are removed
data.shape

(614, 5)

In [None]:
# Keep only complete rows: a row is dropped as soon as any of its values is missing.
data = data.dropna(axis=0, how='any')
data.shape

(541, 5)

## Normalization

In [None]:
# Preview the first two rows before the numeric columns are scaled
data.head(2)

Unnamed: 0,married,ch,income,loanamt,status
1,Yes,1.0,4583,128.0,N
2,Yes,1.0,3000,66.0,Y


In [None]:
from scipy.stats import zscore

In [None]:
# Z-score the two continuous features (the columns at positions 2 and 3).
# NOTE(review): the z-scores are computed on the full data set, i.e. before the
# train/test split further down, so test rows influence the scaling.
Normalized_data = data.loc[:, ['income', 'loanamt']].apply(zscore)
Normalized_data.head(2)

Unnamed: 0,income,loanamt
1,-0.141051,-0.204324
2,-0.390889,-0.947889


In [None]:
# Replace the raw columns with their z-scored versions. Assigning by column
# label swaps in the new float columns wholesale. The positional
# `data.iloc[:, 2:4] = ...` form writes into the existing columns in place, which
# newer pandas versions warn about (and are set to reject) because `income` is
# stored as integers while the z-scores are floats.
data[list(Normalized_data.columns)] = Normalized_data
data.head(2)

Unnamed: 0,married,ch,income,loanamt,status
1,Yes,1.0,-0.141051,-0.204324,N
2,Yes,1.0,-0.390889,-0.947889,Y


## Create dummy variables and drop the first level to avoid the dummy variable trap

In [None]:
data.dtypes  # the object (string) columns are the ones pd.get_dummies will turn into dummy variables

married     object
ch         float64
income     float64
loanamt    float64
status      object
dtype: object

In [None]:
# One-hot encode the string columns; drop_first=True keeps a single indicator per
# binary variable (married_Yes, status_Y) and so avoids the dummy variable trap.
# dtype=int pins the indicators to 0/1 integers: recent pandas versions default to
# booleans, which would change the class labels shown by the later reports.
data = pd.get_dummies(data, drop_first=True, dtype=int)

In [None]:
# Confirm the encoded column names (married_Yes, status_Y)
data.head(2)

Unnamed: 0,ch,income,loanamt,married_Yes,status_Y
1,1.0,-0.141051,-0.204324,1,0
2,1.0,-0.390889,-0.947889,1,1


# Create train and test split

In [None]:
# Double brackets keep Y a one-column DataFrame; data['status_Y'] would give a pandas Series.
Y = data[['status_Y']]
X = data.drop(columns=['status_Y'])

In [None]:
from sklearn.model_selection import train_test_split

# 70/30 split. stratify=Y keeps the class ratio of the target equal in both parts,
# and the fixed random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=1234,
    stratify=Y,
)

# Fit the model using default parameters

In [None]:
# import SVM
from sklearn.svm import SVC   #SVC: Support Vector Classifier
                              #SVR: Support Vector Regression

# Fit with the default hyper-parameters. Y_train is a one-column DataFrame, and
# passing it as-is makes scikit-learn emit a DataConversionWarning (the
# "y = column_or_1d(y, warn=True)" line in the output). np.ravel flattens it to
# the 1-D label vector that fit() expects.
svc_loan = SVC()
svc_loan.fit(X_train, np.ravel(Y_train))

In [None]:
# Create Y predictions. predict() returns a NumPy array, which is wrapped in a
# pandas DataFrame here.
Y_predict = pd.DataFrame(svc_loan.predict(X_test))

# Mean accuracy of the classifier on the held-out test set
score = svc_loan.score(X_test, Y_test)
print('Model Score is:', score)

Model Score is: 0.7914110429447853


  y = column_or_1d(y, warn=True)


# Evaluate accuracy using Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Rows are the true classes, columns the predicted ones.
cm = confusion_matrix(y_true=Y_test, y_pred=Y_predict)
cm

array([[ 20,  31],
       [  3, 109]])

In [None]:
# Per-class precision, recall and F1 on the test set
cr = classification_report(y_true=Y_test, y_pred=Y_predict)
print(cr)

              precision    recall  f1-score   support

           0       0.87      0.39      0.54        51
           1       0.78      0.97      0.87       112

    accuracy                           0.79       163
   macro avg       0.82      0.68      0.70       163
weighted avg       0.81      0.79      0.76       163



# Another classification problem:
## Using different Kernel Functions for SVC

In [None]:
from sklearn import datasets   # bundles several small datasets to practice with

# Fetch the iris dataset object (its .data and .target attributes are used below)
iris = datasets.load_iris()

In [None]:
# Feature matrix and class labels of the iris data.
# Note: X and Y are rebound here, replacing the loan-data X and Y defined earlier.
X, Y = iris.data, iris.target
X.shape

(150, 4)

In [None]:
# Stratified 70/30 split of the iris data with a fixed seed
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y,
    test_size=0.3,
    random_state=1234,
    stratify=Y,
)

# Kernel: RBF with gamma = 1.0

In [None]:
# import SVM
from sklearn.svm import SVC   # SVC: Support Vector Classifier (SVR is the regression counterpart)

# Choose the kernel function and its gamma value explicitly: RBF with gamma = 1.0
svc_iris = SVC(gamma=1.0, kernel='rbf')
svc_iris.fit(X_train, Y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=1.0, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [None]:
# Create Y predictions: predict() gives a NumPy array, wrapped here in a pandas DataFrame
Y_predict = pd.DataFrame(svc_iris.predict(X_test))

In [None]:
cm_rbf_1 = confusion_matrix(Y_test, Y_predict)
cm_rbf_1 # very good prediction! only 1 wrong prediction: one sample of the second class (row 2) was predicted as the third class (column 3)

array([[15,  0,  0],
       [ 0, 14,  1],
       [ 0,  0, 15]])

# Kernel: RBF with gamma = 10.0

In [None]:
# import SVM
from sklearn.svm import SVC   # SVC: Support Vector Classifier (SVR is the regression counterpart)

# Same RBF kernel as before, now with gamma = 10.0
svc_iris = SVC(gamma=10.0, kernel='rbf')
svc_iris.fit(X_train, Y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=10.0, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [None]:
# Create Y predictions: predict() gives a NumPy array, wrapped here in a pandas DataFrame
Y_predict = pd.DataFrame(svc_iris.predict(X_test))

In [None]:
cm_rbf_10 = confusion_matrix(Y_test, Y_predict)
cm_rbf_10 # 3 wrong predictions instead of 1. Note: a larger gamma makes the RBF kernel narrower (each training point influences a smaller region), so the model fits the training data more tightly and generalizes worse here.

array([[14,  0,  1],
       [ 0, 13,  2],
       [ 0,  0, 15]])

# Kernel: Linear

In [None]:
# import SVM
from sklearn.svm import SVC   # SVC: Support Vector Classifier (SVR is the regression counterpart)

# Linear kernel; every other hyper-parameter keeps its default
svc_iris = SVC(kernel='linear')
svc_iris.fit(X_train, Y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [None]:
# Create Y predictions: predict() gives a NumPy array, wrapped here in a pandas DataFrame
Y_predict = pd.DataFrame(svc_iris.predict(X_test))

In [None]:
# Confusion matrix for the linear kernel (rows: true class, columns: predicted class)
cm_linear = confusion_matrix(y_true=Y_test, y_pred=Y_predict)
cm_linear

array([[15,  0,  0],
       [ 0, 13,  2],
       [ 0,  0, 15]])

# Kernel: Polynomial

In [None]:
# import SVM
from sklearn.svm import SVC   # SVC: Support Vector Classifier (SVR is the regression counterpart)

# Polynomial kernel; every other hyper-parameter keeps its default
svc_iris = SVC(kernel='poly')
svc_iris.fit(X_train, Y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [None]:
# Create Y predictions: predict() gives a NumPy array, wrapped here in a pandas DataFrame
Y_predict = pd.DataFrame(svc_iris.predict(X_test))

In [None]:
# Confusion matrix for the polynomial kernel (rows: true class, columns: predicted class)
cm_poly = confusion_matrix(y_true=Y_test, y_pred=Y_predict)
cm_poly

array([[15,  0,  0],
       [ 0, 13,  2],
       [ 0,  0, 15]])

# Kernel: Sigmoid

In [None]:
# import SVM
from sklearn.svm import SVC   # SVC: Support Vector Classifier (SVR is the regression counterpart)

# Sigmoid kernel; every other hyper-parameter keeps its default
svc_iris = SVC(kernel='sigmoid')
svc_iris.fit(X_train, Y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='sigmoid',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [None]:
# Create Y predictions: predict() gives a NumPy array, wrapped here in a pandas DataFrame
Y_predict = pd.DataFrame(svc_iris.predict(X_test))

In [None]:
cm_sigmoid = confusion_matrix(Y_test, Y_predict)
cm_sigmoid # Obviously the wrong kernel for this dataset: only 3 of the 45 test samples are classified correctly, and no sample is ever assigned to the second class

array([[ 3,  0, 12],
       [13,  0,  2],
       [15,  0,  0]])