### Importing the Required libraries

In [75]:
import numpy as np
import pandas as pd
from sklearn import svm
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import plot_precision_recall_curve
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import roc_curve, auc
from sklearn import metrics
from sklearn import preprocessing

### Load the dataset

In [76]:
data= pd.read_csv('creditscoring.csv')

In [77]:
data.head(2)

Unnamed: 0,duration,history,purpose,amount,savings,employed,installp,marital,coapp,resident,property,age,other,housing,existcr,job,depends,telephon,foreign,good_bad
0,6,4,3.0,1169,5,5,4,3,1,4,1,67,3,2,2,3,1,2,1,good
1,48,2,3.0,5951,1,3,2,2,1,2,1,22,3,2,1,3,1,1,1,bad


### Drop columns with missing values

In [78]:
for col in data:
    missing_values= data[col].isna().sum()
    if(missing_values>0):
        print('{} {}'.format(col, missing_values))

purpose 12


In [79]:
data= data.drop('purpose',axis=1)

### Dividing data into predictor variables (X) and target variable (y)

In [80]:
X= data.drop('good_bad', axis=1)
y= data['good_bad']

print(X.shape)
print(y.shape)

(1000, 18)
(1000,)


### Extract numeric features and standardize them

In [81]:
numerical_features = X[['duration', 'amount', 'age']]
names_numerical = numerical_features.columns.tolist()
min_max_scaler = preprocessing.MinMaxScaler()
numerical_features = min_max_scaler.fit_transform(numerical_features)
numerical_features = pd.DataFrame(numerical_features)
numerical_features.columns = names_numerical

In [82]:
numerical_features

Unnamed: 0,duration,amount,age
0,0.029412,0.050567,0.857143
1,0.647059,0.313690,0.053571
2,0.117647,0.101574,0.535714
3,0.558824,0.419941,0.464286
4,0.294118,0.254209,0.607143
...,...,...,...
995,0.117647,0.081765,0.214286
996,0.382353,0.198470,0.375000
997,0.117647,0.030483,0.339286
998,0.602941,0.087763,0.071429


### Extract Categorical Features and create dummy variables

In [83]:
categorical_features = X.drop(['duration', 'amount', 'age'], axis=1)
categorical_features = pd.get_dummies(categorical_features, columns=categorical_features.columns.tolist())

### Concatenate the numerical and categorical features into a dataframe

In [84]:
X_scale= pd.concat([numerical_features, categorical_features], axis=1)

In [85]:
X_scale

Unnamed: 0,duration,amount,age,history_0,history_1,history_2,history_3,history_4,savings_1,savings_2,...,job_1,job_2,job_3,job_4,depends_1,depends_2,telephon_1,telephon_2,foreign_1,foreign_2
0,0.029412,0.050567,0.857143,0,0,0,0,1,0,0,...,0,0,1,0,1,0,0,1,1,0
1,0.647059,0.313690,0.053571,0,0,1,0,0,1,0,...,0,0,1,0,1,0,1,0,1,0
2,0.117647,0.101574,0.535714,0,0,0,0,1,1,0,...,0,1,0,0,0,1,1,0,1,0
3,0.558824,0.419941,0.464286,0,0,1,0,0,1,0,...,0,0,1,0,0,1,1,0,1,0
4,0.294118,0.254209,0.607143,0,0,0,1,0,1,0,...,0,0,1,0,0,1,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.117647,0.081765,0.214286,0,0,1,0,0,1,0,...,0,1,0,0,1,0,1,0,1,0
996,0.382353,0.198470,0.375000,0,0,1,0,0,1,0,...,0,0,0,1,1,0,0,1,1,0
997,0.117647,0.030483,0.339286,0,0,1,0,0,1,0,...,0,0,1,0,1,0,1,0,1,0
998,0.602941,0.087763,0.071429,0,0,1,0,0,1,0,...,0,0,1,0,1,0,0,1,1,0


### Dividing dataset into train and test

In [86]:
X_train, X_test, y_train, y_test = train_test_split(X_scale, y, test_size=0.20)

### Building a SVM classifier with linear kernel

In [87]:
SVM_classifier = svm.SVC(kernel='linear', random_state=np.random.seed(42))

In [88]:
SVM_classifier.fit(X_train, y_train)

SVC(kernel='linear')

In [89]:
y_pred = SVM_classifier.predict(X_test)

In [90]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:",metrics.precision_score(y_test, y_pred, pos_label="good"))
print("Recall:",metrics.recall_score(y_test, y_pred, pos_label="good"))

Accuracy: 0.715
Precision: 0.7337278106508875
Recall: 0.9117647058823529


In [91]:
accuracy_linear = metrics.accuracy_score(y_test, y_pred)
precision_linear = metrics.precision_score(y_test, y_pred, pos_label="good")
recall_linear = metrics.recall_score(y_test, y_pred, pos_label="good")

### Building a SVM classifier with RBF kernel

In [92]:
rbf_SVC = svm.SVC(kernel='rbf',  random_state=np.random.seed(42))

In [93]:
rbf_SVC.fit(X_train, y_train)

SVC()

In [94]:
y_pred = rbf_SVC.predict(X_test)

In [95]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:",metrics.precision_score(y_test, y_pred, pos_label="good"))
print("Recall:",metrics.recall_score(y_test, y_pred, pos_label="good"))

Accuracy: 0.665
Precision: 0.6885245901639344
Recall: 0.9264705882352942


In [96]:
accuracy_rbf = metrics.accuracy_score(y_test, y_pred)
precision_rbf = metrics.precision_score(y_test, y_pred, pos_label="good")
recall_rbf = metrics.recall_score(y_test, y_pred, pos_label="good")

### Building a SVM classifier with Polynomial kernel

In [97]:
polynomial_SVC = svm.SVC(kernel='poly', degree=3,  random_state=np.random.seed(42))

In [98]:
polynomial_SVC.fit(X_train, y_train)

SVC(kernel='poly')

In [99]:
y_pred = polynomial_SVC.predict(X_test)

In [100]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:",metrics.precision_score(y_test, y_pred, pos_label="good"))
print("Recall:",metrics.recall_score(y_test, y_pred, pos_label="good"))

Accuracy: 0.685
Precision: 0.7295597484276729
Recall: 0.8529411764705882


In [101]:
accuracy_poly = metrics.accuracy_score(y_test, y_pred)
precision_poly = metrics.precision_score(y_test, y_pred, pos_label="good")
recall_poly = metrics.recall_score(y_test, y_pred, pos_label="good")

### Building a SVM classifier with Sigmoid kernel

In [102]:
sigmoid_SVC = svm.SVC(kernel='sigmoid',  random_state=np.random.seed(42))

In [103]:
sigmoid_SVC.fit(X_train, y_train)

SVC(kernel='sigmoid')

In [104]:
y_pred = sigmoid_SVC.predict(X_test)

In [105]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
print("Precision:",metrics.precision_score(y_test, y_pred, pos_label="good"))
print("Recall:",metrics.recall_score(y_test, y_pred, pos_label="good"))

Accuracy: 0.68
Precision: 0.6978021978021978
Recall: 0.9338235294117647


In [106]:
accuracy_sigmoid = metrics.accuracy_score(y_test, y_pred)
precision_sigmoid = metrics.precision_score(y_test, y_pred, pos_label="good")
recall_sigmoid = metrics.recall_score(y_test, y_pred, pos_label="good")

### Comparison of Kernels for Accuracy, Precision & Recall

In [107]:
results = pd.DataFrame({'Linear': [accuracy_linear, precision_linear, recall_linear ],
                        'RBF': [accuracy_rbf, precision_rbf, recall_rbf],
                        'Polynomial': [accuracy_poly, precision_poly, recall_poly  ],
                        'Sigmoid': [accuracy_sigmoid, precision_sigmoid, recall_sigmoid ], 
    
})

In [108]:
results.index = ['Accuracy', 'Precision', 'Recall']

In [109]:
results

Unnamed: 0,Linear,RBF,Polynomial,Sigmoid
Accuracy,0.715,0.665,0.685,0.68
Precision,0.733728,0.688525,0.72956,0.697802
Recall,0.911765,0.926471,0.852941,0.933824


### Hyperparameter tuning using GridSearchCV

In [110]:
svc = svm.SVC()
param_grid = {'C': [0.1,1, 10, 100], 'gamma': [1,0.1,0.01,0.001],'kernel': ['rbf', 'poly', 'sigmoid']}
grid = GridSearchCV(svc,param_grid)
grid.fit(X_train,y_train)

GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001],
                         'kernel': ['rbf', 'poly', 'sigmoid']})

#### Viewing the best hyperparameters

In [111]:
print(grid.best_estimator_)

SVC(C=100, gamma=0.001)


In [112]:
print('Best C:', grid.best_estimator_.get_params()['C'])

Best C: 100


In [113]:
print('Best gamma:', grid.best_estimator_.get_params()['gamma'])

Best gamma: 0.001


In [114]:
print('Best kernel:', grid.best_estimator_.get_params()['kernel'])

Best kernel: rbf
