# Support Vector Machine.

A Support Vector Machine (SVM) is a discriminative classifier formally defined by a separating hyperplane.  Given a set of training examples, each marked as belonging to one or the other of two categories, an SVM training algorithm builds a model that assigns new examples to one category or the other, making it a non-probabilistic binary linear classifier.

In this notebook we only train the model (after hyperparameter tuning) and save it to a ".sav" file for further tests. Other metrics that evaluate the performance of the model are computed in the "08_Conclusions" notebook.

In [11]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [2]:
# Read the cleaned dataset, discard the 'Unnamed: 0' column and keep only
# the rows that contain no missing values.
# NOTE(review): 'Unnamed: 0' is presumably an index column written out by an
# earlier export step — confirm.
lc_data = (
    pd.read_csv("lc_cleaned.csv")
    .drop(columns='Unnamed: 0')
    .dropna()
)

In [3]:
# Show the cleaned frame as this cell's output.
lc_data

Unnamed: 0,funded_amnt,int_rate,annual_inc,loan_status,dti,delinq_2yrs,fico_range_low,inq_last_6mths,open_acc,pub_rec,...,num_tl_op_past_12m,pct_tl_nvr_dlq,pub_rec_bankruptcies,tax_liens,total_bal_ex_mort,home_ownership_cat,verification_status_cat,application_type_Joint App,hardship_flag_Y,debt_settlement_flag_Y
0,-0.090625,-0.9024,2.000000,0.0,-0.075856,0.0,1.000000,0.0,1.285714,1.0,...,0.0,-0.206897,1.0,0.0,1.523500,0.0,-0.5,0.0,0.0,0.0
1,-0.455208,1.8976,-0.340909,0.0,-0.966558,0.0,0.000000,0.0,-1.000000,0.0,...,-1.0,-2.011494,0.0,0.0,-0.777654,0.5,0.0,0.0,0.0,0.0
2,-0.430208,-0.9024,1.022727,0.0,-0.391517,0.0,0.714286,2.0,1.142857,0.0,...,0.5,0.287356,0.0,0.0,0.197300,-0.5,-0.5,0.0,0.0,0.0
3,-0.221875,0.4848,-0.295932,0.0,-0.637031,0.0,0.142857,0.0,0.571429,2.0,...,0.5,0.287356,2.0,0.0,-0.648005,0.5,0.0,0.0,0.0,0.0
4,0.111458,0.4848,0.227273,0.0,-0.575856,1.0,0.000000,0.0,-1.000000,0.0,...,0.5,-0.862069,0.0,0.0,-0.243986,-0.5,0.5,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
456681,-0.090625,0.4160,-0.900000,0.0,0.747145,0.0,-0.142857,0.0,-0.285714,0.0,...,-0.5,0.287356,0.0,0.0,-0.434451,0.5,0.5,0.0,0.0,0.0
456682,-0.055208,-0.1600,-0.045455,-1.0,0.470636,1.0,0.000000,0.0,0.285714,0.0,...,0.5,0.091954,0.0,0.0,1.816346,-0.5,0.5,0.0,0.0,0.0
456683,0.028125,0.4800,-0.681818,-1.0,1.058728,0.0,-0.142857,0.0,-0.285714,1.0,...,0.5,0.287356,1.0,0.0,-0.087141,0.5,0.5,0.0,0.0,0.0
456684,-0.055208,1.1200,-0.013636,-1.0,0.756117,1.0,0.285714,2.0,0.857143,0.0,...,0.0,-0.287356,0.0,0.0,0.498309,0.5,0.0,0.0,0.0,0.0


In [4]:
# Separate the predictors (every column except 'loan_status') from the
# target column 'loan_status'.
X = lc_data.drop(columns='loan_status')
y = lc_data['loan_status']

In [7]:
# Hold out 25% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both partitions, and the fixed seed makes the
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)

In [14]:
%%time


# Grid-search C and the kernel type with 5-fold cross-validation, ranking the
# candidates by ROC AUC and using every available core (n_jobs=-1).
# The `iid` argument is no longer passed: it was deprecated in scikit-learn
# 0.22 and removed in 0.24, where supplying it raises a TypeError.
# NOTE(review): max_iter=1000 caps the solver iterations, so the fitted models
# may stop before converging — confirm this trade-off is intended.
gsearch_SVM = GridSearchCV(
    estimator=SVC(max_iter=1000),
    param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')},
    scoring='roc_auc',
    n_jobs=-1,
    cv=5,
)
gsearch_SVM.fit(x_train, y_train)



CPU times: user 59.5 s, sys: 1.33 s, total: 1min
Wall time: 5min 19s




GridSearchCV(cv=5, estimator=SVC(max_iter=1000), iid=False, n_jobs=-1,
             param_grid={'C': [1, 10], 'kernel': ('linear', 'rbf')},
             scoring='roc_auc')

In [15]:
# GridSearchCV refits the best parameter combination on the whole training
# set by default (refit=True), so best_estimator_ is already trained on
# x_train / y_train. The extra .fit() call that used to follow only repeated
# that work, so it has been dropped.
gsearch_SVM_optimum = gsearch_SVM.best_estimator_
gsearch_SVM_optimum



SVC(C=10, max_iter=1000)

In [None]:
import pickle

def save_models(filename, model):
    """Serialize ``model`` with pickle and write it to ``filename``.

    Parameters
    ----------
    filename : path of the file to create. It is opened in binary write
        mode, so an existing file at that path is overwritten.
    model : the object to pickle.

    Returns
    -------
    None
    """
    with open(filename, mode='wb') as handle:
        pickle.dump(model, handle)

In [None]:
# Persist the tuned SVM to disk so it can be reloaded for further tests.
save_models(filename='svm_model2.sav', model=gsearch_SVM_optimum)