In [2]:
# Support Vector Machine

# A support vector machine looks for the hyperplane in an N-dimensional feature space
# (N = the number of features) that separates the classes with the largest possible margin.

In [3]:
import os
from warnings import filterwarnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn import tree
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.metrics import roc_auc_score,roc_curve
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import scale
from sklearn.svm import SVC
from xgboost import XGBClassifier

filterwarnings('ignore')

In [4]:
# Location of the diabetes CSV. The default is the path this notebook was
# originally run with; set the DIABETES_CSV environment variable to load the
# file from somewhere else without editing this cell.
DATA_PATH = os.environ.get("DIABETES_CSV", "C:/Users/Derya/Downloads/diabetes.csv")
diabetes = pd.read_csv(DATA_PATH)

In [5]:
# Work on a copy with incomplete rows removed, so `diabetes` itself stays untouched.
df = diabetes.copy().dropna()

# "Outcome" is the target; every other column is used as a predictor.
X = df.drop(columns=["Outcome"])
y = df["Outcome"]

# 70/30 hold-out split with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

In [6]:
# Baseline: linear-kernel SVM with C left at the library default, fitted on the training split.
svm_model = SVC(kernel="linear")
svm_model.fit(X_train, y_train)

In [7]:
# Class predictions of the baseline linear model on the held-out test split.
y_pred = svm_model.predict(X=X_test)

In [8]:
# Share of test samples whose predicted class matches the true class.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7445887445887446

In [9]:
# 10-fold cross-validated search over the regularisation strength C
# (integers 1 through 9) for a linear-kernel SVM.
# n_jobs=-1 runs the fits in parallel; verbose=2 prints progress while fitting.
svc_params = {"C": np.arange(1, 10)}
svc = SVC(kernel="linear")
svc_cv_model = GridSearchCV(
    estimator=svc,
    param_grid=svc_params,
    cv=10,
    n_jobs=-1,
    verbose=2,
)

svc_cv_model.fit(X_train, y_train)

Fitting 10 folds for each of 9 candidates, totalling 90 fits


In [12]:
# Best-scoring parameter combination found by the grid search fitted above.
svc_cv_model.best_params_

{'C': 5}

In [13]:
# Tuned linear SVM. GridSearchCV refits the best parameter combination on the
# whole training set (refit=True is its default and is not overridden above), so
# the winning model is taken straight from the search instead of re-typing C by
# hand. This keeps the cell correct if the data, split or grid ever changes and
# avoids fitting the same model a second time.
svc_tuned = svc_cv_model.best_estimator_

In [15]:
# Class predictions of the tuned linear model on the held-out test split.
y_pred = svc_tuned.predict(X=X_test)


In [16]:
# Share of test samples whose predicted class matches the true class.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7445887445887446

In [17]:
# Radial basis function (RBF) kernel
# With kernel="rbf", SVC measures the similarity of two samples x and x' with the Gaussian kernel
#     K(x, x') = exp(-gamma * ||x - x'||^2),
#     which lets the SVM learn a non-linear decision boundary; gamma sets how far the influence
#     of a single training sample reaches.
# Note: this is a kernel function, not a Radial Basis Function *network*. RBF networks are a separate
#     model: a special class of feed-forward neural network consisting of three layers (an input layer,
#     a hidden layer, and an output layer). That is fundamentally different from most neural network
#     architectures, which are composed of many layers and bring about non-linearity by repeatedly
#     applying non-linear activation functions. In an RBF network the input layer receives the data and
#     passes it to the hidden layer, where the computation occurs, and the output layer is used for
#     prediction tasks such as classification or regression.

In [18]:
# Baseline: RBF-kernel SVM with all other settings left at their defaults,
# fitted on the training split.
svc_model = SVC(kernel="rbf")
svc_model.fit(X_train, y_train)

In [19]:
# Hold-out accuracy of the baseline RBF model: predict the test split, then
# report the share of samples classified correctly.
y_pred = svc_model.predict(X=X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7359307359307359

In [20]:
# Candidate values searched for the RBF model; the same list is tried for both
# C and gamma, giving 8 x 8 = 64 combinations.
# NOTE(review): 0.01 is absent from an otherwise log-spaced list — confirm that is intentional.
_rbf_candidates = [0.0001, 0.001, 0.1, 1, 5, 10, 50, 100]
svc_params = {
    "C": list(_rbf_candidates),
    "gamma": list(_rbf_candidates),
}

In [21]:
# 10-fold cross-validated grid search over the (C, gamma) combinations in svc_params.
# The kernel is named explicitly ("rbf" is also SVC's default, so results are
# unchanged) to make clear which model is being tuned and to stay consistent with
# the earlier cells, which state the kernel.
# n_jobs=-1 runs the fits in parallel; verbose=2 prints progress while fitting.
svc = SVC(kernel="rbf")
svc_cv_model = GridSearchCV(svc, svc_params,
                            cv=10,
                            n_jobs=-1,
                            verbose=2)

svc_cv_model.fit(X_train, y_train)

Fitting 10 folds for each of 64 candidates, totalling 640 fits


In [22]:
# Tuned RBF SVM. GridSearchCV refits the best (C, gamma) combination on the whole
# training set (refit=True is its default and is not overridden above), so the
# winning model is taken straight from the search rather than hard-coding
# parameter values copied by hand, which would silently go stale if the data,
# split or grid changed.
svc_tuned = svc_cv_model.best_estimator_

In [23]:
# Hold-out accuracy of the tuned RBF model: predict the test split, then
# report the share of samples classified correctly.
y_pred = svc_tuned.predict(X=X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7359307359307359