In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import numpy as np 

from sklearn.svm import SVC

In [3]:
# Load the dataset from a CSV file; the path is relative to the current working directory.
df = pd.read_csv(filepath_or_buffer="creditcard_cleaned.csv")

In [4]:
# Drop the leftover "Unnamed: 0" column (presumably the index that was written out
# when the CSV was saved -- TODO confirm).
# `errors='ignore'` keeps this cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [5]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,scaled_amount,scaled_time,V1,V2,V3,V4,V5,V6,V7,V8,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Class
0,-0.201182,-1.785541,-1.38638,2.199543,1.525807,2.954481,0.264425,-0.239757,0.953239,-1.336626,...,-0.131055,0.662084,-0.086671,-0.118522,0.700997,0.380318,0.197355,-0.896647,-0.409293,0
1,-0.349231,0.128667,-23.984747,16.697832,-22.209875,9.584969,-16.230439,2.596333,-33.239328,-21.560039,...,5.804551,-12.615023,5.774087,2.750221,0.513411,-1.608804,-0.459624,-4.626127,-0.334561,1
2,-0.209898,0.56465,0.345932,-0.024238,-0.249973,-2.121791,0.461026,-0.265107,0.399168,-0.121305,...,0.177209,0.557694,1.489021,-0.219031,0.356435,0.070467,-0.027019,-0.007897,-0.016354,0
3,0.225693,1.011331,-3.61385,-0.922136,-4.749887,3.373001,-0.545207,-1.171301,-4.172315,1.517016,...,-0.320541,0.786787,0.893065,1.034907,0.097671,-1.345551,-0.788329,1.055442,0.099971,1
4,0.046539,-1.430146,-21.885434,12.930505,-24.098872,6.203314,-16.466099,-4.459842,-16.519836,14.535565,...,1.611998,1.762232,-1.579055,-0.951043,0.134565,1.50711,-0.222671,1.527655,0.453699,1


In [6]:
# Split the frame by position: every column except the last one forms the feature
# matrix X, and the last column is the target vector y.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [69]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle (and therefore the split) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=24,
)

## SVC "one-versus-one"

In [87]:
from sklearn.svm import SVC
# Support vector classifier with a linear kernel.
# `gamma` is deliberately not set: it is the kernel coefficient for the 'rbf',
# 'poly' and 'sigmoid' kernels only, so it has no effect when kernel='linear'.
# NOTE(review): unlike the NuSVC and LinearSVC sections, this model is fit without
# a StandardScaler step -- confirm that is intended before comparing accuracies.
svc = SVC(kernel='linear')
svc.fit(X_train, y_train)

SVC(gamma='auto', kernel='linear')

In [88]:
# Predict class labels for the held-out rows with the fitted linear-kernel SVC.
# NOTE: `y_pred` is reassigned in the later model sections, so the metric cells
# below must be run right after this one.
y_pred = svc.predict(X=X_test)

In [89]:
from sklearn.metrics import confusion_matrix
# Confusion matrix for the SVC predictions: rows are true classes, columns are
# predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[88,  2],
       [11, 96]], dtype=int64)

In [90]:
from sklearn.metrics import accuracy_score
# Fraction of test rows whose predicted label matches the true label (SVC).
accuracy_score(y_true=y_test, y_pred=y_pred)

0.934010152284264

## NuSVC "one-versus-one"

In [13]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import NuSVC

In [63]:
# Two-step pipeline: standardise the features, then fit a NuSVC with its default
# settings.
nuSVC = make_pipeline(
    StandardScaler(),
    NuSVC(),
)
nuSVC.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()), ('nusvc', NuSVC())])

In [64]:
# Predict class labels for the held-out rows with the fitted NuSVC pipeline.
# NOTE: this overwrites the `y_pred` produced by any other model section.
y_pred = nuSVC.predict(X=X_test)

In [65]:
from sklearn.metrics import confusion_matrix
# Confusion matrix for the NuSVC predictions: rows are true classes, columns are
# predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[88,  2],
       [18, 89]], dtype=int64)

In [66]:
from sklearn.metrics import accuracy_score
# Fraction of test rows whose predicted label matches the true label (NuSVC).
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8984771573604061

## LinearSVC "one-vs-the-rest"
#### Highest accuracy

In [26]:
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification

In [46]:
# Two-step pipeline: standardise the features, then fit a LinearSVC with a fixed
# seed and a stopping tolerance of 1e-5.
l_SVC = make_pipeline(
    StandardScaler(),
    LinearSVC(tol=1e-5, random_state=0),
)
l_SVC.fit(X_train, y_train)



Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearsvc', LinearSVC(random_state=0, tol=1e-05))])

In [47]:
# Predict class labels for the held-out rows with the fitted LinearSVC pipeline.
# NOTE: this overwrites the `y_pred` produced by any other model section.
y_pred = l_SVC.predict(X=X_test)

In [48]:
from sklearn.metrics import confusion_matrix
# Confusion matrix for the LinearSVC predictions: rows are true classes, columns
# are predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[88,  2],
       [10, 97]], dtype=int64)

In [49]:
from sklearn.metrics import accuracy_score
# Fraction of test rows whose predicted label matches the true label (LinearSVC).
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9390862944162437

## Observations
<img src="https://scikit-learn.org/stable/_images/sphx_glr_plot_separating_hyperplane_0011.png" alt="Drawing" style="width: 200px;"/>

* SVC, NuSVC and LinearSVC are classes capable of performing binary and multi-class classification on a dataset.
* SVC and NuSVC are similar methods, but accept slightly different sets of parameters and have different mathematical formulations 
* LinearSVC is another (faster) implementation of Support Vector Classification for the case of a linear kernel



1. SVC 
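    * gamma = 'scale' / 'auto' -- no change in the results, because gamma is ignored by the linear kernel (it only applies to the 'rbf', 'poly' and 'sigmoid' kernels)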
    * accuracy = 0.934010152284264

 2. nuSVC
     * nu: float, default=0.5 (an upper bound on the fraction of margin errors and a lower bound on the fraction of support vectors; should be in the interval (0, 1])
     * accuracy = 0.8984771573604061


3. LinearSVC
    * more flexibility in the choice of penalties and loss functions
    * accuracy = 0.9390862944162437