# Algoritmo SVM
## Clasificación lineal

In [113]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.datasets import load_iris

%matplotlib inline

### Importando data
https://archive.ics.uci.edu/ml/datasets/banknote+authentication

In [114]:
bankdata = pd.read_csv("./data/bill_authentication.csv")  

### Explorando data

In [115]:
bankdata.head()

Unnamed: 0,Variance,Skewness,Curtosis,Entropy,Class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [116]:
bankdata.shape

(1372, 5)

![Linear clasification](./img/linear.png)

### Preprocessing

In [117]:
x = bankdata.drop('Class', axis = 1)
x.head()

Unnamed: 0,Variance,Skewness,Curtosis,Entropy
0,3.6216,8.6661,-2.8073,-0.44699
1,4.5459,8.1674,-2.4586,-1.4621
2,3.866,-2.6383,1.9242,0.10645
3,3.4566,9.5228,-4.0112,-3.5944
4,0.32924,-4.4552,4.5718,-0.9888


In [118]:
y = bankdata['Class']
y.head()

0    0
1    0
2    0
3    0
4    0
Name: Class, dtype: int64

### Train and Test Data

In [119]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)

In [120]:
x_train.shape

(1097, 4)

In [121]:
x_test.shape

(275, 4)

In [122]:
y_train.shape

(1097,)

In [123]:
y_test.shape

(275,)

### Train SVC model

In [124]:
svc_classifier = SVC(kernel = 'linear')
svc_classifier.fit(x_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

### Haciendo predicciones

In [125]:
y_pred = svc_classifier.predict(x_test)
y_pred

array([1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0,
       1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1,
       1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,
       0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1,
       0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0])

In [126]:
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

print('Accuracy of SVC on training set: {:.2f}'.format(svc_classifier.score(x_train, y_train)))
print('Accuracy of SVC on test set: {:.2f}'.format(svc_classifier.score(x_test, y_test)))

[[140   1]
 [  1 133]]
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       141
           1       0.99      0.99      0.99       134

   micro avg       0.99      0.99      0.99       275
   macro avg       0.99      0.99      0.99       275
weighted avg       0.99      0.99      0.99       275

Accuracy of SVC on training set: 0.99
Accuracy of SVC on test set: 0.99


Se puede observar que solo hay una mala clasificación con el algoritmo de SVM

## Clasificación no lineal
### Importando data

In [127]:
iris = load_iris()
iris_data = pd.DataFrame(iris.data, columns = iris.feature_names)

### Explorando data

In [128]:
iris_data.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [129]:
iris_data.shape

(150, 4)

![NonLinear clasification](./img/non-linear.png)

### Preprocessing

In [130]:
x = iris.data[:, :]
y = iris.target
x.shape

(150, 4)

In [131]:
y.shape

(150,)

### Train and Test Data

In [132]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20)  
x_train.shape

(120, 4)

In [133]:
x_test.shape

(30, 4)

### Train SVC model

In [134]:
svc_classifier = SVC(kernel='poly', degree=8)  
svc_classifier.fit(x_train, y_train)  



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=8, gamma='auto_deprecated',
  kernel='poly', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

### Haciendo predicciones con Kernel polinomial

In [135]:
y_pred = svc_classifier.predict(x_test)  
y_pred

array([0, 1, 2, 2, 0, 1, 1, 1, 0, 1, 2, 0, 1, 1, 1, 0, 0, 2, 0, 1, 1, 2,
       0, 0, 0, 1, 2, 0, 0, 1])

In [136]:
print(confusion_matrix(y_test, y_pred))  
print(classification_report(y_test, y_pred))  

print('Accuracy of SVC on training set: {:.2f}'.format(svc_classifier.score(x_train, y_train)))
print('Accuracy of SVC on test set: {:.2f}'.format(svc_classifier.score(x_test, y_test)))

[[12  0  0]
 [ 0 11  1]
 [ 0  1  5]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       0.92      0.92      0.92        12
           2       0.83      0.83      0.83         6

   micro avg       0.93      0.93      0.93        30
   macro avg       0.92      0.92      0.92        30
weighted avg       0.93      0.93      0.93        30

Accuracy of SVC on training set: 1.00
Accuracy of SVC on test set: 0.93


### Haciendo predicciones con Kernel Gaussian

In [137]:
svc_classifier = SVC(kernel='rbf')  
svc_classifier.fit(x_train, y_train) 



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [138]:
y_pred = svc_classifier.predict(x_test)  
y_pred

array([0, 1, 1, 2, 0, 1, 1, 1, 0, 1, 2, 0, 1, 1, 1, 0, 0, 2, 0, 1, 1, 2,
       0, 0, 0, 2, 2, 0, 0, 1])

In [139]:
print(confusion_matrix(y_test, y_pred))  
print(classification_report(y_test, y_pred)) 


print('Accuracy of SVC on training set: {:.2f}'.format(svc_classifier.score(x_train, y_train)))
print('Accuracy of SVC on test set: {:.2f}'.format(svc_classifier.score(x_test, y_test)))

[[12  0  0]
 [ 0 12  0]
 [ 0  0  6]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00        12
           2       1.00      1.00      1.00         6

   micro avg       1.00      1.00      1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy of SVC on training set: 0.97
Accuracy of SVC on test set: 1.00


### Haciendo predicciones con Kernel Sigmoid

In [140]:
svc_classifier = SVC(kernel='sigmoid')  
svc_classifier.fit(x_train, y_train)  



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='sigmoid', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [141]:
y_pred = svc_classifier.predict(x_test)
y_pred

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2])

In [142]:
print(confusion_matrix(y_test, y_pred))  
print(classification_report(y_test, y_pred))  


print('Accuracy of SVC on training set: {:.2f}'.format(svc_classifier.score(x_train, y_train)))
print('Accuracy of SVC on test set: {:.2f}'.format(svc_classifier.score(x_test, y_test)))

[[ 0  0 12]
 [ 0  0 12]
 [ 0  0  6]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        12
           1       0.00      0.00      0.00        12
           2       0.20      1.00      0.33         6

   micro avg       0.20      0.20      0.20        30
   macro avg       0.07      0.33      0.11        30
weighted avg       0.04      0.20      0.07        30

Accuracy of SVC on training set: 0.37
Accuracy of SVC on test set: 0.20


  'precision', 'predicted', average, warn_for)


El que peor se comporto fue el kernel sigmoid, debido a su naturaleza de retornar solo 1 o 0, es mas aconsejable para problemas de clasificación binarios.

Podemos concluid que depende mas de probrar diferentes configuraciones.