# [Machine Learning] Supervised Learning

## Support Vector Machine Classification

### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

### Load Dataset

In [2]:
# Load scikit-learn's bundled breast cancer dataset. The returned object
# exposes the attributes inspected below: data, feature_names, target
# and target_names.
from sklearn.datasets import load_breast_cancer
dt = load_breast_cancer()

In [3]:
# Feature matrix: a 2-D float array with one row per sample.
dt.data

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [4]:
# Names of the 30 features; used later as the DataFrame column labels.
dt.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [5]:
# Class label for each sample, encoded as 0 or 1.
dt.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [6]:
# Human-readable class names: position 0 is 'malignant', position 1 is 'benign'.
dt.target_names

array(['malignant', 'benign'], dtype='<U9')

### Create Dataframe

In [7]:
# Combine the feature matrix and the label vector into a single DataFrame
# whose last column is the target.
# `columns` must be a flat list of names. Wrapping the list in another list
# (`[[...]]`) makes pandas build a one-level MultiIndex, so that
# `df['target']` returns a DataFrame instead of a Series.
df = pd.DataFrame(
    np.c_[dt.data, dt.target],
    columns=list(dt.feature_names) + ['target'],
)
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0.0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0.0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0.0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0.0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0.0


In [8]:
# (rows, columns): 30 feature columns plus the appended 'target' column.
df.shape

(569, 31)

### Split Data

In [9]:
# Separate predictors from the label: every column except the last is a
# feature, and the last column is the target.
x, y = df.iloc[:, :-1], df.iloc[:, -1]

In [10]:
# Sanity check: x keeps the 30 feature columns, y is one-dimensional.
x.shape, y.shape

((569, 30), (569,))

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=22,
)

In [12]:
# Size of the training split (features, labels).
x_train.shape, y_train.shape

((455, 30), (455,))

In [13]:
# Size of the held-out test split (features, labels).
x_test.shape, y_test.shape

((114, 30), (114,))

### Support Vector Machine Classification Model 

In [14]:
from sklearn.svm import SVC

In [15]:
# Baseline: RBF-kernel SVM fitted on the raw (unscaled) training features.
svc_rbf = SVC(kernel='rbf')
svc_rbf.fit(x_train, y_train)

SVC()

In [16]:
# Accuracy of the unscaled baseline on the held-out test split.
svc_rbf.score(x_test,y_test)

0.9210526315789473

### Feature Scaling

In [17]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only, so that no statistics from the
# test split leak into preprocessing.
sc = StandardScaler()
sc.fit(x_train)

StandardScaler()

In [18]:
# Apply the training-set scaling to both splits.
x_train_sc, x_test_sc = sc.transform(x_train), sc.transform(x_test)

### SVM with RBF Kernel

In [19]:
# RBF-kernel SVM trained on the standardized features; the final expression
# reports its accuracy on the standardized test split.
svc_rbf_2 = SVC(kernel='rbf').fit(x_train_sc, y_train)
svc_rbf_2.score(x_test_sc, y_test)

0.9736842105263158

### SVM with Polynomial Kernel

In [20]:
# Degree-2 polynomial-kernel SVM on the standardized features, followed by
# its accuracy on the standardized test split.
svc_poly = SVC(kernel='poly', degree=2).fit(x_train_sc, y_train)
svc_poly.score(x_test_sc, y_test)

0.8421052631578947

### SVM with Linear Kernel

In [21]:
# Linear-kernel SVM on the standardized features, followed by its accuracy
# on the standardized test split.
svc_linear = SVC(kernel='linear').fit(x_train_sc, y_train)
svc_linear.score(x_test_sc, y_test)

0.9649122807017544

### Predict Cancer

In [22]:
# Build one synthetic sample with 30 feature values to exercise predict().
# A seeded Generator is used so the sample, and every prediction derived
# from it below, is identical on each Restart & Run All.
# NOTE: the values are uniform in [0, 1), which is not a realistic range for
# most of these features; this is a smoke test, not a real patient record.
pre = np.random.default_rng(22).random((1, 30))
pre

array([[0.98899838, 0.17674035, 0.83564619, 0.87112669, 0.0776352 ,
        0.46558528, 0.27213639, 0.77765415, 0.4676584 , 0.8698595 ,
        0.19735083, 0.2300335 , 0.31175717, 0.24470402, 0.18234864,
        0.14658277, 0.97990084, 0.94449088, 0.2902526 , 0.48223937,
        0.28589722, 0.28954188, 0.62767645, 0.14475447, 0.07783611,
        0.86991889, 0.73386653, 0.56262997, 0.58445434, 0.28684389]])

In [23]:
# Scale the new sample with the scaler fitted on the training split, exactly
# as the training and test features were scaled.
# Very large magnitudes in the result mean the sample lies far from the
# training distribution for those features.
pre_sc = sc.transform(pre)
pre_sc

array([[ -3.7495677 ,  -4.54411036,  -3.77993943,  -1.85529796,
         -1.37161824,   6.96265555,   2.38698241,  19.3583568 ,
         10.6711957 , 113.37819627,  -0.73322435,  -1.74467108,
         -1.22939066,  -0.85032197,  58.47808459,   6.6970448 ,
         30.12463713, 149.61838388,  33.29897707, 169.27014496,
         -3.30790659,  -4.17727592,  -3.17671265,  -1.53060965,
         -2.43108672,   3.91854444,   2.23446646,   6.94237642,
          5.12729427,  10.95659966]])

In [24]:
# Predict the class of the scaled sample with the RBF model trained on scaled data.
svc_rbf_2.predict(pre_sc)

array([0.])

In [25]:
# Show the class names again so the numeric prediction can be read as a label.
dt.target_names

array(['malignant', 'benign'], dtype='<U9')

In [26]:
# Peek at the last rows of the full dataset for comparison with real samples.
df.tail()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
564,21.56,22.39,142.0,1479.0,0.111,0.1159,0.2439,0.1389,0.1726,0.05623,...,26.4,166.1,2027.0,0.141,0.2113,0.4107,0.2216,0.206,0.07115,0.0
565,20.13,28.25,131.2,1261.0,0.0978,0.1034,0.144,0.09791,0.1752,0.05533,...,38.25,155.0,1731.0,0.1166,0.1922,0.3215,0.1628,0.2572,0.06637,0.0
566,16.6,28.08,108.3,858.1,0.08455,0.1023,0.09251,0.05302,0.159,0.05648,...,34.12,126.7,1124.0,0.1139,0.3094,0.3403,0.1418,0.2218,0.0782,0.0
567,20.6,29.33,140.1,1265.0,0.1178,0.277,0.3514,0.152,0.2397,0.07016,...,39.42,184.6,1821.0,0.165,0.8681,0.9387,0.265,0.4087,0.124,0.0
568,7.76,24.54,47.92,181.0,0.05263,0.04362,0.0,0.0,0.1587,0.05884,...,30.37,59.16,268.6,0.08996,0.06444,0.0,0.0,0.2871,0.07039,1.0


## Save Model

In [27]:
import pickle

In [28]:
# Persist the linear-kernel classifier to disk. The `with` block guarantees
# the file handle is flushed and closed before the file is read back.
# Only the classifier is written here; the fitted scaler `sc` is not saved.
with open('save_mode', 'wb') as model_file:
    pickle.dump(svc_linear, model_file)

In [29]:
# Reload the pickled classifier; the `with` block closes the file handle
# deterministically instead of leaving it to garbage collection.
# NOTE: pickle.load can execute arbitrary code, so only load files that
# were produced by a trusted source.
with open('save_mode', 'rb') as model_file:
    save = pickle.load(model_file)

In [30]:
# Check that the reloaded linear-kernel model can predict on the scaled sample.
save.predict(pre_sc)

array([1.])

In [31]:
import joblib

In [32]:
# Persist the polynomial-kernel classifier with joblib, which takes a file
# path directly; the displayed value is the list of files written.
joblib.dump(svc_poly, 'save_model2')

['save_model2']

In [33]:
# Reload the joblib-saved polynomial-kernel classifier from disk.
save2 = joblib.load('save_model2')

In [34]:
# Check that the reloaded polynomial-kernel model can predict on the scaled sample.
save2.predict(pre_sc)

array([1.])