# Lab 5. Using Support Vector Machine
## 5.1 Using Support Vector Machine Classification
- step 1. Load Moon test dataset

In [1]:
#Step1.load Moon test Dataset
from sklearn import datasets
moon = datasets.make_moons(shuffle=False)
X = moon[0][:, 0].reshape(-1,1) #use only a features x1
y=moon[1] #label

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from sklearn.preprocessing import StandardScaler
num_pipeline = Pipeline([
        ('imputer', Imputer(strategy="median")),
        ('std_scaler', StandardScaler()),
    ])
X_prepared = num_pipeline.fit_transform(X)



In [3]:
#training set & test set 나누기
from sklearn.model_selection import train_test_split
X_train , X_test , y_train , y_test = train_test_split(X_prepared,y,test_size=0.2,random_state=42)

- Step 2. Train linear and non-linear SVC model using the dataset

In [4]:
#LinearSVC
import time
from sklearn.svm import LinearSVC
from sklearn.preprocessing import PolynomialFeatures
lin_svm_clf = LinearSVC(C=10, loss="hinge", random_state=42)
poly_svm_clf = Pipeline([
        ("poly_features", PolynomialFeatures(degree=3)),
        ("linear_svc", LinearSVC(C=10, loss="hinge", random_state=42)),
    ])
start_time = time.time()
lin_svm_clf.fit(X_train,y_train)
print('linear svc training time : {}'.format(time.time()-start_time))
start_time = time.time()
poly_svm_clf.fit(X_train,y_train)
print('polynomial linear svc training time : {}'.format(time.time()-start_time))

linear svc training time : 0.0009963512420654297
polynomial linear svc training time : 0.0029909610748291016




In [5]:
#SVC : Non-Linear
from sklearn.svm import SVC
poly_kernel_svm_clf = SVC(kernel='poly',coef0=1, C=5,random_state=42)
rbf_kernel_svm_clf = SVC(kernel='rbf',gamma=5, C=0.001,random_state=42)
start_time = time.time()
poly_kernel_svm_clf.fit(X_train,y_train)
print('polynomial kernel svc training time : {}'.format(time.time()-start_time))
start_time = time.time()
rbf_kernel_svm_clf.fit(X_train,y_train)
print('rbf kernel svc training time : {}'.format(time.time()-start_time))

polynomial kernel svc training time : 0.003988027572631836
rbf kernel svc training time : 0.0009951591491699219




- Step 3. Evaluate that whether the dataset is linear or not

In [6]:
from sklearn.metrics import accuracy_score
def getScore(model):
    y_score = model.predict(X_test)
    accuracy = accuracy_score(y_test,y_score)
    return accuracy
#linearSVC
print('Linear SVC Accuracy : {}'.format(getScore(lin_svm_clf)))
print('Polynomial_Linear SVC Accuracy : {}'.format(getScore(poly_svm_clf)))
print('Polynomial_Kernel SVC Accuracy : {}'.format(getScore(poly_kernel_svm_clf)))
print('rbf_Kernel SVC Accuracy : {}'.format(getScore(rbf_kernel_svm_clf)))

Linear SVC Accuracy : 0.75
Polynomial_Linear SVC Accuracy : 0.7
Polynomial_Kernel SVC Accuracy : 0.7
rbf_Kernel SVC Accuracy : 0.4


- Step 4. Find the best hyperparameter in the picked model

In [9]:
def getAccuracy(grid):
    cvres =grid.cv_results_
    for mean_score, params in zip(cvres["mean_train_score"], cvres["params"]):
        print(mean_score, params)
    print('##########################################################')
    print('Best hyperparameter: {}'.format(grid.best_params_))
    print('Best accuracy of hyperparameter : {}'.format(grid.best_score_))

In [10]:
#pick SVR : poly kernel 
#Grid Search
from sklearn.model_selection import GridSearchCV
poly_ker_svm_clf = SVC(kernel='poly',random_state=42)
param_grid=[
    {'coef0':[1 ,5 , 10],
     'C':[0.001,0.01,0.1,5,10,100]}
]
grid_svc =GridSearchCV(poly_ker_svm_clf, param_grid, scoring='accuracy', cv=10, n_jobs=-1)
grid_svc.fit(X_train,y_train)
getAccuracy(grid_svc)

0.6248753939160074 {'C': 0.001, 'coef0': 1}
0.6610457264132742 {'C': 0.001, 'coef0': 5}
0.6596187857740048 {'C': 0.001, 'coef0': 10}
0.710974446373829 {'C': 0.01, 'coef0': 1}
0.7095665316097497 {'C': 0.01, 'coef0': 5}
0.7137717859669432 {'C': 0.01, 'coef0': 10}
0.727775901987266 {'C': 0.1, 'coef0': 1}
0.7319045169035523 {'C': 0.1, 'coef0': 5}
0.7333129676077776 {'C': 0.1, 'coef0': 10}
0.8040750209016657 {'C': 5, 'coef0': 1}
0.8054639097905545 {'C': 5, 'coef0': 5}
0.8054639097905545 {'C': 5, 'coef0': 10}
0.8124083542349989 {'C': 10, 'coef0': 1}
0.8124083542349989 {'C': 10, 'coef0': 5}
0.8124083542349989 {'C': 10, 'coef0': 10}
0.8193533346195897 {'C': 100, 'coef0': 1}
0.8193533346195897 {'C': 100, 'coef0': 5}
0.8193533346195897 {'C': 100, 'coef0': 10}
##########################################################
Best hyperparameter: {'C': 100, 'coef0': 1}
Best accuracy of hyperparameter : 0.8125




- Step 5. Train the picked model again using all features.

In [11]:
#using all features
#picked model = poly Kernel SVM Classification
X_all = moon[0]
X_prepared = num_pipeline.fit_transform(X_all)
X_train , X_test , y_train , y_test = train_test_split(X_prepared,y,test_size=0.2,random_state=42)

In [12]:
#training time & accuracy
poly_clf = SVC(kernel='poly',coef0=1, C=5,random_state=42)
start_time = time.time()
poly_clf.fit(X_train,y_train)
print('poly kernel svc training time : {}'.format(time.time()-start_time))
print('poly_Kernel SVC Accuracy : {}'.format(getScore(poly_clf)))

poly kernel svc training time : 0.001991748809814453
poly_Kernel SVC Accuracy : 1.0




In [13]:
#grid search
poly_clf_2 = SVC(kernel='poly',random_state=42)
grid_poly_2 =GridSearchCV(poly_clf, param_grid, scoring='accuracy', cv=10, n_jobs=-1)
grid_poly_2.fit(X_train,y_train)
getAccuracy(grid_poly_2)

0.5250016078204387 {'C': 0.001, 'coef0': 1}
0.8624506935065492 {'C': 0.001, 'coef0': 5}
0.8693761120758033 {'C': 0.001, 'coef0': 10}
0.865228471284327 {'C': 0.01, 'coef0': 1}
0.8791189679936544 {'C': 0.01, 'coef0': 5}
0.8818967457714322 {'C': 0.01, 'coef0': 10}
0.9277142688704526 {'C': 0.1, 'coef0': 1}
0.920769288485862 {'C': 0.1, 'coef0': 5}
0.9166216476943856 {'C': 0.1, 'coef0': 10}
1.0 {'C': 5, 'coef0': 1}
1.0 {'C': 5, 'coef0': 5}
1.0 {'C': 5, 'coef0': 10}
1.0 {'C': 10, 'coef0': 1}
1.0 {'C': 10, 'coef0': 5}
1.0 {'C': 10, 'coef0': 10}
1.0 {'C': 100, 'coef0': 1}
1.0 {'C': 100, 'coef0': 5}
1.0 {'C': 100, 'coef0': 10}
##########################################################
Best hyperparameter: {'C': 5, 'coef0': 1}
Best accuracy of hyperparameter : 1.0


