# Lab 5. Using Support Vector Machine
## 5.1 Using Support Vector Machine Classification
- step 1. Load Moon test dataset

In [1]:
#Step1.load Moon test Dataset
from sklearn import datasets
moon = datasets.make_moons(shuffle=False)
X = moon[0][:, 0].reshape(-1,1) #use only a features x1
y=moon[1] #label

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from sklearn.preprocessing import StandardScaler
num_pipeline = Pipeline([
        ('imputer', Imputer(strategy="median")),
        ('std_scaler', StandardScaler()),
    ])
X_prepared = num_pipeline.fit_transform(X)



In [3]:
#training set & test set 나누기
from sklearn.model_selection import train_test_split
X_train , X_test , y_train , y_test = train_test_split(X_prepared,y,test_size=0.2,random_state=42)

- Step 2. Train linear and non-linear SVC model using the dataset

In [4]:
#LinearSVC
import time
from sklearn.svm import LinearSVC
from sklearn.preprocessing import PolynomialFeatures
lin_svm_clf = LinearSVC(C=10, loss="hinge", random_state=42)
poly_svm_clf = Pipeline([
        ("poly_features", PolynomialFeatures(degree=3)),
        ("linear_svc", LinearSVC(C=10, loss="hinge", random_state=42)),
    ])
start_time = time.time()
lin_svm_clf.fit(X_train,y_train)
print('linear svc training time : {}'.format(time.time()-start_time))
start_time = time.time()
poly_svm_clf.fit(X_train,y_train)
print('polynomial linear svc training time : {}'.format(time.time()-start_time))

linear svc training time : 0.0009968280792236328
polynomial linear svc training time : 0.003989219665527344




In [5]:
#SVC : Non-Linear
from sklearn.svm import SVC
poly_kernel_svm_clf = SVC(kernel='poly',coef0=1, C=5,random_state=42)
rbf_kernel_svm_clf = SVC(kernel='rbf',gamma=5, C=0.001,random_state=42)
start_time = time.time()
poly_kernel_svm_clf.fit(X_train,y_train)
print('polynomial kernel svc training time : {}'.format(time.time()-start_time))
start_time = time.time()
rbf_kernel_svm_clf.fit(X_train,y_train)
print('rbf kernel svc training time : {}'.format(time.time()-start_time))



polynomial kernel svc training time : 0.001995563507080078
rbf kernel svc training time : 0.0019943714141845703


- Step 3. Evaluate that whether the dataset is linear or not

In [6]:
from sklearn.metrics import accuracy_score
def getScore(model):
    y_score = model.predict(X_test)
    accuracy = accuracy_score(y_test,y_score)
    return accuracy
#linearSVC
print('Linear SVC Accuracy : {}'.format(getScore(lin_svm_clf)))
print('Polynomial_Linear SVC Accuracy : {}'.format(getScore(poly_svm_clf)))
print('Polynomial_Kernel SVC Accuracy : {}'.format(getScore(poly_kernel_svm_clf)))
print('rbf_Kernel SVC Accuracy : {}'.format(getScore(rbf_kernel_svm_clf)))

Linear SVC Accuracy : 0.75
Polynomial_Linear SVC Accuracy : 0.7
Polynomial_Kernel SVC Accuracy : 0.7
rbf_Kernel SVC Accuracy : 0.4


- Step 4. Find the best hyperparameter in the picked model

In [7]:
def getAccuracy(grid):
    cvres =grid.cv_results_
    for mean_score, params in zip(cvres["mean_train_score"], cvres["params"]):
        print(mean_score, params)
    print('##########################################################')
    print('Best hyperparameter: {}'.format(grid.best_params_))
    print('Best accuracy of hyperparameter : {}'.format(grid.best_score_))

In [8]:
#pick : linearSVC
#Grid Search
from sklearn.model_selection import GridSearchCV
lin_svc = LinearSVC(loss="hinge", random_state=42)
param_grid=[
    {'C':[0.001,0.01,0.1,5,10,100]}
]
grid_svc =GridSearchCV(lin_svc, param_grid, scoring='accuracy', cv=10, n_jobs=-1)
grid_svc.fit(X_train,y_train)
getAccuracy(grid_svc)

0.6458049285055845 {'C': 0.001}
0.6458049285055845 {'C': 0.01}
0.6471943533346196 {'C': 0.1}
0.65412030784402 {'C': 5}
0.65412030784402 {'C': 10}
0.6554706090423821 {'C': 100}
##########################################################
Best hyperparameter: {'C': 0.1}
Best accuracy of hyperparameter : 0.65




- Step 5. Train the picked model again using all features.

In [9]:
#using all features
#picked model = poly Kernel SVM Classification
X_all = moon[0]
X_prepared = num_pipeline.fit_transform(X_all)
X_train , X_test , y_train , y_test = train_test_split(X_prepared,y,test_size=0.2,random_state=42)

In [10]:
#training time & accuracy
lin_svc_2 = LinearSVC(C=0.1, loss="hinge", random_state=42)
start_time = time.time()
lin_svc_2.fit(X_train,y_train)
print('poly kernel svc training time : {}'.format(time.time()-start_time))
print('poly_Kernel SVC Accuracy : {}'.format(getScore(lin_svc_2)))

poly kernel svc training time : 0.001994609832763672
poly_Kernel SVC Accuracy : 0.85


In [12]:
#grid search
lin_svc_3 = LinearSVC(C=0.1, loss="hinge", random_state=42)
grid_poly_2 =GridSearchCV(lin_svc_3, param_grid, scoring='accuracy', cv=10, n_jobs=-1)
grid_poly_2.fit(X_train,y_train)
getAccuracy(grid_poly_2)

0.849949621626257 {'C': 0.001}
0.8555443008124852 {'C': 0.01}
0.8680258108774412 {'C': 0.1}
0.8763602160910668 {'C': 5}
0.8818972817115786 {'C': 10}
0.8860639483782451 {'C': 100}
##########################################################
Best hyperparameter: {'C': 10}
Best accuracy of hyperparameter : 0.8875


