# **SVM2**

In [None]:
from sklearn.datasets import fetch_openml
import pandas as pd
import numpy as np
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

## **MNIST Dataset**


*   MNIST dataset은 손으로 쓴 숫자 이미지를 28x28 픽셀의 데이터로 변환한 데이터셋
*   총 7만 개의 데이터로 이루어져 있음



In [None]:
# Download (or load from the local cache) the 70,000-sample MNIST digits dataset from OpenML.
# as_frame=False pins the return type to NumPy arrays; without it, newer
# scikit-learn releases return pandas objects for this dataset, so the type of
# mnist['data'] / mnist['target'] would depend on the installed version.
mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)

In [None]:
# Feature matrix and labels; the labels are cast to unsigned 8-bit integers.
X, y = mnist['data'], mnist['target'].astype(np.uint8)

# Work on a subset to keep training time manageable:
# the first 10,000 samples for training and the next 2,000 for testing.
X_train, y_train = X[:10000], y[:10000]
X_test, y_test = X[10000:12000], y[10000:12000]

## **Linear SVC**

In [None]:
# Linear support vector classifier (SVC = Support Vector Classifier).
# random_state is fixed for reproducibility; max_iter is set to 5000.
# NOTE(review): the features are passed in exactly as loaded (no scaling step
# is visible in this notebook) - consider standardizing them if convergence
# warnings appear.
lin_clf = LinearSVC(max_iter=5000, random_state=42)
lin_clf.fit(X=X_train, y=y_train)



LinearSVC(max_iter=5000, random_state=42)

In [None]:
# Predict with the fitted linear model on the training split, then on the test split.
lin_pred_train, lin_pred_test = [lin_clf.predict(split) for split in (X_train, X_test)]

# Report accuracy on the training data and on the test data.
print(f'Training accuracy : {accuracy_score(y_train, lin_pred_train):.4f} | Test accuracy : {accuracy_score(y_test, lin_pred_test):.4f}')

Training accuracy : 0.9520 | Test accuracy : 0.8515


## **Polynomial Kernel**

In [None]:
# Polynomial-kernel SVM of degree 3. The kernel lets the classifier learn
# decision boundaries that are cubic in the input features without building
# the polynomial features explicitly (degree is the polynomial order, not the
# dimensionality of the mapped space).
poly_clf = SVC(degree=3, kernel='poly')
poly_clf.fit(X=X_train, y=y_train)

SVC(kernel='poly')

In [None]:
# Predict with the fitted polynomial-kernel model on the training split, then on the test split.
poly_pred_train, poly_pred_test = [poly_clf.predict(split) for split in (X_train, X_test)]

# Report accuracy on the training data and on the test data.
print(f'Training accuracy : {accuracy_score(y_train, poly_pred_train):.4f} | Test accuracy : {accuracy_score(y_test, poly_pred_test):.4f}')

Training accuracy : 0.9884 | Test accuracy : 0.9560


## **Radial Basis Function (RBF) Kernel**

In [None]:
# RBF (radial basis function) kernel SVM. The RBF kernel corresponds to an
# implicit mapping of the input features into an infinite-dimensional space.
rbf_clf = SVC(kernel='rbf')
rbf_clf.fit(X=X_train, y=y_train)

SVC()

In [None]:
# Predict with the fitted RBF-kernel model on the training split, then on the test split.
rbf_pred_train, rbf_pred_test = [rbf_clf.predict(split) for split in (X_train, X_test)]

# Report accuracy on the training data and on the test data.
print(f'Training accuracy : {accuracy_score(y_train, rbf_pred_train):.4f} | Test accuracy : {accuracy_score(y_test, rbf_pred_test):.4f}')

Training accuracy : 0.9851 | Test accuracy : 0.9655


# **퀴즈 1**

*   make_moons 데이터에 대하여 SVM 모델을 생성하고 학습된 모델의 정확도를 도출하시오. 

*   단, Test accuracy는 90% 이상이어야 한다.





In [None]:
from sklearn.datasets import make_moons

# Generate a 100-sample noisy two-moons dataset; random_state is fixed so the
# same points are produced on every run.
X, y = make_moons(
    n_samples=100,
    noise=0.25,
    random_state=0,
)

# Split into training and test sets. No test_size is given, so the library
# default proportion is used; random_state fixes the shuffle.
# Note: this rebinds X, y, X_train, X_test, y_train and y_test.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

In [None]:
# RBF (radial basis function) kernel SVM fitted on the current training split.
# The RBF kernel corresponds to an implicit mapping of the input features into
# an infinite-dimensional space.
rbf_clf = SVC(kernel='rbf')
rbf_clf.fit(X=X_train, y=y_train)

SVC()

In [None]:
# Predict with the fitted RBF-kernel model on the training split, then on the test split.
rbf_pred_train, rbf_pred_test = [rbf_clf.predict(split) for split in (X_train, X_test)]

# Report accuracy on the training data and on the test data.
print(f'Training accuracy : {accuracy_score(y_train, rbf_pred_train):.4f} | Test accuracy : {accuracy_score(y_test, rbf_pred_test):.4f}')

Training accuracy : 0.9467 | Test accuracy : 0.9600
