In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [15]:
dataset = pd.read_csv('data/boston_housing.csv').values
dataset

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 3.9690e+02, 4.9800e+00,
        2.4000e+01],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 3.9690e+02, 9.1400e+00,
        2.1600e+01],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 3.9283e+02, 4.0300e+00,
        3.4700e+01],
       ...,
       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 3.9690e+02, 5.6400e+00,
        2.3900e+01],
       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 3.9345e+02, 6.4800e+00,
        2.2000e+01],
       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 3.9690e+02, 7.8800e+00,
        1.1900e+01]])

In [5]:
# X: age열
x = dataset[:,6:7] # Age 열 (dataset[:,6] → 1D이므로 반드시 6:7 사용할 것)
y = dataset[:,-1] # price 열
print(x.shape, y.shape)

(506, 1) (506,)


In [6]:
from sklearn.model_selection import train_test_split
xTrain,xTest,yTrain,yTest = train_test_split(x,y,test_size=0.2,random_state=1)

In [7]:
yTrain.shape

(404,)

In [8]:
# Scaling (X,y)
from sklearn.preprocessing import StandardScaler
xSC = StandardScaler()
ySC = StandardScaler()

# scaling을 위한 정보 수집(준비): 평균과 분산 등을 찾음
xSC.fit(xTrain)
ySC.fit(yTrain.reshape(-1,1)) 
# scaler에는 반드시 2D 사용
# yTrain은 1D이므로 reshape을 이용하여 2D로 변환
# reshape(-1,1): 1열의 데이터로 변경하고, 행의 수는 데이터에 자동으로 맞춰줌
# reshape(404,1)과 동일
# ex> reshape(-1): 1D array로 변경

# 변환 수행
xTrainSC = xSC.transform(xTrain)
yTrainSC = ySC.transform(yTrain.reshape(-1,1))

# xTrainSC와 yTrainSC: xTrain과 yTrain을 scaling한 결과
# 만약, scaling된 x또는 y를 다시 기존 값으로 변경하기 위해서는
#       xSC.inverse_transform(xTrainSC), ySC.inverse_transform(yTrainSC)

In [9]:
from sklearn.svm import SVR
c = 1000
eps = 0.1
poly_d = 2
poly_coef = 1

svrLinear = SVR(kernel='linear',C=c,epsilon=eps)
# kernel = 'linear', 'rbf', 'poly', 'sigmoid' (default='rbf')
# C: C가 클 경우, error 최소화 중요도 커짐, 작을 경우, 정규화 중요도 커짐 (default=1)
# epsilon: 오차 허용 범위 (default=0.1)
svrPoly = SVR(kernel='poly',degree=poly_d,coef0=poly_coef,C=c,epsilon=eps)
# degree: polynomial degree (default=3)
# coef0: (xTx + coef0)^d (default=0)
svrRbf = SVR(kernel='rbf',gamma='scale',C=c,epsilon=eps)
# gamma: 1/(2*variance) (default='scale'=1/(특징수*분산))

In [10]:
svrLinear.fit(xTrainSC,yTrainSC.reshape(-1))
# svr.fit(x,y): x는 2D, y는 1D (y가 2D일 때 경고)
svrPoly.fit(xTrainSC,yTrainSC.reshape(-1))
svrRbf.fit(xTrainSC,yTrainSC.reshape(-1))

SVR(C=1000)

In [11]:
# predict에도 반드시 scaling된 데이터 사용
xTestSC = xSC.transform(xTest)

ySCpredLinear = svrLinear.predict(xTestSC)
ySCpredPoly = svrPoly.predict(xTestSC)
ySCpredRbf = svrRbf.predict(xTestSC)
# predict한 결과도 scaling된 결과

In [12]:
# scaling된 예측값을 기존 값으로 역변환
yPredLinear = ySC.inverse_transform(ySCpredLinear)
yPredPoly = ySC.inverse_transform(ySCpredPoly)
yPredRbf = ySC.inverse_transform(ySCpredRbf)

ValueError: Expected 2D array, got 1D array instead:
array=[-2.24030444e-01 -3.11014264e-02 -4.35003182e-01  4.54691063e-01
 -5.83516886e-01 -6.07112521e-01  4.26931493e-01 -6.36260071e-01
 -5.50205401e-01 -1.65735345e-01 -2.87877457e-01  3.61696501e-01
 -1.53243538e-01  3.20057145e-01 -5.03014131e-01 -6.36260071e-01
 -5.43265509e-01 -4.00303719e-01  2.18734711e-01 -5.76576994e-01
 -3.26740856e-01 -6.36260071e-01 -4.59986796e-01  1.95139076e-01
  5.04658291e-01 -5.72413058e-01 -5.93232736e-01  7.85488785e-02
 -4.89134345e-01 -6.20992307e-01  2.53434175e-01 -2.22642465e-01
  3.08953316e-01 -5.89068800e-01  3.04789381e-01  1.60439612e-01
 -5.86292843e-01 -5.65898455e-04 -6.36260071e-01 -3.55888405e-01
  5.12986162e-01  2.17346733e-01 -3.69768191e-01 -6.00172629e-01
 -5.90456779e-01 -3.10085113e-01 -6.11276457e-01 -5.62697208e-01
 -5.39101573e-01 -4.33615204e-01 -1.86555023e-01  1.74778226e-02
 -6.30249329e-02 -5.82128908e-01 -6.08500500e-01 -1.89330980e-01
 -3.28128834e-01  1.88658011e-02 -3.82259998e-01 -2.25418422e-01
 -6.36260071e-01 -2.69374908e-02 -6.20992307e-01 -4.43331053e-01
  6.15696574e-01 -5.88609972e-02 -2.65669800e-01 -7.69047183e-02
  1.64603548e-01 -5.22445830e-01  3.50592673e-01 -6.36260071e-01
 -5.16893916e-01 -4.59986796e-01 -4.30839247e-01  1.92363119e-01
 -6.01560607e-01  3.08953316e-01 -6.36260071e-01  3.67248415e-01
 -6.19604328e-01 -3.94751804e-01  2.11794819e-01 -5.61309230e-01
  1.35455999e-01  4.72734784e-01 -6.12664436e-01  5.47685626e-01
 -2.28194379e-01 -6.11276457e-01 -5.11342002e-01 -4.54434882e-01
 -6.19604328e-01 -1.72675238e-01 -3.40620641e-01 -3.98915740e-01
 -2.07374701e-01  7.76197279e-03 -3.35068727e-01 -3.34185554e-03
 -7.69047183e-02 -6.36260071e-01].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [None]:
plt.scatter(xTrain,yTrain,alpha = 0.5)
plt.scatter(xTest,yTest,linewidth=5,alpha = 0.5,color='orange')

xGrid = np.arange(min(x)-5,max(x)+5,0.1).reshape(-1,1)
# svr model을 그리기 위한 좌표의 x값 
# (xGrid, 각 model의 prediction)을 이어서 model을 그림

xGridSc = xSC.transform(xGrid)
# xGrid를 SVR에 이용한 scale로 변경
yGridLinearSc = svrLinear.predict(xGridSc)
yGridPolySc = svrPoly.predict(xGridSc)
yGridRbfSc = svrRbf.predict(xGridSc)
# 각 model의 prediction

yGridLinear = ySC.inverse_transform(yGridLinearSc)
yGridPoly = ySC.inverse_transform(yGridPolySc)
yGridRbf = ySC.inverse_transform(yGridRbfSc)
# 각 model prediction을 다시 원래 scale로 변경

plt.plot(xGrid,yGridLinear,color='purple',linewidth=3, label='Linear SVR')
plt.plot(xGrid,yGridPoly, color='green',linewidth=3, label='Poly SVR')
plt.plot(xGrid,yGridRbf, color='red', linewidth=3, label='RBF SVR')
plt.legend()

In [None]:
yGidLinear

In [None]:
print(svrRbf.support_vectors_)
# 잘못 분류된 학습 데이터 (w를 결정함)
print(svrRbf.support_)
# 잘못 분류된 학습 데이터의 index
print(svrRbf.intercept_)
# model의 bias
print(svrRbf.n_support_)
# support vector의 수
print(svrRbf.dual_coef_)
# supporv vector의 alpha와 target(y)의 곱

# model을 생성할 때 필요한 데이터
# → w: support_vectors, dual_coef_
# → b: intercept_
# 미리 지정한 gamma값과, Polynomial일 경우, degree와 coef0도 필요

### RBF SVR prediction

In [None]:
svrRbf.predict([[0.5]])

In [None]:
x = [[0.5]]
pred = 0
gamma = 1 / (xTrainSC.shape[1] * xTrainSC.var())

for i in np.arange(svrRbf.n_support_):
    pred += svrRbf.dual_coef_[0,i] * np.exp(-gamma * (np.linalg.norm(svrRbf.support_vectors_[i,:]-x))**2)
    
pred + svrRbf.intercept_


### Polynomial SVR prediction

In [None]:
svrPoly.predict([[0.5]])

In [None]:
x = [[0.5]]
pred = 0
gamma = 1 / (xTrainSC.shape[1] * xTrainSC.var())

for i in np.arange(svrPoly.n_support_):
    pred += 
    
print(pred + svrPoly.intercept_)

### Linear SVR prediction

In [None]:
svrLinear.predict([[0.5]])

In [None]:
x = [[0.5]]
pred = 0
