# 그리드 서치

# 최고 와인 품질 예측

## 라이브러리 불러오기

In [1]:
# 기본 패키지
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 전처리 및 모델링 준비를 위한 패키지
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

# Hyper Parameter 탐색에 사용할 모델
from sklearn.svm import SVC

# 모델 평가를 위한 패키지
from sklearn.metrics import accuracy_score, precision_score, recall_score

## 데이터 불러오기

In [2]:
wine = pd.read_csv('./data/winequality-red.csv')
df = wine.copy()
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


## 데이터 전처리

1. 와인의 품질을 good, bad로 나누기

2. 모델링에 활용할 수 있도록 라벨링 진행

3. 종속변수와 독립변수 분류

4. 학습 및 평가에 활용할 데이터를 나누기

<br>

pandas.cut : 데이터를 구간 길이에 따라 나눌 때 사용

pandas.cut(데이터, 구간(bins), label)

In [3]:
df['quality'].value_counts()

quality
5    681
6    638
7    199
4     53
8     18
3     10
Name: count, dtype: int64

In [4]:
# step1

bins = (2, 6.5, 8 ) # 2 ~ 6.5, 6.5 ~ 8로 구간 나누기
labels = ['bad', 'good']
df['quality'] = pd.cut(df['quality'], bins, labels = labels)

df.head()
df['quality'].value_counts()

quality
bad     1382
good     217
Name: count, dtype: int64

In [5]:
# stpe2
le = LabelEncoder()
df['quality'] = le.fit_transform(df['quality'])

# step3
y = df['quality']
X = df.drop('quality', axis = 1)

# step4
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

## 그리드서치를 사용해 하이퍼파라미터 찾기

### 하이퍼파라미터 튜닝 이전

In [6]:
svc = SVC()
svc.fit(X_train, y_train)
pred = svc.predict(X_test)

print('Accuracy : ', accuracy_score(y_test, pred))
print('Precision : ', precision_score(y_test, pred))
print('Recall : ', recall_score(y_test, pred))

Accuracy :  0.85625
Precision :  1.0
Recall :  0.02127659574468085


### 그리드 서치 사용

In [7]:
# step1 모델 선언
svc = SVC()

# step2 튜닝에 사용할 파라미터 나열
params = {'C' : [0.1, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4],
          'gamma' : [0.1, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4]}

# step3 교차 검증을 통해 모델의 학습 성과 높이기
kf = KFold(random_state = 30,
           n_splits = 10,
           shuffle = True)

# step4 그리드서치 모델 선언
grid = GridSearchCV(estimator = svc,
                    param_grid = params,
                    cv = kf,
                    verbose = 2)

# step5 그리드서치를 활용한 학습 진행
grid.fit(X_train, y_train)

Fitting 10 folds for each of 64 candidates, totalling 640 fits
[CV] END ...................................C=0.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=0.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=0.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=0.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=0.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=0.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=0.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=0.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=0.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=0.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=0.1, gamma=0.8; total time=   0.0s
[CV] END ...................................C=

[CV] END ...................................C=0.8, gamma=0.9; total time=   0.0s
[CV] END ...................................C=0.8, gamma=0.9; total time=   0.0s
[CV] END ...................................C=0.8, gamma=0.9; total time=   0.0s
[CV] END ...................................C=0.8, gamma=0.9; total time=   0.0s
[CV] END ...................................C=0.8, gamma=0.9; total time=   0.0s
[CV] END ...................................C=0.8, gamma=0.9; total time=   0.0s
[CV] END .....................................C=0.8, gamma=1; total time=   0.0s
[CV] END .....................................C=0.8, gamma=1; total time=   0.0s
[CV] END .....................................C=0.8, gamma=1; total time=   0.0s
[CV] END .....................................C=0.8, gamma=1; total time=   0.0s
[CV] END .....................................C=0.8, gamma=1; total time=   0.0s
[CV] END .....................................C=0.8, gamma=1; total time=   0.0s
[CV] END ...................

[CV] END ...................................C=0.9, gamma=1.1; total time=   0.0s
[CV] END ...................................C=0.9, gamma=1.1; total time=   0.0s
[CV] END ...................................C=0.9, gamma=1.1; total time=   0.0s
[CV] END ...................................C=0.9, gamma=1.2; total time=   0.0s
[CV] END ...................................C=0.9, gamma=1.2; total time=   0.0s
[CV] END ...................................C=0.9, gamma=1.2; total time=   0.0s
[CV] END ...................................C=0.9, gamma=1.2; total time=   0.0s
[CV] END ...................................C=0.9, gamma=1.2; total time=   0.0s
[CV] END ...................................C=0.9, gamma=1.2; total time=   0.0s
[CV] END ...................................C=0.9, gamma=1.2; total time=   0.0s
[CV] END ...................................C=0.9, gamma=1.2; total time=   0.0s
[CV] END ...................................C=0.9, gamma=1.2; total time=   0.0s
[CV] END ...................

[CV] END .....................................C=1, gamma=1.4; total time=   0.0s
[CV] END .....................................C=1, gamma=1.4; total time=   0.0s
[CV] END .....................................C=1, gamma=1.4; total time=   0.0s
[CV] END .....................................C=1, gamma=1.4; total time=   0.0s
[CV] END .....................................C=1, gamma=1.4; total time=   0.0s
[CV] END .....................................C=1, gamma=1.4; total time=   0.0s
[CV] END ...................................C=1.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=1.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=1.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=1.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=1.1, gamma=0.1; total time=   0.0s
[CV] END ...................................C=1.1, gamma=0.1; total time=   0.0s
[CV] END ...................

[CV] END ...................................C=1.2, gamma=0.8; total time=   0.0s
[CV] END ...................................C=1.2, gamma=0.8; total time=   0.0s
[CV] END ...................................C=1.2, gamma=0.8; total time=   0.0s
[CV] END ...................................C=1.2, gamma=0.9; total time=   0.0s
[CV] END ...................................C=1.2, gamma=0.9; total time=   0.0s
[CV] END ...................................C=1.2, gamma=0.9; total time=   0.0s
[CV] END ...................................C=1.2, gamma=0.9; total time=   0.0s
[CV] END ...................................C=1.2, gamma=0.9; total time=   0.0s
[CV] END ...................................C=1.2, gamma=0.9; total time=   0.0s
[CV] END ...................................C=1.2, gamma=0.9; total time=   0.0s
[CV] END ...................................C=1.2, gamma=0.9; total time=   0.0s
[CV] END ...................................C=1.2, gamma=0.9; total time=   0.0s
[CV] END ...................

[CV] END ...................................C=1.3, gamma=1.1; total time=   0.0s
[CV] END ...................................C=1.3, gamma=1.1; total time=   0.0s
[CV] END ...................................C=1.3, gamma=1.1; total time=   0.0s
[CV] END ...................................C=1.3, gamma=1.1; total time=   0.0s
[CV] END ...................................C=1.3, gamma=1.1; total time=   0.0s
[CV] END ...................................C=1.3, gamma=1.1; total time=   0.0s
[CV] END ...................................C=1.3, gamma=1.1; total time=   0.0s
[CV] END ...................................C=1.3, gamma=1.1; total time=   0.0s
[CV] END ...................................C=1.3, gamma=1.1; total time=   0.0s
[CV] END ...................................C=1.3, gamma=1.2; total time=   0.0s
[CV] END ...................................C=1.3, gamma=1.2; total time=   0.0s
[CV] END ...................................C=1.3, gamma=1.2; total time=   0.0s
[CV] END ...................

[CV] END ...................................C=1.4, gamma=1.3; total time=   0.0s
[CV] END ...................................C=1.4, gamma=1.3; total time=   0.0s
[CV] END ...................................C=1.4, gamma=1.3; total time=   0.0s
[CV] END ...................................C=1.4, gamma=1.3; total time=   0.0s
[CV] END ...................................C=1.4, gamma=1.3; total time=   0.0s
[CV] END ...................................C=1.4, gamma=1.4; total time=   0.0s
[CV] END ...................................C=1.4, gamma=1.4; total time=   0.0s
[CV] END ...................................C=1.4, gamma=1.4; total time=   0.0s
[CV] END ...................................C=1.4, gamma=1.4; total time=   0.0s
[CV] END ...................................C=1.4, gamma=1.4; total time=   0.0s
[CV] END ...................................C=1.4, gamma=1.4; total time=   0.0s
[CV] END ...................................C=1.4, gamma=1.4; total time=   0.0s
[CV] END ...................

In [9]:
grid.best_params_

{'C': 1.2, 'gamma': 1.4}

In [10]:
svc = SVC(C = 12, gamma = 1.4)
svc.fit(X_train, y_train)
pred = svc.predict(X_test)

print('Accuracy : ', accuracy_score(y_test, pred))
print('Precision : ', precision_score(y_test, pred))
print('Recall : ', recall_score(y_test, pred))

Accuracy :  0.89375
Precision :  1.0
Recall :  0.2765957446808511


=> 이전보다 정확도가 증가하였다