In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn import svm

In [3]:
data = pd.read_csv("winequality-red.csv")
print(data['quality'].value_counts())
bins = (2, 6.5, 8)
group_names = [0, 1] # 0:Bad, 1:Good
data['quality'] = pd.cut(data['quality'], bins=bins, labels=group_names)     # quality를 수치가 아닌, good or bad 둘중 하나로 변경.
print(data['quality'].value_counts())
X = data.drop('quality', axis=1)                # feature data
y = data['quality']                 # target data

371836


In [45]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, random_state=42)
print(len(X_train))
print(len(y_train))
print(len(X_test))
print(len(y_test))

1279
1279
320
320


In [47]:
standardscaler = StandardScaler()                   
X_train_scaled = standardscaler.fit_transform(X_train)
X_test_scaled = standardscaler.transform(X_test)

In [48]:
svm_model = svm.SVC(random_state=1234)

In [49]:
param_grid = [
    {'C': [0.1, 1, 10], 'degree':[2, 3], 'kernel': ['poly']},
    {'C': [0.1, 1, 10], 'gamma': [0.0001, 0.001, 0.01], 'kernel': ['linear', 'rbf']},]

In [50]:
clf = GridSearchCV(svm_model, param_grid)
clf.fit(X_train_scaled, y_train)

GridSearchCV(estimator=SVC(random_state=1234),
             param_grid=[{'C': [0.1, 1, 10], 'degree': [2, 3],
                          'kernel': ['poly']},
                         {'C': [0.1, 1, 10], 'gamma': [0.0001, 0.001, 0.01],
                          'kernel': ['linear', 'rbf']}])

In [51]:
pred = clf.predict(X_test_scaled)

In [53]:
conf_mat = confusion_matrix(y_test, pred)
clf_re = classification_report(y_test, pred)
roc = roc_auc_score(y_test, pred)

In [63]:
print("confusion matrix:\n\n", conf_mat)
print("\nㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ\n")
print("classification report:\n\n", clf_re)
print("\nㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ\n")
print("roc_auc score:\n\n", roc)
print("\nㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ\n")
print("Best parameters:", clf.best_params_)

confusion matrix:

 [[253  20]
 [ 25  22]]

ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ

classification report:

               precision    recall  f1-score   support

           0       0.91      0.93      0.92       273
           1       0.52      0.47      0.49        47

    accuracy                           0.86       320
   macro avg       0.72      0.70      0.71       320
weighted avg       0.85      0.86      0.86       320


ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ

roc_auc score:

 0.6974125165614528

ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ

Best parameters: {'C': 10, 'degree': 3, 'kernel': 'poly'}
