In [46]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings("ignore")


In [23]:
wine = load_wine()
wine.keys()
wine_data=wine.data
wine_label=wine.target
print(wine.target_names)
print(wine.feature_names)
print(wine.DESCR)

['class_0' 'class_1' 'class_2']
['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:              

In [38]:
X_train, X_test, y_train, y_test = train_test_split(wine_data, wine_label, test_size=0.2, random_state=32)

In [50]:
decision_tree = DecisionTreeClassifier(random_state=32)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)

print(classification_report(y_test, y_pred))
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print("micro 평균 f1 점수: {:.3f}".format(f1_score(y_test, y_pred, average="micro")))

              precision    recall  f1-score   support

           0       0.94      1.00      0.97        16
           1       0.89      0.80      0.84        10
           2       0.90      0.90      0.90        10

    accuracy                           0.92        36
   macro avg       0.91      0.90      0.90        36
weighted avg       0.92      0.92      0.91        36

0.9166666666666666
[[16  0  0]
 [ 1  8  1]
 [ 0  1  9]]
micro 평균 f1 점수: 0.917


In [56]:
random_forest = RandomForestClassifier(random_state=32)
random_forest.fit(X_train, y_train)
y_pred = random_forest.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print("micro 평균 f1 점수: {:.16f}".format(f1_score(y_test, y_pred, average="micro")))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      0.90      0.95        10
           2       0.91      1.00      0.95        10

    accuracy                           0.97        36
   macro avg       0.97      0.97      0.97        36
weighted avg       0.97      0.97      0.97        36

0.9722222222222222
[[16  0  0]
 [ 0  9  1]
 [ 0  0 10]]
micro 평균 f1 점수: 0.9722222222222222


In [57]:
svm_model=svm.SVC()
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)
print(classification_report(y_test, y_pred))
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print("micro 평균 f1 점수: {:.16f}".format(f1_score(y_test, y_pred, average="micro")))

              precision    recall  f1-score   support

           0       0.93      0.81      0.87        16
           1       0.41      0.90      0.56        10
           2       0.00      0.00      0.00        10

    accuracy                           0.61        36
   macro avg       0.45      0.57      0.48        36
weighted avg       0.53      0.61      0.54        36

0.6111111111111112
[[13  3  0]
 [ 1  9  0]
 [ 0 10  0]]
micro 평균 f1 점수: 0.6111111111111112


In [58]:
sgd_model = SGDClassifier()
sgd_model.fit(X_train, y_train)
y_pred = sgd_model.predict(X_test)
print(classification_report(y_test, y_pred))
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print("micro 평균 f1 점수: {:.16f}".format(f1_score(y_test, y_pred, average="micro")))

              precision    recall  f1-score   support

           0       0.88      0.88      0.88        16
           1       0.00      0.00      0.00        10
           2       0.40      0.80      0.53        10

    accuracy                           0.61        36
   macro avg       0.42      0.56      0.47        36
weighted avg       0.50      0.61      0.54        36

0.6111111111111112
[[14  0  2]
 [ 0  0 10]
 [ 2  0  8]]
micro 평균 f1 점수: 0.6111111111111112


In [59]:
logistic_model = LogisticRegression(solver='lbfgs', max_iter=10000).fit(X_train, y_train)
y_pred = logistic_model.predict(X_test)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(accuracy_score(y_test, y_pred))
print("micro 평균 f1 점수: {:.16f}".format(f1_score(y_test, y_pred, average="micro")))

              precision    recall  f1-score   support

           0       0.94      1.00      0.97        16
           1       0.89      0.80      0.84        10
           2       0.90      0.90      0.90        10

    accuracy                           0.92        36
   macro avg       0.91      0.90      0.90        36
weighted avg       0.92      0.92      0.91        36

[[16  0  0]
 [ 1  8  1]
 [ 0  1  9]]
0.9166666666666666
micro 평균 f1 점수: 0.9166666666666666


In [60]:
print(confusion_matrix(y_test, y_pred))
print("micro 평균 f1 점수: {:.16f}".format(f1_score(y_test, y_pred, average="micro")))
#confusion_matrix(y_test, fake_pred)
print(classification_report(y_test, y_pred))
#print(classification_report(y_test, fake_pred))
print(accuracy_score(y_test, y_pred))#, accuracy_score(y_test, fake_pred)

[[16  0  0]
 [ 1  8  1]
 [ 0  1  9]]
micro 평균 f1 점수: 0.9166666666666666
              precision    recall  f1-score   support

           0       0.94      1.00      0.97        16
           1       0.89      0.80      0.84        10
           2       0.90      0.90      0.90        10

    accuracy                           0.92        36
   macro avg       0.91      0.90      0.90        36
weighted avg       0.92      0.92      0.91        36

0.9166666666666666


정확도는 random_forest가 가장 높았다.

원하는 클래스에 맞게 분류를 해야 함으로 클래스로 분류한 것들 중에 실제로 그 클래스에 해당되는 수가 분자인 Precision이 가장 중요하다고 생각합니다. 예를 들어 화이트 와인을 레드와인 클래스에 넣으면 안되고, 논알콜에 도수가 있는 와인을 넣으면 안되기 때문에 정확도지표가 가장 중요합니다.