In [134]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
import numpy as np

In [155]:
wine = load_wine()
wine_data = wine.data
wine_label = wine.target

# Data Analysis

In [156]:
print(dir(wine))

['DESCR', 'data', 'feature_names', 'target', 'target_names']


In [157]:
print(wine.DESCR)

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0

In [158]:
wine_data.shape

(178, 13)

In [159]:
wine.target.shape

(178,)

Data Split

In [160]:
X_train, X_test, y_train, y_test = train_test_split(wine_data, 
                                                    wine_label, 
                                                    test_size=0.2, 
                                                    random_state=7)

# 모델 생성 (ensemble: randomforest, svm, logist regression)

In [161]:
decision_tree = DecisionTreeClassifier(random_state=32)
decision_tree.fit(X_train, y_train)
y_pred = decision_tree.predict(X_test)

In [162]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from lightgbm import LGBMClassifier

from sklearn.metrics import accuracy_score

In [163]:
#개별 모델 생성
svm_ = svm.SVC()
randomforest = RandomForestClassifier(n_estimators = 100, random_state = 0)
logisticreg = LogisticRegression()

In [164]:
#각 모델 학습
svm_.fit(X_train, y_train)
randomforest.fit(X_train, y_train)
logisticreg.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [165]:
svm_pred = svm_.predict(X_train)
rf_pred = randomforest.predict(X_train)
lr_pred = logisticreg.predict(X_train)

In [166]:
new_data = np.array([svm_pred, rf_pred, lr_pred])
new_data.shape

(3, 142)

In [167]:
new_data = np.transpose(new_data)
new_data.shape

(142, 3)

In [168]:
#최종 모델
lgbm = LGBMClassifier()

In [169]:
lgbm.fit(new_data, y_train)

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
               importance_type='split', learning_rate=0.1, max_depth=-1,
               min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
               n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
               random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
               subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

# 모델테스트

In [170]:
sp = svm_.predict(X_test)
rp = randomforest.predict(X_test)
lp = logisticreg.predict(X_test)

new_test = np.array([sp, rp, lp])
new_test = np.transpose(new_test)
lgbm_pred = lgbm.predict(new_test)

정확도 측정 (classification_report)

In [171]:
print(classification_report(y_test, lgbm_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      1.00      1.00        17
           2       1.00      1.00      1.00        12

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



# Cf. 단일모델 정확도 측정

# SVM predict
svm prediction result

In [172]:
print(classification_report(y_test, sp))

              precision    recall  f1-score   support

           0       0.86      0.86      0.86         7
           1       0.58      0.88      0.70        17
           2       0.33      0.08      0.13        12

    accuracy                           0.61        36
   macro avg       0.59      0.61      0.56        36
weighted avg       0.55      0.61      0.54        36



# Random Forest predict
randomforest prediction result

In [173]:
print(classification_report(y_test, sp))

              precision    recall  f1-score   support

           0       0.86      0.86      0.86         7
           1       0.58      0.88      0.70        17
           2       0.33      0.08      0.13        12

    accuracy                           0.61        36
   macro avg       0.59      0.61      0.56        36
weighted avg       0.55      0.61      0.54        36



# Logistic Regression predict
logistic prediction result

In [174]:
print(classification_report(y_test, lp))

              precision    recall  f1-score   support

           0       1.00      0.86      0.92         7
           1       0.94      1.00      0.97        17
           2       1.00      1.00      1.00        12

    accuracy                           0.97        36
   macro avg       0.98      0.95      0.96        36
weighted avg       0.97      0.97      0.97        36

