In [1]:
from xgboost import XGBClassifier
import sklearn.datasets as datasets
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier # KNN
from sklearn.tree import DecisionTreeClassifier  # 决策树
from sklearn.ensemble import RandomForestClassifier # 随机森林
from sklearn.ensemble import ExtraTreesClassifier   # 极度随机森林
from sklearn.ensemble import AdaBoostClassifier     # 提升算法 AdaBoost, 
from sklearn.ensemble import GradientBoostingClassifier # 梯度树提升

In [2]:
wine=datasets.load_wine()
print(wine["DESCR"])
"""
============================= ==== ===== ======= =====
                                   Min   Max   Mean     SD
    ============================= ==== ===== ======= =====
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0.98  3.88    2.29  0.63
    Flavanoids:                   0.34  5.08    2.03  1.00
    Nonflavanoid Phenols:         0.13  0.66    0.36  0.12
    Proanthocyanins:              0.41  3.58    1.59  0.57
    Colour Intensity:              1.3  13.0     5.1   2.3
    Hue:                          0.48  1.71    0.96  0.23
    OD280/OD315 of diluted wines: 1.27  4.00    2.61  0.71
    Proline:                       278  1680     746   315
    ============================= ==== ===== ======= =====
"""

.. _wine_dataset:

Wine recognition dataset
------------------------

**Data Set Characteristics:**

    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- Alcohol
 		- Malic acid
 		- Ash
		- Alcalinity of ash  
 		- Magnesium
		- Total phenols
 		- Flavanoids
 		- Nonflavanoid phenols
 		- Proanthocyanins
		- Color intensity
 		- Hue
 		- OD280/OD315 of diluted wines
 		- Proline

    - class:
            - class_0
            - class_1
            - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:                0



In [3]:
X=wine["data"]
y=wine["target"]

In [4]:
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

## KNN

In [5]:
knn=KNeighborsClassifier(n_neighbors=10,weights="distance")
knn.fit(x_train,y_train)
knn.score(x_test,y_test)

0.8333333333333334

## 决策树

In [6]:
clf=DecisionTreeClassifier()
clf.fit(x_train,y_train)
clf.score(x_test,y_test)

0.9166666666666666

## 随机森林 极度随机森林 提升算法 AdaBoost  梯度树提升

In [7]:
estimators={}
estimators["forest"]=RandomForestClassifier(n_estimators=100) 
estimators["extra"]=ExtraTreesClassifier(n_estimators=100)
estimators["ada"]=AdaBoostClassifier(n_estimators=100)
estimators["gdbt"]=GradientBoostingClassifier(n_estimators=100)

In [8]:
for key,model in estimators.items():
    model.fit(x_train,y_train)
    score=model.score(x_test,y_test)
    print(key,score)

forest 0.9722222222222222
extra 0.9722222222222222
ada 0.9166666666666666
gdbt 0.9722222222222222


In [9]:
import warnings
warnings.filterwarnings("ignore")

In [19]:
%%time
score=0
for i in range(100):
    x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

    xgboost=XGBClassifier(n_estimators=100)
    xgboost.fit(x_train,y_train)
    score+=xgboost.score(x_test,y_test)
score.mean()
# 96.38888888888893







Wall time: 9.55 s


95.88888888888891

In [17]:
%%time
score=0
for i in range(100):
    x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

    gdbt=GradientBoostingClassifier(n_estimators=100)
    gdbt.fit(x_train,y_train)
    score+=gdbt.score(x_test,y_test)
score.mean()
# 94.6388888888889

Wall time: 23.6 s


93.30555555555554

## LGBM

In [13]:
from lightgbm import LGBMClassifier

In [18]:
%%time
score=0
for i in range(100):
    x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

    lgbm=LGBMClassifier(n_estimators=100)
    lgbm.fit(x_train,y_train)
    score+=lgbm.score(x_test,y_test)
score.mean()
# 94.6388888888889

Wall time: 7.46 s


97.22222222222229