In [37]:
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

# 步骤1：收集数据

In [45]:
wine = load_wine()
X = wine['data']
y = wine['target']
feature_name = wine['feature_names']

In [46]:
print(wine.keys())

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])


In [47]:
print(wine['target_names'])

['class_0' 'class_1' 'class_2']


In [48]:
print(wine['data'].shape)

(178, 13)


In [49]:
print(wine['target'].shape)

(178,)


In [50]:
print(wine['feature_names'])

['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']


# 步骤2：数据准备

In [51]:
train_X,val_X,train_y,val_y = train_test_split(X,y,test_size=0.3,random_state=0)

In [52]:
print(train_X.shape)

(124, 13)


In [53]:
print(124.0/178.0)

0.6966292134831461


# 步骤3：选择一个模型

In [88]:
from sklearn import tree
clf=tree.DecisionTreeClassifier(criterion="entropy")

In [89]:
clf

DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

# 步骤4：训练

In [90]:
clf=clf.fit(train_X,train_y)

In [91]:
print(clf.get_depth())

4


# 步骤5：评估

In [92]:
acc = clf.score(train_X, train_y)
print('训练集准确率：'+str(acc))

acc = clf.score(val_X, val_y)
print('验证集准确率：'+str(acc))

训练集准确率：1.0
验证集准确率：0.9259259259259259


# 附加：模型可视化

In [93]:
import graphviz
# 自定义特征名字
feature_name = ['酒精','苹果酸','灰','灰的碱性'
                ,'镁','总酚','类黄酮','非黄烷类酚类'
          ,'花青素','颜 色强度','色调','od280/od315稀释葡萄酒','脯氨酸']
 
#class_name我们知道有三类，就赋值为["琴酒", "雪莉", "贝尔摩德"]
dot_data = tree.export_graphviz(clf
                    , feature_names=feature_name,
                     class_names=["琴酒", "雪莉", "贝尔摩德"]
                    , filled=True
                    ,rounded=True)

In [94]:
# filled=True 填充颜色
# rounded=True  圆弧矩形
graph = graphviz.Source(dot_data)

In [95]:
graph.view()

'Source.gv.pdf'

In [97]:
clf.feature_importances_

array([0.        , 0.02030084, 0.        , 0.        , 0.05918425,
       0.        , 0.42421878, 0.        , 0.01414231, 0.1650984 ,
       0.        , 0.        , 0.31705542])

In [98]:
print(*zip(feature_name,clf.feature_importances_))

('酒精', 0.0) ('苹果酸', 0.02030084009602607) ('灰', 0.0) ('灰的碱性', 0.0) ('镁', 0.059184249461736185) ('总酚', 0.0) ('类黄酮', 0.4242187763407541) ('非黄烷类酚类', 0.0) ('花青素', 0.01414231223429707) ('颜 色强度', 0.16509839982151525) ('色调', 0.0) ('od280/od315稀释葡萄酒', 0.0) ('脯氨酸', 0.3170554220456714)
