## 使用决策树解决隐形眼镜分类问题(DecisionTreeClassifier)

In [1]:
import pandas as pd
from sklearn import tree

In [2]:
# Read the tab-separated lenses data set: every line of the file becomes one
# list of string fields.
# The `with` block closes the file handle as soon as parsing is finished; a
# bare open() would leave it open for as long as the kernel keeps `fr` alive.
with open('data/lenses.txt') as fr:
    lenses = [inst.strip().split('\t') for inst in fr]
lenses

[['young', 'myope', 'no', 'reduced', 'no lenses'],
 ['young', 'myope', 'no', 'normal', 'soft'],
 ['young', 'myope', 'yes', 'reduced', 'no lenses'],
 ['young', 'myope', 'yes', 'normal', 'hard'],
 ['young', 'hyper', 'no', 'reduced', 'no lenses'],
 ['young', 'hyper', 'no', 'normal', 'soft'],
 ['young', 'hyper', 'yes', 'reduced', 'no lenses'],
 ['young', 'hyper', 'yes', 'normal', 'hard'],
 ['pre', 'myope', 'no', 'reduced', 'no lenses'],
 ['pre', 'myope', 'no', 'normal', 'soft'],
 ['pre', 'myope', 'yes', 'reduced', 'no lenses'],
 ['pre', 'myope', 'yes', 'normal', 'hard'],
 ['pre', 'hyper', 'no', 'reduced', 'no lenses'],
 ['pre', 'hyper', 'no', 'normal', 'soft'],
 ['pre', 'hyper', 'yes', 'reduced', 'no lenses'],
 ['pre', 'hyper', 'yes', 'normal', 'no lenses'],
 ['presbyopic', 'myope', 'no', 'reduced', 'no lenses'],
 ['presbyopic', 'myope', 'no', 'normal', 'no lenses'],
 ['presbyopic', 'myope', 'yes', 'reduced', 'no lenses'],
 ['presbyopic', 'myope', 'yes', 'normal', 'hard'],
 ['presbyopic', 

In [3]:
# Load the file: one list of tab-separated string fields per line.
with open('data/lenses.txt', 'r') as fr:
    lenses = [line.strip().split('\t') for line in fr]

# The class label is the last field of every sample.
lenses_target = [sample[-1] for sample in lenses]

# Feature names, in the same order as the leading fields of each sample.
lensesLabels = ['age', 'prescript', 'astigmatic', 'tearRate']

# Column-oriented view of the features: feature name -> list holding that
# feature's value for every sample (the input used to build the DataFrame).
lenses_dict = {
    label: [sample[position] for sample in lenses]
    for position, label in enumerate(lensesLabels)
}

# Scratch list of the loop-based formulation; left bound and empty.
lenses_list = []

In [4]:
# Build a pandas.DataFrame from the feature dictionary: one column per key of
# lenses_dict, one row per sample.
lenses_pd = pd.DataFrame(data=lenses_dict)
lenses_pd

Unnamed: 0,age,prescript,astigmatic,tearRate
0,young,myope,no,reduced
1,young,myope,no,normal
2,young,myope,yes,reduced
3,young,myope,yes,normal
4,young,hyper,no,reduced
5,young,hyper,no,normal
6,young,hyper,yes,reduced
7,young,hyper,yes,normal
8,pre,myope,no,reduced
9,pre,myope,no,normal


In [5]:
from sklearn.preprocessing import LabelEncoder
# Replace every string column of lenses_pd with integer codes, in place.
# One encoder object is re-fitted on each column in turn, so after the loop
# `le` only holds the classes of the last column it processed.
le = LabelEncoder()
for col in lenses_pd.columns:
    encoded_column = le.fit_transform(lenses_pd[col])
    lenses_pd[col] = encoded_column
lenses_pd

Unnamed: 0,age,prescript,astigmatic,tearRate
0,2,1,0,1
1,2,1,0,0
2,2,1,1,1
3,2,1,1,0
4,2,0,0,1
5,2,0,0,0
6,2,0,1,1
7,2,0,1,0
8,0,1,0,1
9,0,1,0,0


In [6]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from six import StringIO
import numpy as np
import pydotplus

# Fit a decision tree (at most 4 levels deep) on the label-encoded features,
# using the string class labels in lenses_target as the targets.
# NOTE(review): random_state is not fixed; per the scikit-learn docs the
# chosen split can differ between runs when several candidate splits tie --
# confirm whether reproducible output matters here.
clf = tree.DecisionTreeClassifier(max_depth=4)
clf = clf.fit(lenses_pd.values.tolist(), lenses_target)

# Export the fitted tree as Graphviz DOT text into an in-memory buffer.
# feature_names / class_names are passed as plain lists of str, which is the
# type export_graphviz documents, instead of a pandas Index and a NumPy array
# (some scikit-learn releases validate these argument types strictly).
dot_data = StringIO()
tree.export_graphviz(
    clf,
    out_file=dot_data,
    feature_names=list(lenses_pd.columns),
    class_names=clf.classes_.tolist(),
    filled=True,
    rounded=True,
    special_characters=True,
)

# Turn the DOT text into a graph object and save it as tree.pdf (path is
# relative to the current working directory).
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_pdf("tree.pdf")

True

In [7]:
# Predict the class of a single label-encoded sample; the four values follow
# the training column order: age, prescript, astigmatic, tearRate.
sample = [[1, 1, 1, 0]]
clf.predict(sample)

array(['hard'], dtype='<U9')

In [9]:
# Mean accuracy of the tree on the same samples it was fitted on, i.e. a
# training-set score rather than a held-out evaluation.
training_features = lenses_pd.values.tolist()
clf.score(training_features, lenses_target)

0.9583333333333334