In [1]:
import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier #集成学习

In [6]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes.data"
names = ['preg','plas','pres','skin','test','mass','pedi','age','class']
dataframe = pd.read_csv('./diabetes.csv', header=0, names=names)
dataframe.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [8]:
array = dataframe.values
array

array([[  6.   , 148.   ,  72.   , ...,   0.627,  50.   ,   1.   ],
       [  1.   ,  85.   ,  66.   , ...,   0.351,  31.   ,   0.   ],
       [  8.   , 183.   ,  64.   , ...,   0.672,  32.   ,   1.   ],
       ...,
       [  5.   , 121.   ,  72.   , ...,   0.245,  30.   ,   0.   ],
       [  1.   , 126.   ,  60.   , ...,   0.349,  47.   ,   1.   ],
       [  1.   ,  93.   ,  70.   , ...,   0.315,  23.   ,   0.   ]])

In [9]:
X = array[:,0:8] #前8列，所有行
X.shape

(768, 8)

In [10]:
Y = array[:,-1] #最后一列
Y.shape

(768,)

In [11]:
kfold = model_selection.KFold(n_splits=10, random_state=7) #模型选择器
type(kfold) #模型选择器，随机打乱，用于筛选判断算法的强度

sklearn.model_selection._split.KFold

In [12]:
estimators = [] #列表
model1 = LogisticRegression()
model2 = SVC()
model3 = DecisionTreeClassifier()
estimators.append(("LogisticRegression",model1))
estimators.append(("SVC",model2))
estimators.append(("DecisionTreeClassifier",model3))
ensemble = VotingClassifier(estimators) #算法集成工具
results = model_selection.cross_val_score(ensemble,X,Y,cv=kfold)
print(results, results.mean())

[0.62337662 0.79220779 0.72727273 0.64935065 0.74025974 0.71428571
 0.83116883 0.81818182 0.69736842 0.71052632] 0.7303998632946002


In [13]:
results.max()

0.8311688311688312

In [14]:
ensemble.estimators

[('LogisticRegression',
  LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
            intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
            penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
            verbose=0, warm_start=False)),
 ('SVC', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)),
 ('DecisionTreeClassifier',
  DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, presort=False, random_state=None,
              splitter='best'))]

In [15]:
ensemble.fit(X,Y) #训练
ensemble.predict(X) #预测

array([1., 0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1.,
       1., 0., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 0., 1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0.,
       0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0.,
       0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.,
       0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1., 1., 0., 0.,
       0., 1., 0., 0., 0., 1., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0.,
       1., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1.,
       1., 1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0.,
       0., 0., 1., 1., 0., 1., 0., 0., 0., 1., 1., 1., 1., 0., 1., 1., 1.,
       1., 0., 0., 0., 0.

In [16]:
ensemble.get_params()

{'DecisionTreeClassifier': DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
             max_features=None, max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
             min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, presort=False, random_state=None,
             splitter='best'),
 'DecisionTreeClassifier__class_weight': None,
 'DecisionTreeClassifier__criterion': 'gini',
 'DecisionTreeClassifier__max_depth': None,
 'DecisionTreeClassifier__max_features': None,
 'DecisionTreeClassifier__max_leaf_nodes': None,
 'DecisionTreeClassifier__min_impurity_decrease': 0.0,
 'DecisionTreeClassifier__min_impurity_split': None,
 'DecisionTreeClassifier__min_samples_leaf': 1,
 'DecisionTreeClassifier__min_samples_split': 2,
 'DecisionTreeClassifier__min_weight_fraction_leaf': 0.0,
 'DecisionTreeClassifier__presort': False,
 'DecisionTreeClassifier__random_state': None,
 'DecisionTreeClassifier__split

In [17]:
ensemble.score(X,Y)

1.0