In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

Using TensorFlow backend.


In [2]:
# Fixed seed shared by the NumPy RNG below and by the K-fold shuffling later on.
# NOTE(review): only NumPy's global generator is seeded in this cell; any RNG
# owned by the Keras backend is not seeded here, so results may still vary
# between runs -- confirm whether that matters for this analysis.
seed = 7
np.random.seed(seed)

In [4]:
# Load the CSV file; it has no header row, so pandas must not treat the
# first line as column names.
dataframe = pd.read_csv("./input/iris.csv", header=None)

# Work on the raw underlying array from here on.
dataset = dataframe.values

# Columns 0..3 are the features (cast to float); column 4 is the class label.
X = dataset[:, :4].astype(float)
Y = dataset[:, 4]

In [5]:
# The labels are strings, so they first have to be mapped to integers.
# For neural networks the usual practice is to then turn those integer
# class ids into a binary (one-hot) matrix, e.g. classes 1, 2, 3 become
#   1,0,0
#   0,1,0
#   0,0,1
encoder = LabelEncoder()
# fit_transform learns the label -> integer mapping and applies it in one call.
encoded_Y = encoder.fit_transform(Y)
dummy_Y = np_utils.to_categorical(encoded_Y)

In [6]:
# Print the docstring of to_categorical for reference.
help(np_utils.to_categorical)

Help on function to_categorical in module keras.utils.np_utils:

to_categorical(y, num_classes=None)
    Converts a class vector (integers) to binary class matrix.
    
    E.g. for use with categorical_crossentropy.
    
    # Arguments
        y: class vector to be converted into a matrix
            (integers from 0 to num_classes).
        num_classes: total number of classes.
    
    # Returns
        A binary matrix representation of the input.



In [7]:
def baseline_model(input_dim=4, hidden_units=8, num_classes=3):
    """Build and compile a small fully connected softmax classifier.

    The defaults reproduce the original fixed architecture (4 input features,
    one hidden ReLU layer of 8 units, 3-way softmax output), so calling the
    function with no arguments behaves exactly as before.

    Args:
        input_dim: number of input features per sample.
        hidden_units: width of the single hidden layer.
        num_classes: number of output classes (size of the softmax layer).

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the Adam optimizer, and accuracy as the reported metric.
    """
    model = Sequential()
    # Hidden layer; input_dim fixes the expected number of input features.
    model.add(Dense(hidden_units, input_dim=input_dim, activation='relu'))
    # Output layer: one unit per class, softmax pairs with the
    # categorical cross-entropy loss used below.
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [8]:
# Wrap the model builder so it can be used through the scikit-learn
# estimator interface; the training settings are passed as keyword arguments.
estimator = KerasClassifier(
    build_fn=baseline_model,
    epochs=200,
    batch_size=5,
    verbose=0,
)

In [9]:
# 10-fold splitter; shuffling is enabled and driven by the shared `seed`.
kfold = KFold(
    n_splits=10,
    shuffle=True,
    random_state=seed,
)

In [10]:
# Score the estimator on every fold of `kfold`, then report the mean and
# standard deviation of the per-fold scores as percentages.
results = cross_val_score(estimator, X, dummy_Y, cv=kfold)
print(
    "Baseline: %.2f%% (%.2f%%)"
    % (results.mean() * 100, results.std() * 100)
)

Baseline: 97.33% (4.42%)
