## Chapter7.7 ニューラルネットワークの実装

In [1]:
# パッケージの読み込み
import tensorflow as tf
import numpy as np
import pandas as pd

BigQueryのクエリ結果をpandas DataFrameに読み込みます。

<font color="red">コード中の**PROJECTID**をご自身のプロジェクトIDに書き換えてから実行してください。</font>

In [2]:
# Run the query on BigQuery and load its result into a pandas DataFrame
query = 'SELECT * FROM testdataset.wdbc ORDER BY index'
dataset = pd.read_gbq(query, project_id='PROJECTID')

# Display the first five rows
dataset.head()

Requesting query... ok.
Query running...
Query done.
Cache hit.

Retrieving results...
Got 569 rows.

Total time taken 1.36 s.
Finished at 2017-09-26 22:58:08.


Unnamed: 0,index,diagnostic,mean_radius,mean_texture,mean_perimeter,mean_area,mean_smoothness,mean_compactness,mean_concavity,mean_concave_points,...,worst_radius,worst_texture,worst_perimeter,worst_area,worst_smoothness,worst_compactness,worst_concavity,worst_concave_points,worst_symmetry,worst_fractal_dimension
0,8670,M,15.46,19.48,101.7,748.9,0.1092,0.1223,0.1466,0.08087,...,19.26,26.0,124.9,1156.0,0.1546,0.2394,0.3791,0.1514,0.2837,0.08019
1,8913,B,12.89,13.12,81.89,515.9,0.06955,0.03729,0.0226,0.01171,...,13.62,15.54,87.4,577.0,0.09616,0.1147,0.1186,0.05366,0.2309,0.06915
2,8915,B,14.96,19.1,97.03,687.3,0.08992,0.09823,0.0594,0.04819,...,16.25,26.19,109.1,809.8,0.1313,0.303,0.1804,0.1489,0.2962,0.08472
3,9047,B,12.94,16.17,83.18,507.6,0.09879,0.08836,0.03296,0.0239,...,13.86,23.02,89.69,580.9,0.1172,0.1958,0.181,0.08388,0.3297,0.07834
4,85715,M,13.17,18.66,85.98,534.6,0.1158,0.1231,0.1226,0.0734,...,15.67,27.95,102.8,759.4,0.1786,0.4166,0.5006,0.2088,0.39,0.1179


データの整理

In [3]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Encode the label without modifying `dataset`: 'M' -> 0, anything else -> 1.
# Deriving new objects (instead of overwriting the 'diagnostic' column and
# dropping 'index' in place) keeps this cell idempotent: re-running the
# in-place version mapped every already-encoded label to 1 and then raised
# KeyError on the second drop of 'index'.
y_dataset = (dataset['diagnostic'] != 'M').astype(int).values

# Feature matrix: every column except the 'index' column and the label.
# `.values` replaces `.as_matrix()`, which is deprecated and was removed
# in pandas 1.0.
X_dataset = dataset.drop(['index', 'diagnostic'], axis=1).values

# Split into training and test sets (80% / 20%, fixed random_state)
X_train, X_test, y_train, y_test = train_test_split(
    X_dataset, y_dataset, test_size=0.2, random_state=42)

# Min-max scale the features. The scaler is fitted on the training split
# only and then applied unchanged to the test split.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

TensorFlowでニューラルネットワークの実装

In [4]:
NUM_FEATURES = 30
NUM_UNITS_H1 = 4
NUM_UNITS_H2 = 4
NUM_CLASSES = 2

# Training hyperparameters (previously inline magic numbers)
LEARNING_RATE = 0.01
NUM_STEPS = 1000
LOG_INTERVAL = 100
SEED = 42

# The model is built in its own fresh graph, so the former call to
# tf.reset_default_graph() (which only clears the global default graph,
# not this one) was redundant and has been removed.
with tf.Graph().as_default():
    # Graph-level seed so that the random weight initialisation is
    # repeatable from run to run; it applies to the current default graph,
    # hence it is set inside this `with` block.
    tf.set_random_seed(SEED)

    # Input layer: feature matrix and one label per row
    X = tf.placeholder(tf.float32, shape=[None, NUM_FEATURES], name="X")
    y = tf.placeholder(tf.float32, shape=[None, ], name="y")

    # Hidden layer 1
    w1 = tf.Variable(tf.truncated_normal(
        [NUM_FEATURES, NUM_UNITS_H1], stddev=0.1), name='w1')
    b1 = tf.Variable(tf.zeros([NUM_UNITS_H1]), name='b1')
    h1 = tf.nn.relu(tf.matmul(X, w1) + b1)

    # Hidden layer 2
    w2 = tf.Variable(tf.truncated_normal(
        [NUM_UNITS_H1, NUM_UNITS_H2], stddev=0.1), name='w2')
    b2 = tf.Variable(tf.zeros([NUM_UNITS_H2]), name='b2')
    h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)

    # Output layer: raw class scores (softmax is applied inside the loss)
    w3 = tf.Variable(tf.truncated_normal(
        [NUM_UNITS_H2, NUM_CLASSES], stddev=0.1), name='w3')
    b3 = tf.Variable(tf.zeros([NUM_CLASSES]), name='b3')
    logits = tf.matmul(h2, w3) + b3

    # Loss: mean softmax cross-entropy against one-hot encoded labels.
    # The labels arrive as float32 and are cast to int32 for tf.one_hot.
    onehot_labels = tf.one_hot(indices=tf.cast(y, tf.int32), depth=NUM_CLASSES)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=onehot_labels, logits=logits, name='xentropy')
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')

    # Minimise the loss
    train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

    # Accuracy op: fraction of rows whose arg-max logit equals the label
    correct_prediction = tf.equal(
        tf.argmax(logits, 1), tf.argmax(onehot_labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Full-batch training: every step feeds the whole training set
        for step in range(NUM_STEPS):
            _, loss_value = sess.run([train_op, loss],
                                     feed_dict={X: X_train, y: y_train})
            if step % LOG_INTERVAL == 0:
                print('Step: %d, Loss: %f' % (step, loss_value))

        # Evaluate accuracy on the held-out test data
        _a = sess.run(accuracy, feed_dict={X: X_test, y: y_test})
        print('Accuracy: %f' % _a)

Step: 0, Loss: 0.692913
Step: 100, Loss: 0.063897
Step: 200, Loss: 0.040292
Step: 300, Loss: 0.031331
Step: 400, Loss: 0.025768
Step: 500, Loss: 0.021752
Step: 600, Loss: 0.017658
Step: 700, Loss: 0.014913
Step: 800, Loss: 0.011849
Step: 900, Loss: 0.009076
Accuracy: 0.956140


上記のネットワークをtf.layersで書き換えた場合(tf.layers.denseは重みの初期化に既定の方法を用いるため、損失や正解率の値は上記と完全には一致しません)

In [5]:
NUM_FEATURES = 30
NUM_UNITS_H1 = 4
NUM_UNITS_H2 = 4
NUM_CLASSES = 2

# Training hyperparameters (previously inline magic numbers)
LEARNING_RATE = 0.01
NUM_STEPS = 1000
LOG_INTERVAL = 100
SEED = 42

with tf.Graph().as_default():
    # Graph-level seed so that the random weight initialisation is
    # repeatable from run to run; it applies to the current default graph,
    # hence it is set inside this `with` block.
    tf.set_random_seed(SEED)

    # Input layer: feature matrix and one label per row
    X = tf.placeholder(tf.float32, shape=[None, NUM_FEATURES], name="X")
    y = tf.placeholder(tf.float32, shape=[None, ], name="y")

    # Hidden layers.
    # NOTE(review): no kernel_initializer is passed, so tf.layers.dense uses
    # the library default rather than the truncated normal (stddev=0.1) of
    # the hand-written version above — confirm this difference is intended.
    hidden1 = tf.layers.dense(
        inputs=X, units=NUM_UNITS_H1, activation=tf.nn.relu, name='hidden1')
    hidden2 = tf.layers.dense(
        inputs=hidden1, units=NUM_UNITS_H2, activation=tf.nn.relu, name='hidden2')

    # Output layer: raw class scores (softmax is applied inside the loss)
    logits = tf.layers.dense(inputs=hidden2, units=NUM_CLASSES, name='output')

    # Loss: mean softmax cross-entropy against one-hot encoded labels.
    # The labels arrive as float32 and are cast to int32 for tf.one_hot.
    onehot_labels = tf.one_hot(indices=tf.cast(y, tf.int32), depth=NUM_CLASSES)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=onehot_labels, logits=logits, name='xentropy')
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')

    # Minimise the loss
    train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

    # Accuracy op: fraction of rows whose arg-max logit equals the label
    correct_prediction = tf.equal(
        tf.argmax(logits, 1), tf.argmax(onehot_labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Full-batch training: every step feeds the whole training set
        for step in range(NUM_STEPS):
            _, loss_value = sess.run([train_op, loss],
                                     feed_dict={X: X_train, y: y_train})
            if step % LOG_INTERVAL == 0:
                print('Step: %d, Loss: %f' % (step, loss_value))

        # Evaluate accuracy on the held-out test data
        _a = sess.run(accuracy, feed_dict={X: X_test, y: y_test})
        print('Accuracy: %f' % _a)

Step: 0, Loss: 0.693059
Step: 100, Loss: 0.182073
Step: 200, Loss: 0.075311
Step: 300, Loss: 0.051364
Step: 400, Loss: 0.039461
Step: 500, Loss: 0.032178
Step: 600, Loss: 0.026469
Step: 700, Loss: 0.021795
Step: 800, Loss: 0.017760
Step: 900, Loss: 0.014390
Accuracy: 0.964912
