[課題のURL](https://diver.diveintocode.jp/curriculums/1626)

# Sprint ディープラーニングフレームワーク1

## 【問題1】スクラッチを振り返る

ディープラーニングを実装するためにはどのようなものが必要だったかを列挙してください。

・ハイパーパラメータの設定  
・ノード設定  
・forwardの処理  
・backwardの処理  
・バッチ処理  
・推定の処理  

## 【問題2】スクラッチとTensorFlowの対応を考える

以下のサンプルコードを見て、先ほど列挙した「ディープラーニングを実装するために必要なもの」が  
TensorFlowではどう実装されているかを確認してください。  
  
・計算グラフの定義  
・ノード設定  
・ネットワーク構造の読み込み  
・目的関数の設定  
・最適化手法の設定  
・推定結果の設定  
・学習  

In [1]:
# X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape
# ((64, 4), (64, 1), (16, 4), (16, 1), (20, 4), (20, 1))

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as ft

In [14]:
# データセットの読み込み
dataset_path ="Iris.csv"
df = pd.read_csv(dataset_path)
# データフレームから条件抽出
df = df[(df["Species"] == "Iris-versicolor")|(df["Species"] == "Iris-virginica")]
y = df["Species"]
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]]
y = np.array(y)
X = np.array(X)
# ラベルを数値に変換
y[y=='Iris-versicolor'] = 0
y[y=='Iris-virginica'] = 1
y = y.astype(np.int)[:, np.newaxis]
# trainとtestに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# さらにtrainとvalに分割
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)
class GetMiniBatch:
    """
    ミニバッチを取得するイテレータ

    Parameters
    ----------
    X : 次の形のndarray, shape (n_samples, n_features)
      訓練データ
    y : 次の形のndarray, shape (n_samples, 1)
      正解値
    batch_size : int
      バッチサイズ
    seed : int
      NumPyの乱数のシード
    """
    def __init__(self, X, y, batch_size = 10, seed=0):
        self.batch_size = batch_size
        np.random.seed(seed)
        shuffle_index = np.random.permutation(np.arange(X.shape[0]))
        self.X = X[shuffle_index]
        self.y = y[shuffle_index]
        self._stop = np.ceil(X.shape[0]/self.batch_size).astype(np.int)
    def __len__(self):
        return self._stop
    def __getitem__(self,item):
        p0 = item*self.batch_size
        p1 = item*self.batch_size + self.batch_size
        return self.X[p0:p1], self.y[p0:p1]        
    def __iter__(self):
        self._counter = 0
        return self
    def __next__(self):
        if self._counter >= self._stop:
            raise StopIteration()
        p0 = self._counter*self.batch_size
        p1 = self._counter*self.batch_size + self.batch_size
        self._counter += 1
        return self.X[p0:p1], self.y[p0:p1]
# ハイパーパラメータの設定
learning_rate = 0.01
batch_size = 10
num_epochs = 10
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 1
# 計算グラフに渡す引数の形を決める
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])
# trainのミニバッチイテレータ
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)
def example_net(x):
    """
    単純な3層ニューラルネットワーク
    """
    # 重みとバイアスの宣言
    weights = {
        'w1': tf.Variable(tf.random_normal([n_input, n_hidden1])),
        'w2': tf.Variable(tf.random_normal([n_hidden1, n_hidden2])),
        'w3': tf.Variable(tf.random_normal([n_hidden2, n_classes]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden1])),
        'b2': tf.Variable(tf.random_normal([n_hidden2])),
        'b3': tf.Variable(tf.random_normal([n_classes]))
    }
    layer_1 = tf.add(tf.matmul(x, weights['w1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    layer_output = tf.matmul(layer_2, weights['w3']) + biases['b3'] # tf.addと+は等価である
    return layer_output
# ネットワーク構造の読み込み                               
logits = example_net(X)
# 目的関数
loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))
# 最適化手法
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# 推定結果
correct_pred = tf.equal(tf.sign(Y - 0.5), tf.sign(tf.sigmoid(logits) - 0.5))
# 指標値計算
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# variableの初期化
init = tf.global_variables_initializer()

# 計算グラフの実行
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(num_epochs):
        # エポックごとにループ
        total_batch = np.ceil(X_train.shape[0]/batch_size).astype(np.int)
        total_loss = 0
        total_acc = 0
        for i, (mini_batch_x, mini_batch_y) in enumerate(get_mini_batch_train):
            # ミニバッチごとにループ
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})
            total_loss += loss
            total_acc += acc
        total_loss /= n_samples
        total_acc /= n_samples
        val_loss, val_acc = sess.run([loss_op, accuracy], feed_dict={X: X_val, Y: y_val})
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}, acc : {:.3f}, val_acc : {:.3f}".format(epoch, loss, val_loss, acc, val_acc))
    test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: y_test})
    print("test_acc : {:.3f}".format(test_acc))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 0, loss : 17.3896, val_loss : 33.9632, acc : 0.750, val_acc : 0.375
Epoch 1, loss : 0.1424, val_loss : 1.3803, acc : 1.000, val_acc : 0.812
Epoch 2, loss : 4.0698, val_loss : 7.3069, acc : 0.750, val_acc : 0.625
Epoch 3, loss : 0.7163, val_loss : 2.6266, acc : 0.750, val_acc : 0.688
Epoch 4, loss : 1.2964, val_loss : 5.8085, acc : 0.750, val_acc : 0.688
Epoch 5, loss : 0.0000, val_loss : 0.4036, acc : 1.000, val_acc : 0.938
Epoch 6, loss : 0.0001, val_loss : 3.4165, acc : 1.000, val_acc : 0.812
Epoch 7, loss : 0.0000, val_loss : 0.9855, acc : 1.000, val_acc : 0.875
Epoch 8, loss : 0.0000, val_loss : 2.5873, acc : 1.000, val_acc : 0.875
Epoch 9, loss : 0.0000, val_loss : 1.4164, acc : 1.000, val_acc : 0.875
test_acc : 0.800


## 【問題3】3種類全ての目的変数を使用したIrisのモデルを作成

問題2のサンプルコードを書き換え、これらに対して学習・推定を行うニューラルネットワークを作成してください。

1, Iris（3種類全ての目的変数を使用）  
2, House Prices  

どのデータセットもtrain, val, testの3種類に分けて使用してください。

In [25]:
def encode_3(data):
    t = np.zeros((data.size, 3))
    for i in range(data.size):
        t[i, data[i]] = 1
    return t

In [27]:
# データセットの読み込み
dataset_path ="Iris.csv"
df = pd.read_csv(dataset_path)
# データフレームから条件抽出
#df = df[(df["Species"] == "Iris-versicolor")|(df["Species"] == "Iris-virginica")]
df = df[(df["Species"] == "Iris-versicolor")|(df["Species"] == "Iris-virginica")|(df["Species"] == "Iris-setosa")]
y = df["Species"]
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]]
y = np.array(y)
X = np.array(X)
# ラベルを数値に変換
y[y=='Iris-versicolor'] = 0
y[y=='Iris-virginica'] = 1
y[y=='Iris-setosa'] = 2
y = y.astype(np.int)[:, np.newaxis]
y = encode_3(y)
# trainとtestに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# さらにtrainとvalに分割
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

In [28]:
"""
TensorFlowで実装したニューラルネットワークを使いIrisデータセットを3値分類する
"""
# ハイパーパラメータの設定
learning_rate = 0.02
batch_size = 10
num_epochs = 10
n_hidden1 = 5
n_hidden2 = 10
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_samples_test = X_test.shape[0]
n_samples_val = X_val.shape[0]
n_classes = 3

# 計算グラフに渡す引数の形を決める
X = tf.placeholder("float", [None, n_input]) # X_train.shape(96, 4)
Y = tf.placeholder("float", [None, n_classes]) # 3

# trainのミニバッチイテレータ
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)
def example_net(x):
    """
    単純な3層ニューラルネットワーク
    """
    # 重みとバイアスの宣言
    weights = {
        'w1': tf.Variable(tf.random_normal([n_input, n_hidden1])),
        'w2': tf.Variable(tf.random_normal([n_hidden1, n_hidden2])),
        'w3': tf.Variable(tf.random_normal([n_hidden2, n_classes]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden1])),
        'b2': tf.Variable(tf.random_normal([n_hidden2])),
        'b3': tf.Variable(tf.random_normal([n_classes]))
    }
    layer_1 = tf.add(tf.matmul(x, weights['w1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    #layer_output = tf.matmul(layer_2, weights['w3']), biases['b3'] # tf.addと+は等価である
    layer_output = tf.nn.softmax(tf.matmul(layer_2, weights['w3']) + biases['b3'])
    
    return layer_output
# ネットワーク構造の読み込み                               
logits = example_net(X)

# 目的関数
# loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))
loss_op = -tf.reduce_sum(Y * tf.log(logits + 1e-7))

# 最適化手法
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# 推定結果
#correct_pred = tf.equal(tf.sign(Y - 0.5), tf.sign(tf.sigmoid(logits) - 0.5))
correct_pred = tf.equal(tf.argmax(Y, 1), tf.argmax(logits, 1))

# 指標値計算
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# variableの初期化
init = tf.global_variables_initializer()

# 計算グラフの実行
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(num_epochs):
        # エポックごとにループ
        total_batch = np.ceil(X_train.shape[0]/batch_size).astype(np.int)
        total_loss = 0
        total_acc = 0
        for i, (mini_batch_x, mini_batch_y) in enumerate(get_mini_batch_train):
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})
        
        train_loss, train_acc = sess.run([loss_op, accuracy], feed_dict={X: X_train, Y: y_train})
        val_loss, val_acc = sess.run([loss_op, accuracy], feed_dict={X: X_val, Y: y_val})
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}, acc : {:.3f}, val_acc : {:.3f}".format(epoch, train_loss/n_samples, val_loss/n_samples_val, train_acc, val_acc))
    test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: y_test})
    print("test_acc : {:.3f}".format(test_acc))


Epoch 0, loss : 5.7263, val_loss : 5.7893, acc : 0.365, val_acc : 0.375
Epoch 1, loss : 0.6192, val_loss : 0.5786, acc : 0.677, val_acc : 0.708
Epoch 2, loss : 1.0641, val_loss : 1.0634, acc : 0.323, val_acc : 0.333
Epoch 3, loss : 0.6006, val_loss : 0.5726, acc : 0.688, val_acc : 0.708
Epoch 4, loss : 0.5914, val_loss : 0.5697, acc : 0.688, val_acc : 0.708
Epoch 5, loss : 0.5193, val_loss : 0.5284, acc : 0.885, val_acc : 0.792
Epoch 6, loss : 0.4024, val_loss : 0.4670, acc : 0.854, val_acc : 0.708
Epoch 7, loss : 0.3503, val_loss : 0.4157, acc : 0.979, val_acc : 0.958
Epoch 8, loss : 0.2850, val_loss : 0.3875, acc : 0.948, val_acc : 0.917
Epoch 9, loss : 0.2472, val_loss : 0.3431, acc : 0.990, val_acc : 0.917
test_acc : 1.000


## 【問題4】House Pricesのモデルを作成

回帰問題のデータセットであるHouse Pricesを使用したモデルを作成してください。

目的変数としてSalePrice、説明変数として、GrLivAreaとYearBuiltを使ってください。  
説明変数はさらに増やしても構いません。  

In [178]:
df = pd.read_csv('train.csv', index_col=0)

In [179]:
# GrLivAreaとYearBuiltを抜き出す。
X = df[["GrLivArea", "YearBuilt"]]
y = df["SalePrice"]

In [180]:
df = pd.concat([X, y], axis=1)
df.head(3)

Unnamed: 0_level_0,GrLivArea,YearBuilt,SalePrice
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1710,2003,208500
2,1262,1976,181500
3,1786,2001,223500


In [183]:
# 対数変換
df["SalePrice"] = np.log(df["SalePrice"])
df["GrLivArea"] = np.log(df["GrLivArea"])
df["YearBuilt"] = np.log(df["YearBuilt"])

# 外れ値の除去
df.drop(df[(df["GrLivArea"]>4500) & (df['SalePrice']<300000)].index, inplace=True)
df.reset_index(drop=True, inplace=True)

X = df[["GrLivArea", "YearBuilt"]]
#X = df[["GrLivArea"]]
y = df["SalePrice"]
# df = pd.concat([X, y], axis=1)
#df.head(2)

X = np.array(X)
y = np.array(y)

In [184]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

X_train.shape, y_train.shape, X_test.shape, y_test.shape, X_val.shape, y_val.shape

((934, 2), (934,), (292, 2), (292,), (234, 2), (234,))

In [187]:
y_train = y_train.reshape(934,1)
y_test = y_test.reshape(292,1)
y_val = y_val.reshape(234,1)


In [188]:
# TensorFlow
# [X] 
# ハイパーパラメータの設定
lr = 0.001
batch_size = 200
num_epochs = 10
n_hidden1 = 50
n_hidden2 = 10 # 利用しない
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 1

X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

def example_net(x):
    """
    単純な2層ニューラルネットワーク
    """
    # 重みとバイアスの宣言
    w1 = tf.Variable(tf.truncated_normal([n_input, n_hidden1], stddev=0.1))
    b1 = tf.Variable(tf.constant(1.0, shape=[n_hidden1]))
    
    w2 = tf.Variable(tf.truncated_normal([n_hidden1, n_classes], stddev=0.1))
    b2 = tf.Variable(tf.constant(1.0, shape=[n_classes]))
    
    layer_1 = tf.add(tf.matmul(x, w1), b1)
    layer_1 = tf.nn.relu(layer_1)
    layer_output = tf.add(tf.matmul(layer_1, w2), b2)
    layer_output = tf.nn.relu(layer_output)
    
    return layer_output

# 線形回帰
# y_model = tf.add(tf.matmul(X, W), b)
y_model = example_net(X)

#cost = tf.reduce_mean(tf.square(Y - y_model))
cost = tf.losses.mean_squared_error(labels = y_model, predictions = Y)

train_op = tf.train.GradientDescentOptimizer(lr).minimize(cost)

with tf.Session() as sess:
    # you need to initialize variables (in this case just variable W)
    tf.global_variables_initializer().run()

    for epoch in range(num_epochs):
        # エポックごとにループ
        total_batch = np.ceil(X_train.shape[0]/batch_size).astype(np.int)
        total_loss = 0
        for i, (mini_batch_x, mini_batch_y) in enumerate(get_mini_batch_train):
            sess.run(train_op, feed_dict={X:mini_batch_x, Y:mini_batch_y})

            loss = sess.run(cost, feed_dict={X:mini_batch_x, Y:mini_batch_y})
            total_loss += loss
        total_loss /= n_samples
        loss_val = sess.run(cost, feed_dict={X:X_val, Y:y_val})
        print("epoch:", epoch, "loss",loss, "loss_val",loss_val)
                
    mse_test = sess.run(cost, feed_dict={X:X_test, Y:y_test})
    print("平均二乗誤差（標準偏差、ばらつき、MSE）", mse_test)

epoch: 0 loss 1.255467 loss_val 1.2605872
epoch: 1 loss 0.37204647 loss_val 0.37486854
epoch: 2 loss 0.10994452 loss_val 0.11146738
epoch: 3 loss 0.03274781 loss_val 0.0335496
epoch: 4 loss 0.0102099925 loss_val 0.01061587
epoch: 5 loss 0.00370763 loss_val 0.0038974285
epoch: 6 loss 0.0018681029 loss_val 0.0019403688
epoch: 7 loss 0.0013668236 loss_val 0.0013752786
epoch: 8 loss 0.0012408034 loss_val 0.0012146477
epoch: 9 loss 0.0012152763 loss_val 0.0011703578
平均二乗誤差（標準偏差、ばらつき、MSE） 0.0011273986


In [137]:
final_w, final_b

(0.0, array([0.], dtype=float32))

## 【問題5】MNISTのモデルを作成

ニューラルネットワークのスクラッチで使用したMNISTを分類するモデルを作成してください。  

3クラス以上の分類という点ではひとつ前のIrisと同様です。入力が画像であるという点で異なります。  

スクラッチで実装したモデルの再現を目指してください。  

In [26]:
from keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# print(X_train.shape) # (60000, 28, 28)
# print(X_test.shape) # (10000, 28, 28)

X_train = X_train.astype(np.float)
X_test = X_test.astype(np.float)
X_train /= 255
X_test /= 255
# print(X_train.max()) # 1.0
# print(X_train.min()) # 0.0

print(X_train.shape)
print(X_test.shape)

(60000, 28, 28)
(10000, 28, 28)


In [27]:
#X_train = X_train.reshape(-1, 784)
#X_test = X_test.reshape(-1, 784)

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)
print(X_train.shape) # (48000, 784)
print(X_val.shape) # (12000, 784)


(48000, 28, 28)
(12000, 28, 28)


In [28]:
"""
X_train = X_train.reshape(48000, 28, 28, 1)
y_train = y_train.reshape(48000, 1)
X_test = X_test.reshape(10000, 28, 28, 1)
y_test = y_test.reshape(10000, 1)
X_val = X_val.reshape(12000, 28, 28, 1)
y_val = y_val.reshape(12000, 1)
"""
y_train = y_train.reshape(48000, 1)
y_test = y_test.reshape(10000, 1)
y_val = y_val.reshape(12000, 1)

X_train.shape, y_train.shape, X_test.shape, y_test.shape, X_val.shape, y_val.shape

((48000, 28, 28),
 (48000, 1),
 (10000, 28, 28),
 (10000, 1),
 (12000, 28, 28),
 (12000, 1))

In [29]:
def _encode_10(data):
        t = np.zeros((data.size, 10))
        for i in range(data.size):
            t[i, data[i]] = 1
        return t

In [35]:
"""
TensorFlowで実装したニューラルネットワークを使いMINSTを分類する
"""
# ハイパーパラメータの設定
learning_rate = 0.001
batch_size = 20
num_epochs = 10
n_hidden1 = 40
n_hidden2 = 20
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_samples_test = X_test.shape[0]
n_samples_val = X_val.shape[0]
n_classes = 10
# 計算グラフに渡す引数の形を決める
tf.reset_default_graph()

X = tf.placeholder("float", [None, 28, 28, 1])
Y = tf.placeholder("float", [None, n_classes])

#プレースホルダー作成
#X = tf.placeholder(tf.float32, shape=[None, 784])
#Y = tf.placeholder(tf.float32, shape=[None, 10])

# trainのミニバッチイテレータ
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

def example_net(x):
    """
    単純な3層ニューラルネットワーク
    """
    # ノード設定
    # [Conv - ReLU - Pool]:4320 - 400:[afine - ReLU]:100 - 100:[afine - ReLU]:10
    # 畳み込み層1
    filter_num = 30
    filter_size = 5
    filter_pad = 0
    filter_stride = 1
    hidden_size = 100
    input_size = 28
    output_size = 10
    max_pool_size1 = 2
    
    conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
    pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))
    
    w1 = tf.Variable(tf.truncated_normal([filter_size, filter_size, 1, filter_num], stddev=0.1), dtype=tf.float32)
    b1 = tf.Variable(tf.constant(0.1, shape=[filter_num]), dtype=tf.float32)
    layer_1 = tf.nn.conv2d(x, w1, strides=[1, filter_stride, filter_stride, 1], padding="SAME", data_format='NHWC')
    layer_1 = tf.nn.relu(layer_1 + b1)
    layer_1 = tf.nn.max_pool(layer_1, ksize=[1, max_pool_size1, max_pool_size1, 1], strides=[1, max_pool_size1, max_pool_size1, 1], padding="SAME")    
    
    layer_1 = tf.reshape(layer_1, (-1, layer_1.shape[1]*layer_1.shape[2]*layer_1.shape[3]))
    
    # 784:[afine - ReLU]:40
    #w1 = tf.Variable(tf.random_normal([784, n_hidden1]))
    #b1 = tf.Variable(tf.random_normal([n_hidden1]))
    #layer_1 = tf.add(tf.matmul(x, w1), b1)
    #layer_1 = tf.nn.relu(layer_1)
    
    # 40:[afine - ReLU]:20
    w2 = tf.Variable(tf.random_normal([5880, n_hidden2]))
    b2 = tf.Variable(tf.random_normal([n_hidden2]))
    layer_2 = tf.add(tf.matmul(layer_1, w2), b2)
    layer_2 = tf.nn.relu(layer_2)
    
    # 20:[afine - ReLU]:10
    w3 = tf.Variable(tf.random_normal([n_hidden2, n_classes]))
    b3 = tf.Variable(tf.random_normal([n_classes]))
    layer_output = tf.nn.softmax(tf.matmul(layer_2, w3) + b3)
    
    return layer_output

# ネットワーク構造の読み込み
logits = example_net(X)

# 目的関数
loss_op = -tf.reduce_sum(Y * tf.log(logits + 1e-7))

# 最適化手法
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# 推定結果
correct_pred = tf.equal(tf.argmax(Y, 1), tf.argmax(logits, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# variableの初期化
init = tf.global_variables_initializer()

count = 0

# 計算グラフの実行
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(num_epochs):
        # エポックごとにループ
        total_batch = np.ceil(X_train.shape[0]/batch_size).astype(np.int)
        total_loss = 0
        total_acc = 0
        for i, (mini_batch_x, mini_batch_y) in enumerate(get_mini_batch_train):
            # IFを合わせるため、chを追加
            mini_batch_x = mini_batch_x.reshape(mini_batch_x.shape[0], mini_batch_x.shape[1], mini_batch_x.shape[2], 1)
            mini_batch_y = _encode_10(mini_batch_y)
            
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})
            
            # count = count + 1
            # if count % 100 == 0:
                # print(loss, acc)
            
            #total_loss += loss
            #total_acc += acc
        #total_loss /= n_samples
        #total_acc /= n_samples
        # 1エポックごとに結果を表示
        X_train_4d = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
        y_train_one_hot = _encode_10(y_train)
        train_loss, train_acc = sess.run([loss_op, accuracy], feed_dict={X: X_train_4d, Y: y_train_one_hot})
        
        X_val_4d = X_val.reshape(X_val.shape[0], X_val.shape[1], X_val.shape[2], 1)
        y_val_one_hot = _encode_10(y_val)
        val_loss, val_acc = sess.run([loss_op, accuracy], feed_dict={X: X_val_4d, Y: y_val_one_hot})
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}, acc : {:.3f}, val_acc : {:.3f}".format(epoch, train_loss/n_samples, val_loss/n_samples_val, train_acc, val_acc))
        #print("Epoch {}, loss : {:.4f}, acc : {:.3f}".format(epoch, loss, acc))
        
    y_test_one_hot = _encode_10(y_test)
    X_test_4d = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)
    test_loss, test_acc = sess.run([loss_op, accuracy], feed_dict={X: X_test_4d, Y: y_test_one_hot})
    
    print("test_loss : {:.3f}".format(test_loss/n_samples_test))
    print("test_acc : {:.3f}".format(test_acc))

Epoch 0, loss : 0.8479, val_loss : 0.8376, acc : 0.756, val_acc : 0.759
Epoch 1, loss : 0.3952, val_loss : 0.4028, acc : 0.897, val_acc : 0.894
Epoch 2, loss : 0.2644, val_loss : 0.2777, acc : 0.928, val_acc : 0.924
Epoch 3, loss : 0.2042, val_loss : 0.2197, acc : 0.943, val_acc : 0.938
Epoch 4, loss : 0.1625, val_loss : 0.1821, acc : 0.955, val_acc : 0.950
Epoch 5, loss : 0.1353, val_loss : 0.1569, acc : 0.961, val_acc : 0.956
Epoch 6, loss : 0.1217, val_loss : 0.1437, acc : 0.965, val_acc : 0.960
Epoch 7, loss : 0.1058, val_loss : 0.1307, acc : 0.970, val_acc : 0.965
Epoch 8, loss : 0.0944, val_loss : 0.1223, acc : 0.973, val_acc : 0.966
Epoch 9, loss : 0.0900, val_loss : 0.1269, acc : 0.974, val_acc : 0.967
test_loss : 0.118
test_acc : 0.968
