# Sprint14 Keras

## 【問題1】公式チュートリアルモデルを分担して実行
---
TensorFLowの公式チュートリアルモデルを分担して実行してください。

以下の中から1人ひとつ選び実行し、その結果を簡単に発表してください。

### 畳み込みニューラルネットワーク

In [63]:
# データの準備
import tensorflow as tf

from tensorflow.keras import datasets, layers, models

(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()

train_images = train_images.reshape((60000, 28, 28, 1))
test_images = test_images.reshape((10000, 28, 28, 1))

# ピクセルの値を 0~1 の間に正規化
train_images, test_images = train_images / 255.0, test_images / 255.0



In [64]:
# CNNモデルの構築 
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))



In [65]:
# モデルのアーキテクチャを表示
model.summary()


Model: "sequential_41"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_18 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 3, 3, 64)          36928     
_________________________________________________________________
flatten_4 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_105 (Dense)            (None, 64)              

In [66]:
# モデルをコンパイル
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


In [67]:
#　学習
model.fit(train_images, train_labels, epochs=5)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1519d1efe48>

In [68]:
# モデルの評価
test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)

print(test_acc)

313/313 - 1s - loss: 0.0475 - accuracy: 0.9851
0.9850999712944031


### 上記のCNNモデルを実行してみた結果、99%とかなり高い精度結果を得られた。

## 【問題3】Iris（2値分類）をKerasで学習
---
TensorFlowによるIrisデータセットに対する2値分類をKerasに書き換えてください。

In [69]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

# データセットの読み込み
dataset_path ="../Iris.csv"
df = pd.read_csv(dataset_path)
# データフレームから条件抽出
df = df[(df["Species"] == "Iris-versicolor")|(df["Species"] == "Iris-virginica")]
y = df["Species"]
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]]
y = np.array(y)
X = np.array(X).astype(np.float32)
# ラベルを数値に変換
y[y=='Iris-versicolor'] = 0
y[y=='Iris-virginica'] = 1
y = y.astype(np.float32)[:, np.newaxis]
# trainとtestに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# さらにtrainとvalに分割
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# 標準化
scaler = StandardScaler()
scaler.fit(X_train) #trainデータのみFitを実行する
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

    
# ハイパーパラメータの設定
learning_rate = 0.01
batch_size = 32
num_epochs = 20
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]    #特徴量
n_samples = X_train.shape[0]
n_classes = 1

# trainのミニバッチイテレータ
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

# モデルの構築
model = tf.keras.Sequential([
            tf.keras.layers.Input(n_input),
            tf.keras.layers.Dense(n_hidden1, activation='relu'),
            tf.keras.layers.Dense(n_hidden2, activation='relu'),
            tf.keras.layers.Dense(n_classes, activation='sigmoid')
        ])


# モデルのコンパイル
model.compile(
            loss='binary_crossentropy',
            optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
            metrics=['accuracy']
)

# モデルのアーキテクチャを表示
model.summary()

# モデルの学習
model.fit(
        X_train, y_train, 
        batch_size=batch_size, 
        epochs=num_epochs, 
        validation_data=(X_val, y_val), 
        verbose=1
)


score = model.evaluate(X_train, y_train, verbose=0)
print('Train loss:', score[0])
print('Train accuracy:', score[1])

y_pred_proba = model.predict(X_test)[:, 0]
# 確率を0, 1に変換
y_pred = np.where(y_pred_proba >0.5, 1, 0)
print("y_pred_proba", y_pred_proba)
print("y_pred", y_pred)


Model: "sequential_42"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_107 (Dense)            (None, 50)                250       
_________________________________________________________________
dense_108 (Dense)            (None, 100)               5100      
_________________________________________________________________
dense_109 (Dense)            (None, 1)                 101       
Total params: 5,451
Trainable params: 5,451
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train loss: 0.0015747310826554894
Train accuracy: 1.0
y_pred_proba [7.2439994e-05 1.0000000e+00 8.6196951e-06 1.0000000e+00 9.9999452e-01
 1.0000000e+

## 【問題4】Iris（多値分類）をKerasで学習
---
TensorFlowによるIrisデータセットに対する3値分類をKerasに書き換えてください。

In [70]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.utils import np_utils
import tensorflow as tf
# データセットの読み込み
dataset_path ="../Iris.csv"
df = pd.read_csv(dataset_path)
# データフレームから条件抽出
# df = df[(df["Species"] == "Iris-versicolor")|(df["Species"] == "Iris-virginica")]
y = df["Species"]
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]]
y = np.array(y)
X = np.array(X).astype(np.float32)

# ラベルを数値に変換
y[y=='Iris-setosa'] = 0
y[y=='Iris-versicolor'] = 1
y[y=='Iris-virginica'] = 2

# : One-Hot形式に変換
y = np_utils.to_categorical(y)
# print(y)

# trainとtestに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# さらにtrainとvalに分割
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)


# 標準化
scaler = StandardScaler()
scaler.fit(X_train) #trainデータのみFitを実行する
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_val = scaler.transform(X_val)

# ハイパーパラメータの設定
learning_rate = 0.01
batch_size = 10
num_epochs = 20
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 3

# モデルの構築
model = tf.keras.Sequential([
            tf.keras.layers.Input(n_input),
            tf.keras.layers.Dense(n_hidden1, activation='relu'),
            tf.keras.layers.Dense(n_hidden2, activation='relu'),
            tf.keras.layers.Dense(n_classes, activation='softmax')
        ])


# モデルのアーキテクチャを表示
model.summary()

# モデルのコンパイル
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
              metrics=['accuracy'])


# モデルの学習
history = model.fit(X_train, y_train, 
                   batch_size=batch_size, 
                   epochs=num_epochs, 
                   validation_data=(X_val, y_val), 
                   verbose=1)



train_score = model.evaluate(X_train, y_train)
test_score = model.evaluate(X_test, y_test)
print('Train loss:', train_score[0])
print('Train accuracy:', train_score[1])
print('Test loss:', test_score[0])
print('Test accuracy:', test_score[1])

y_pred_proba = model.predict(X_test)
y_pred = np.argmax(y_pred_proba, axis=1)
print("y_pred_proba", y_pred_proba)
print("y_pred", y_pred)


Model: "sequential_43"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_110 (Dense)            (None, 50)                250       
_________________________________________________________________
dense_111 (Dense)            (None, 100)               5100      
_________________________________________________________________
dense_112 (Dense)            (None, 3)                 303       
Total params: 5,653
Trainable params: 5,653
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Train loss: 0.023436004295945168
Train accuracy: 1.0
Test loss: 0.012413198128342628
Test accuracy: 1.0
y_pred_proba [[1.4016538e-07 3.6734207e-05 9.9

## 【問題5】House PricesをKerasで学習
---
TensorFlowによるHouse Pricesデータセットに対する回帰をKerasに書き換えてください。

In [71]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf

df = pd.read_csv("../train.csv")
X = df[['GrLivArea', 'YearBuilt']]
X = np.array(X).astype(np.float32)
y = df['SalePrice']
y = np.array(y).reshape(-1, 1)
y = np.log(y)

# trainとtestに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# さらにtrainとvalに分割
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

# ハイパーパラメータの設定
learning_rate = 0.001
batch_size = 10
num_epochs = 30
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 3

# モデルの構築
model = tf.keras.Sequential([
            tf.keras.layers.Input(n_input),
            tf.keras.layers.Dense(n_hidden1, activation='relu'),
            tf.keras.layers.Dense(n_hidden2, activation='relu'),
            tf.keras.layers.Dense(n_classes, activation='linear')
        ])


# モデルのアーキテクチャを表示
model.summary()

# モデルのコンパイル
model.compile(loss='mean_squared_error',
              optimizer=tf.keras.optimizers.Adagrad(learning_rate=learning_rate),
              metrics=['mse'])


# モデルの学習
history = model.fit(X_train, y_train, 
                   batch_size=batch_size, 
                   epochs=num_epochs, 
                   validation_data=(X_val, y_val), 
                   verbose=1)



train_score = model.evaluate(X_train, y_train)
test_score = model.evaluate(X_test, y_test)
print('Train loss:', train_score[0])
print('Train accuracy:', train_score[1])
print('Test loss:', test_score[0])
print('Test accuracy:', test_score[1])

y_pred_proba = model.predict(X_test)
print("y_pred_proba", y_pred_proba[:10])


Model: "sequential_44"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_113 (Dense)            (None, 50)                150       
_________________________________________________________________
dense_114 (Dense)            (None, 100)               5100      
_________________________________________________________________
dense_115 (Dense)            (None, 3)                 303       
Total params: 5,553
Trainable params: 5,553
Non-trainable params: 0
_________________________________________________________________
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Train loss: 1.0755237340927124

## 【問題6】MNISTをKerasで学習
---
TensorFlowによるMNISTデータセットによる画像の多値分類をKerasに書き換えてください。

In [72]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

X_train = X_train.astype(np.float)
X_test = X_test.astype(np.float)
X_train /= 255
X_test /= 255

# One-hotエンコーダー
enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot = enc.fit_transform(y_train[:, np.newaxis])
y_test_one_hot = enc.transform(y_test[:, np.newaxis])

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train_one_hot, test_size=0.2)

# ハイパーパラメータの設定
learning_rate = 0.01
batch_size = 20
num_epochs = 10
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 10


# モデルの構築
model = tf.keras.Sequential([
            tf.keras.layers.Input(n_input),
            tf.keras.layers.Dense(n_hidden1, activation='relu'),
            tf.keras.layers.Dense(n_hidden2, activation='relu'),
            tf.keras.layers.Dense(n_classes, activation='softmax')
        ])

# モデルのアーキテクチャを表示
model.summary()

# モデルのコンパイル
model.compile(loss='categorical_crossentropy',
              optimizer=tf.keras.optimizers.Adagrad(learning_rate=learning_rate),
              metrics=['accuracy'])


# モデルの学習
history = model.fit(X_train, y_train, 
                   batch_size=batch_size, 
                   epochs=num_epochs, 
                   validation_data=(X_val, y_val), 
                   verbose=1)



train_score = model.evaluate(X_train, y_train)
test_score = model.evaluate(X_test, y_test_one_hot)
print('Train loss:', train_score[0])
print('Train accuracy:', train_score[1])
print('Test loss:', test_score[0])
print('Test accuracy:', test_score[1])

y_pred_proba = model.predict(X_test)
y_pred = np.argmax(y_pred_proba, axis=1)
print("y_pred_proba", y_pred_proba)
print("y_pred", y_pred)


Model: "sequential_45"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_116 (Dense)            (None, 50)                39250     
_________________________________________________________________
dense_117 (Dense)            (None, 100)               5100      
_________________________________________________________________
dense_118 (Dense)            (None, 10)                1010      
Total params: 45,360
Trainable params: 45,360
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Train loss: 0.11028968542814255
Train accuracy: 0.968833327293396
Test loss: 0.13108061254024506
Test accuracy: 0.9609000086784363
y_pred_proba [[1.7531778e-05 6.7586154e-07 8.6121337e-04 ... 9.9721318e-01
  3.0239866e-05 3.1116910e-05]
 [1.6659935e-05 1.8056848e-03 9