# Sprint 13

## TensorFlow

### [Problem 1] Looking back on the scratch

Review the scratches so far and enumerate what you needed to implement deep learning.

- Had to initialize the weights and bias.
- Needed an epoch loop.
- Number of nodes for each layer.
- Activation function of each layer.
- Loss function
- Forward propagation & back propagation.
- Batch size.

### Preparing the dataset

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

In [2]:
#Load dataset
df = pd.read_csv("Iris.csv")

#Condition extraction from data frame
df = df[(df["Species"] == "Iris-versicolor") | (df["Species"] == "Iris-virginica")]
y = df["Species"]
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]]

# NumPy
X = np.array(X)
y = np.array(y)

# Convert label to number
y[y == "Iris-versicolor"] = 0
y[y == "Iris-virginica"] = 1
y = y.astype(np.int64)[:, np.newaxis]

#Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# さらにtrainとvalに分割
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

### [Problem 2] Consider the correspondence between scratch and TensorFlow

In [3]:
class GetMiniBatch:
    """
    Iterator to get a mini-batch
    Parameters
    ----------
    X : The following forms of ndarray, shape (n_samples, n_features)
      Training data
    y : The following form of ndarray, shape (n_samples, 1)
      Correct answer value
    batch_size : int
      Batch size
    seed : int
      NumPy random number seed
    """
    def __init__(self, X, y, batch_size = 10, seed=0):
        self.batch_size = batch_size
        np.random.seed(seed)
        shuffle_index = np.random.permutation(np.arange(X.shape[0]))
        self.X = X[shuffle_index]
        self.y = y[shuffle_index]
        self._stop = np.ceil(X.shape[0]/self.batch_size).astype(np.int)
    def __len__(self):
        return self._stop
    def __getitem__(self,item):
        p0 = item*self.batch_size
        p1 = item*self.batch_size + self.batch_size
        return self.X[p0:p1], self.y[p0:p1]        
    def __iter__(self):
        self._counter = 0
        return self
    def __next__(self):
        if self._counter >= self._stop:
            raise StopIteration()
        p0 = self._counter*self.batch_size
        p1 = self._counter*self.batch_size + self.batch_size
        self._counter += 1
        return self.X[p0:p1], self.y[p0:p1]

In [4]:
# Hyperparameter settings
learning_rate = 0.001
batch_size = 10
num_epochs = 100
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 1

In [5]:
#Determine the shape of the argument to be passed to the calculation graph
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

In [6]:
# train mini batch iterator
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

In [7]:
def example_net(x):
    """
    Simple 3-layer neural network
    """
    tf.random.set_random_seed(0)
    # Declaration of weight and bias
    weights = {
        'w1': tf.Variable(tf.random_normal([n_input, n_hidden1])),
        'w2': tf.Variable(tf.random_normal([n_hidden1, n_hidden2])),
        'w3': tf.Variable(tf.random_normal([n_hidden2, n_classes]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden1])),
        'b2': tf.Variable(tf.random_normal([n_hidden2])),
        'b3': tf.Variable(tf.random_normal([n_classes]))
    }
    layer_1 = tf.add(tf.matmul(x, weights['w1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    layer_output = tf.matmul(layer_2, weights['w3']) + biases['b3'] # tf.add and + are equivalent
    return layer_output

In [8]:
#Read network structure                              
logits = example_net(X)

# Objective function
loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))

# Optimization method
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

In [9]:
# Estimated result
correct_pred = tf.equal(tf.sign(Y - 0.5), tf.sign(tf.sigmoid(logits) - 0.5))

#Indicator value calculation
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

#Initialization of variable
init = tf.global_variables_initializer()

In [10]:
#Run calculation graph
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(num_epochs):
        #Loop for each epoch
        total_batch = np.ceil(X_train.shape[0]/batch_size).astype(np.int64)
        total_loss = 0
        total_acc = 0
        
        for i, (mini_batch_x, mini_batch_y) in enumerate(get_mini_batch_train):
            # Loop for each mini-batch
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})
            total_loss += loss
        total_loss /= n_samples
        val_loss, acc = sess.run([loss_op, accuracy], feed_dict={X: X_val, Y: y_val})
        
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}, acc : {:.3f}".format(epoch, total_loss, val_loss, acc))
    
    test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: y_test})
    print("test_acc : {:.3f}".format(test_acc))

Epoch 0, loss : 7.0241, val_loss : 67.6859, acc : 0.375
Epoch 1, loss : 3.4241, val_loss : 23.4026, acc : 0.312
Epoch 2, loss : 1.9387, val_loss : 11.6681, acc : 0.375
Epoch 3, loss : 2.0917, val_loss : 13.1400, acc : 0.312
Epoch 4, loss : 1.7685, val_loss : 17.7284, acc : 0.312
Epoch 5, loss : 1.6097, val_loss : 12.9607, acc : 0.312
Epoch 6, loss : 1.4402, val_loss : 10.0593, acc : 0.312
Epoch 7, loss : 1.3704, val_loss : 9.4797, acc : 0.312
Epoch 8, loss : 1.2536, val_loss : 9.8518, acc : 0.312
Epoch 9, loss : 1.1476, val_loss : 8.5670, acc : 0.375
Epoch 10, loss : 1.0930, val_loss : 8.0429, acc : 0.375
Epoch 11, loss : 1.0412, val_loss : 7.8791, acc : 0.375
Epoch 12, loss : 0.9804, val_loss : 7.1233, acc : 0.375
Epoch 13, loss : 0.9326, val_loss : 6.7908, acc : 0.375
Epoch 14, loss : 0.8792, val_loss : 6.2492, acc : 0.375
Epoch 15, loss : 0.8304, val_loss : 5.7681, acc : 0.375
Epoch 16, loss : 0.7835, val_loss : 5.2886, acc : 0.438
Epoch 17, loss : 0.7384, val_loss : 4.8037, acc : 0

### [Problem 3] Create a model of Iris using all three types of objective variables

In [11]:
df = pd.read_csv("Iris.csv")

y = df["Species"].values
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]].values

encoder = OneHotEncoder()
y_one_hot = encoder.fit_transform(y[:, np.newaxis]).toarray()

In [12]:
#Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=0.2, random_state=0)

# さらにtrainとvalに分割
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

In [13]:
# Hyperparameter settings
learning_rate = 0.002
batch_size = 10
num_epochs = 100
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 3

In [14]:
#Determine the shape of the argument to be passed to the calculation graph
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

In [15]:
# train mini batch iterator
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

In [16]:
#Read network structure                              
logits = example_net(X)

# Objective function (used softmax instead of sigmoid)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits))

# Optimization method
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

In [17]:
# Estimated result
pred = tf.argmax(logits, 1)
true = tf.argmax(Y, 1)
correct_pred = tf.equal(true, pred)

#Indicator value calculation
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

#Initialization of variable
init = tf.global_variables_initializer()

In [18]:
#Run calculation graph
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(num_epochs):
        #Loop for each epoch
        total_batch = np.ceil(X_train.shape[0]/batch_size).astype(np.int64)
        total_loss = 0
        total_acc = 0
        
        for i, (mini_batch_x, mini_batch_y) in enumerate(get_mini_batch_train):
            # Loop for each mini-batch
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})
            total_loss += loss
        total_loss /= n_samples
        _, train_acc = sess.run([loss_op, accuracy], feed_dict={X: X_train, Y: y_train})
        val_loss, val_acc = sess.run([loss_op, accuracy], feed_dict={X: X_val, Y: y_val})
        
        print("Epoch {}, loss : {:.4f}, train_acc : {:.4f}, val_loss : {:.4f}, val_acc : {:.3f}".format(epoch, total_loss, train_acc, val_loss, val_acc))
    
    test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: y_test})
    
    print("test_acc : {:.3f}".format(test_acc))

Epoch 0, loss : 9.9336, train_acc : 0.6250, val_loss : 80.3559, val_acc : 0.542
Epoch 1, loss : 5.8010, train_acc : 0.3125, val_loss : 45.6701, val_acc : 0.292
Epoch 2, loss : 2.7243, train_acc : 0.3750, val_loss : 18.6450, val_acc : 0.208
Epoch 3, loss : 0.3534, train_acc : 0.7396, val_loss : 4.2170, val_acc : 0.583
Epoch 4, loss : 0.1017, train_acc : 0.9271, val_loss : 1.7571, val_acc : 0.875
Epoch 5, loss : 0.0158, train_acc : 0.9062, val_loss : 1.0644, val_acc : 0.792
Epoch 6, loss : 0.0348, train_acc : 0.9583, val_loss : 1.3975, val_acc : 0.917
Epoch 7, loss : 0.0148, train_acc : 0.9271, val_loss : 0.6237, val_acc : 0.833
Epoch 8, loss : 0.0180, train_acc : 0.9583, val_loss : 1.1859, val_acc : 0.917
Epoch 9, loss : 0.0154, train_acc : 0.9688, val_loss : 0.7608, val_acc : 0.917
Epoch 10, loss : 0.0166, train_acc : 0.9583, val_loss : 1.4457, val_acc : 0.917
Epoch 11, loss : 0.0148, train_acc : 0.9062, val_loss : 1.0058, val_acc : 0.833
Epoch 12, loss : 0.0276, train_acc : 0.9583, va

## [Problem 4] Creating a model of House Prices

In [19]:
df = pd.read_csv('../Week 5/train.csv')
df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [20]:
y = df[['SalePrice']].values
X = df[['GrLivArea', 'YearBuilt']].values

X = (X - X.mean()) / X.std()
y = (y - y.mean()) / y.std()

In [21]:
#Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# さらにtrainとvalに分割
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

In [22]:
# train mini batch iterator
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

In [23]:
# Hyperparameter settings
learning_rate = 0.001
batch_size = 10
num_epochs = 100
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 1

In [24]:
#Determine the shape of the argument to be passed to the calculation graph
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

In [25]:
#Read network structure                              
logits = example_net(X)

# Objective function (used softmax instead of sigmoid)
loss_op = tf.reduce_mean(tf.square(Y - logits))

# Optimization method
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

In [26]:
#Initialization of variable
init = tf.global_variables_initializer()

In [27]:
#Run calculation graph
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(num_epochs):
        #Loop for each epoch
        total_batch = np.ceil(X_train.shape[0]/batch_size).astype(np.int64)
        total_loss = 0
        total_acc = 0
        
        for i, (mini_batch_x, mini_batch_y) in enumerate(get_mini_batch_train):
            # Loop for each mini-batch
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss = sess.run(loss_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            total_loss += loss
        total_loss /= n_samples
        val_loss = sess.run(loss_op, feed_dict={X: X_val, Y: y_val})
        
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}".format(epoch, total_loss, val_loss))
    
    test_loss = sess.run(loss_op, feed_dict={X: X_test, Y: y_test})
    
    print("test_loss : {:.3f}".format(test_loss))

Epoch 0, loss : 13.6876, val_loss : 6.4676
Epoch 1, loss : 0.6711, val_loss : 3.7183
Epoch 2, loss : 0.3519, val_loss : 2.6053
Epoch 3, loss : 0.2480, val_loss : 1.9430
Epoch 4, loss : 0.1981, val_loss : 1.6368
Epoch 5, loss : 0.1637, val_loss : 1.4228
Epoch 6, loss : 0.1422, val_loss : 1.2806
Epoch 7, loss : 0.1279, val_loss : 1.1617
Epoch 8, loss : 0.1173, val_loss : 1.0494
Epoch 9, loss : 0.1084, val_loss : 0.9712
Epoch 10, loss : 0.1018, val_loss : 0.9080
Epoch 11, loss : 0.0954, val_loss : 0.8536
Epoch 12, loss : 0.0907, val_loss : 0.8116
Epoch 13, loss : 0.0838, val_loss : 0.7781
Epoch 14, loss : 0.0811, val_loss : 0.7527
Epoch 15, loss : 0.0780, val_loss : 0.7273
Epoch 16, loss : 0.0742, val_loss : 0.6913
Epoch 17, loss : 0.0729, val_loss : 0.6767
Epoch 18, loss : 0.0714, val_loss : 0.6609
Epoch 19, loss : 0.0703, val_loss : 0.6400
Epoch 20, loss : 0.0697, val_loss : 0.6184
Epoch 21, loss : 0.0714, val_loss : 0.6023
Epoch 22, loss : 0.0695, val_loss : 0.5806
Epoch 23, loss : 0.0

### [Problem 5] Creating a MNIST model

In [28]:
from keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)
X_train = X_train.astype(np.float)
X_test = X_test.astype(np.float)
X_train /= 255
X_test /= 255

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)

print(X_train.shape)
print(y_train.shape)
print(X_val.shape)
print(y_val.shape)

enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train = enc.fit_transform(y_train[:, np.newaxis])
y_val = enc.transform(y_val[:, np.newaxis])
y_test = enc.transform(y_test[:, np.newaxis])

print(y_train.shape)
print(y_val.shape)

(48000, 784)
(48000,)
(12000, 784)
(12000,)
(48000, 10)
(12000, 10)


In [29]:
# Hyperparameter settings
learning_rate = 0.01
batch_size = 100
num_epochs = 20
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 10

In [30]:
#Determine the shape of the argument to be passed to the calculation graph
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

In [31]:
# train mini batch iterator
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

In [32]:
#Read network structure                              
logits = example_net(X)

# Objective function (used softmax instead of sigmoid)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits))

# Optimization method
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

In [33]:
# Estimated result
pred = tf.argmax(tf.nn.softmax(logits), 1)
true = tf.argmax(Y, 1)
correct_pred = tf.equal(true, pred)

#Indicator value calculation
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

#Initialization of variable
init = tf.global_variables_initializer()

In [34]:
#Run calculation graph
with tf.Session() as sess:
    sess.run(init)
    
    for epoch in range(num_epochs):
        #Loop for each epoch
        total_batch = np.ceil(X_train.shape[0]/batch_size).astype(np.int64)
        total_loss = 0
        total_acc = 0
        
        for i, (mini_batch_x, mini_batch_y) in enumerate(get_mini_batch_train):
            # Loop for each mini-batch
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})
            total_loss += loss
        total_loss /= n_samples
        _, train_acc = sess.run([loss_op, accuracy], feed_dict={X: X_train, Y: y_train})
        val_loss, val_acc = sess.run([loss_op, accuracy], feed_dict={X: X_val, Y: y_val})
        
        print("Epoch {}, loss : {:.4f}, train_acc : {:.4f}, val_loss : {:.4f}, val_acc : {:.3f}".format(epoch, total_loss, train_acc, val_loss, val_acc))
    
    test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: y_test})
    
    print("test_acc : {:.3f}".format(test_acc))

Epoch 0, loss : 0.1291, train_acc : 0.6909, val_loss : 2.1394, val_acc : 0.681
Epoch 1, loss : 0.0132, train_acc : 0.7291, val_loss : 1.2024, val_acc : 0.720
Epoch 2, loss : 0.0090, train_acc : 0.7897, val_loss : 0.9496, val_acc : 0.780
Epoch 3, loss : 0.0073, train_acc : 0.7987, val_loss : 0.9101, val_acc : 0.787
Epoch 4, loss : 0.0063, train_acc : 0.8353, val_loss : 0.8053, val_acc : 0.825
Epoch 5, loss : 0.0057, train_acc : 0.8451, val_loss : 0.7337, val_acc : 0.833
Epoch 6, loss : 0.0050, train_acc : 0.8771, val_loss : 0.6710, val_acc : 0.867
Epoch 7, loss : 0.0045, train_acc : 0.8864, val_loss : 0.6416, val_acc : 0.877
Epoch 8, loss : 0.0042, train_acc : 0.8965, val_loss : 0.6434, val_acc : 0.888
Epoch 9, loss : 0.0039, train_acc : 0.9001, val_loss : 0.5668, val_acc : 0.889
Epoch 10, loss : 0.0037, train_acc : 0.9099, val_loss : 0.5135, val_acc : 0.900
Epoch 11, loss : 0.0036, train_acc : 0.9131, val_loss : 0.5077, val_acc : 0.904
Epoch 12, loss : 0.0033, train_acc : 0.9155, val_l