<a href="https://colab.research.google.com/github/jigjid/github_task/blob/main/TensorFlow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##processing

In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow.compat.v1 as tf
tf.compat.v1.disable_eager_execution()
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from keras.datasets import mnist
from matplotlib import pyplot as plt

**Problem 1:** Looking back at scratch

Preparing the dataset

In [5]:
df = pd.read_csv("Iris.csv")
df = df[(df["Species"] == "Iris-versicolor") | (df["Species"] == "Iris-virginica")]
y = df["Species"]
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]]
X = np.array(X)
y = np.array(y)
y[y == "Iris-versicolor"] = 0
y[y == "Iris-virginica"] = 1
y = y.astype(np.int64)[:, np.newaxis]
X_train, X_test, y_train,y_test = train_test_split(X,y, test_size = 0.2, random_state = 0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

**Problem 2:** Thinking about how to deal with scratch and TensorFlow

In [7]:
class GetMiniBatch:
    """
    ミニバッチを取得するイテレータ

    Parameters
    ----------
    X : 次の形のndarray, shape (n_samples, n_features)
      訓練データ
    y : 次の形のndarray, shape (n_samples, 1)
      正解値
    batch_size : int
      バッチサイズ
    seed : int
      NumPyの乱数のシード
    """
    def __init__(self, X, y, batch_size = 10, seed=0):
        self.batch_size = batch_size
        np.random.seed(seed)
        shuffle_index = np.random.permutation(np.arange(X.shape[0]))
        self.X = X[shuffle_index]
        self.y = y[shuffle_index]
        self._stop = np.ceil(X.shape[0]/self.batch_size).astype(np.int)
    def __len__(self):
        return self._stop
    def __getitem__(self,item):
        p0 = item*self.batch_size
        p1 = item*self.batch_size + self.batch_size
        return self.X[p0:p1], self.y[p0:p1]        
    def __iter__(self):
        self._counter = 0
        return self
    def __next__(self):
        if self._counter >= self._stop:
            raise StopIteration()
        p0 = self._counter*self.batch_size
        p1 = self._counter*self.batch_size + self.batch_size
        self._counter += 1
        return self.X[p0:p1], self.y[p0:p1]

# Configuring Hyperparameters
learning_rate = 0.001
batch_size = 10
num_epochs = 100

n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 1

# Determine the shape of the arguments to be passed to the calculation graph
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])
# trainのミニバッチイテレータ
get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  self._stop = np.ceil(X.shape[0]/self.batch_size).astype(np.int)


In [8]:
def example_net(x):
    """
    単純な3層ニューラルネットワーク
    """
    tf.random.set_random_seed(0)
    # 重みとバイアスの宣言
    weights = {
        'w1': tf.Variable(tf.random_normal([n_input, n_hidden1])),
        'w2': tf.Variable(tf.random_normal([n_hidden1, n_hidden2])),
        'w3': tf.Variable(tf.random_normal([n_hidden2, n_classes]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden1])),
        'b2': tf.Variable(tf.random_normal([n_hidden2])),
        'b3': tf.Variable(tf.random_normal([n_classes]))
    }

    layer_1 = tf.add(tf.matmul(x, weights['w1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    layer_output = tf.matmul(layer_2, weights['w3']) + biases['b3'] # tf.addと+は等価である
    return layer_output

# Reading the network structure                               
logits = example_net(X)

# Objective function
loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits))
# Optimization techniques
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

# Estimated results
correct_pred = tf.equal(tf.sign(Y - 0.5), tf.sign(tf.sigmoid(logits) - 0.5))
# Index value calculation
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing a variable
init = tf.global_variables_initializer()


# Running Calculation Graphs
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(num_epochs):
        # Loop every epoch
        total_batch = np.ceil(X_train.shape[0]/batch_size).astype(np.int64)
        total_loss = 0
        total_acc = 0
        for i, (mini_batch_x, mini_batch_y) in enumerate(get_mini_batch_train):
            # Loop per mini-batch
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})
            total_loss += loss
        total_loss /= n_samples
        val_loss, acc = sess.run([loss_op, accuracy], feed_dict={X: X_val, Y: y_val})
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}, acc : {:.3f}".format(epoch, total_loss, val_loss, acc))
    test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: y_test})
    print("test_acc : {:.3f}".format(test_acc))


print("It is much faster than implemented CNN from scratch. It's also very easy to use \
\nFirst, weights and biases are initialized and then layers are defined.")

Epoch 0, loss : 7.0241, val_loss : 67.6860, acc : 0.375
Epoch 1, loss : 3.4241, val_loss : 23.4026, acc : 0.312
Epoch 2, loss : 1.9387, val_loss : 11.6681, acc : 0.375
Epoch 3, loss : 2.0917, val_loss : 13.1400, acc : 0.312
Epoch 4, loss : 1.7685, val_loss : 17.7284, acc : 0.312
Epoch 5, loss : 1.6097, val_loss : 12.9607, acc : 0.312
Epoch 6, loss : 1.4402, val_loss : 10.0593, acc : 0.312
Epoch 7, loss : 1.3704, val_loss : 9.4797, acc : 0.312
Epoch 8, loss : 1.2536, val_loss : 9.8518, acc : 0.312
Epoch 9, loss : 1.1476, val_loss : 8.5670, acc : 0.375
Epoch 10, loss : 1.0930, val_loss : 8.0430, acc : 0.375
Epoch 11, loss : 1.0412, val_loss : 7.8791, acc : 0.375
Epoch 12, loss : 0.9804, val_loss : 7.1233, acc : 0.375
Epoch 13, loss : 0.9326, val_loss : 6.7908, acc : 0.375
Epoch 14, loss : 0.8792, val_loss : 6.2492, acc : 0.375
Epoch 15, loss : 0.8304, val_loss : 5.7680, acc : 0.375
Epoch 16, loss : 0.7835, val_loss : 5.2886, acc : 0.438
Epoch 17, loss : 0.7384, val_loss : 4.8037, acc : 0

**Problem 3:** Create an Iris model using all three types of objective variables

In [9]:
df = pd.read_csv("Iris.csv")
df = df[(df["Species"] == "Iris-versicolor") | (df["Species"] == "Iris-virginica") | (df["Species"]=="Iris-setosa")]
y = df["Species"]
X = df.loc[:, ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]]
X = np.array(X)
y = np.array(y)
y[y == "Iris-versicolor"] = 0
y[y == "Iris-virginica"] = 1
y[y == "Iris-setosa"] = 2

enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y = enc.fit_transform(y[:,np.newaxis])

X_train, X_test, y_train,y_test = train_test_split(X,y, test_size = 0.2, random_state = 0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

mmsc = MinMaxScaler()
X_train = mmsc.fit_transform(X_train)
X_test = mmsc.transform(X_test)
X_val = mmsc.transform(X_val)



In [10]:
learning_rate = 0.001
batch_size = 10
num_epochs = 100
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]

n_classes = 3
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

logits = example_net(X)
loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=logits)) 
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)  
train_op = optimizer.minimize(loss_op)  
correct_pred = tf.equal(tf.sign(Y - 0.5), tf.sign(tf.sigmoid(logits) - 0.5))   
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))  
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(num_epochs):
        # Loop every epoch
        total_batch = np.ceil(X_train.shape[0]/batch_size).astype(np.int64)
        total_loss = 0
        total_acc = 0
        for i, (mini_batch_x, mini_batch_y) in enumerate(get_mini_batch_train):
            # Loop per mini-batch
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})
            total_loss += loss
        total_loss /= n_samples
        val_loss, acc = sess.run([loss_op, accuracy], feed_dict={X: X_val, Y: y_val})
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}, acc : {:.3f}".format(epoch, total_loss, val_loss, acc))
    test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: y_test})
    print("test_acc : {:.3f}".format(test_acc))

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  self._stop = np.ceil(X.shape[0]/self.batch_size).astype(np.int)


Epoch 0, loss : 1.7987, val_loss : 14.7636, acc : 0.444
Epoch 1, loss : 1.2412, val_loss : 9.2677, acc : 0.431
Epoch 2, loss : 0.7166, val_loss : 5.2194, acc : 0.611
Epoch 3, loss : 0.4205, val_loss : 3.4381, acc : 0.667
Epoch 4, loss : 0.2684, val_loss : 2.1337, acc : 0.667
Epoch 5, loss : 0.1845, val_loss : 1.4346, acc : 0.722
Epoch 6, loss : 0.1488, val_loss : 1.0783, acc : 0.778
Epoch 7, loss : 0.1272, val_loss : 0.9443, acc : 0.847
Epoch 8, loss : 0.1129, val_loss : 0.8756, acc : 0.833
Epoch 9, loss : 0.1016, val_loss : 0.8028, acc : 0.833
Epoch 10, loss : 0.0919, val_loss : 0.7375, acc : 0.833
Epoch 11, loss : 0.0831, val_loss : 0.6720, acc : 0.833
Epoch 12, loss : 0.0745, val_loss : 0.6075, acc : 0.847
Epoch 13, loss : 0.0663, val_loss : 0.5457, acc : 0.847
Epoch 14, loss : 0.0586, val_loss : 0.4917, acc : 0.861
Epoch 15, loss : 0.0514, val_loss : 0.4502, acc : 0.875
Epoch 16, loss : 0.0446, val_loss : 0.4147, acc : 0.875
Epoch 17, loss : 0.0382, val_loss : 0.3815, acc : 0.889
E

**Problem 4:** Create a model for House Prices

In [12]:
df = pd.read_csv("train.csv")
X = df[['GrLivArea', 'YearBuilt']].to_numpy()
y = df[['SalePrice']].to_numpy()
print("Xshape:", X.shape)
print("yshape:", y.shape)
X = np.log1p(X)
y = np.log1p(y)

print("Xshape:", X.shape)
print("yshape:", y.shape)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

mmsc = MinMaxScaler()
X_train = mmsc.fit_transform(X_train)
X_test = mmsc.transform(X_test)
X_val = mmsc.transform(X_val)

Xshape: (1460, 2)
yshape: (1460, 1)
Xshape: (1460, 2)
yshape: (1460, 1)


In [13]:
learning_rate = 0.001
batch_size = 10
num_epochs = 50
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 1
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

logits = example_net(X)
loss_op =  tf.losses.mean_squared_error(labels=Y, predictions=logits)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)  
train_op = optimizer.minimize(loss_op)  
correct_pred = tf.equal(tf.sign(Y - 0.5), tf.sign(tf.sigmoid(logits) - 0.5))   
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))  
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(num_epochs):
        # Loop every epoch
        total_batch = np.ceil(X_train.shape[0]/batch_size).astype(np.int64)
        total_loss = 0
        total_acc = 0
        for i, (mini_batch_x, mini_batch_y) in enumerate(get_mini_batch_train):
            # Loop per mini-batch
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})
            total_loss += loss
        total_loss /= n_samples
        val_loss, acc = sess.run([loss_op, accuracy], feed_dict={X: X_val, Y: y_val})
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}, acc : {:.3f}".format(epoch, total_loss, val_loss, acc))
    test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: y_test})
    print("test_acc : {:.3f}".format(test_acc))

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  self._stop = np.ceil(X.shape[0]/self.batch_size).astype(np.int)


Epoch 0, loss : 2.1372, val_loss : 2.4993, acc : 1.000
Epoch 1, loss : 0.1843, val_loss : 0.8833, acc : 1.000
Epoch 2, loss : 0.0764, val_loss : 0.4178, acc : 1.000
Epoch 3, loss : 0.0468, val_loss : 0.2916, acc : 1.000
Epoch 4, loss : 0.0351, val_loss : 0.2206, acc : 1.000
Epoch 5, loss : 0.0277, val_loss : 0.1818, acc : 1.000
Epoch 6, loss : 0.0227, val_loss : 0.1583, acc : 1.000
Epoch 7, loss : 0.0189, val_loss : 0.1381, acc : 1.000
Epoch 8, loss : 0.0161, val_loss : 0.1214, acc : 1.000
Epoch 9, loss : 0.0143, val_loss : 0.1061, acc : 1.000
Epoch 10, loss : 0.0130, val_loss : 0.0945, acc : 1.000
Epoch 11, loss : 0.0120, val_loss : 0.0853, acc : 1.000
Epoch 12, loss : 0.0112, val_loss : 0.0780, acc : 1.000
Epoch 13, loss : 0.0106, val_loss : 0.0733, acc : 1.000
Epoch 14, loss : 0.0100, val_loss : 0.0709, acc : 1.000
Epoch 15, loss : 0.0097, val_loss : 0.0705, acc : 1.000
Epoch 16, loss : 0.0094, val_loss : 0.0714, acc : 1.000
Epoch 17, loss : 0.0091, val_loss : 0.0739, acc : 1.000
Ep

**Problem 5:** Create a model for MNIST

In [14]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

X_train = X_train.astype(np.float)
X_test = X_test.astype(np.float)

X_train /= 255
X_test /= 255

y_train = y_train.astype(np.int)[:, np.newaxis]
y_test = y_test.astype(np.int)[:, np.newaxis]

enc = OneHotEncoder(handle_unknown='ignore', sparse=False)
y_train_one_hot = enc.fit_transform(y_train[:])
y_test_one_hot = enc.fit_transform(y_test[:])

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train_one_hot, test_size=0.2)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  X_train = X_train.astype(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  X_test = X_test.astype(np.float)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_train = y_train.astype(np.int)[:, np.newaxis]
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_test = y_test.astype(np.int)[:, np.newaxis]


In [15]:
learning_rate = 0.001
batch_size = 10
num_epochs = 40
n_hidden1 = 50
n_hidden2 = 100
n_input = X_train.shape[1]
n_samples = X_train.shape[0]
n_classes = 10

X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])


get_mini_batch_train = GetMiniBatch(X_train, y_train, batch_size=batch_size)

logits = example_net(X)  
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits))  
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)  
train_op = optimizer.minimize(loss_op)  
correct_pred = tf.equal(tf.argmax(Y,1), tf.argmax(tf.nn.softmax(logits),1))   
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))  
init = tf.global_variables_initializer()

with tf.Session() as sess:  
    sess.run(init)  
    for epoch in range(num_epochs):  
        total_batch = np.ceil(X_train.shape[0]/batch_size).astype(np.int64)  
        total_loss = 0  
        total_acc = 0  
        for i, (mini_batch_x, mini_batch_y) in enumerate(get_mini_batch_train):  
            sess.run(train_op, feed_dict={X: mini_batch_x, Y: mini_batch_y})  
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: mini_batch_x, Y: mini_batch_y})  
            total_loss += loss  
        total_loss /= n_samples  
        val_loss, acc = sess.run([loss_op, accuracy], feed_dict={X: X_val, Y: y_val})  
        print("Epoch {}, loss : {:.4f}, val_loss : {:.4f}, acc : {:.3f}".format(epoch, total_loss, val_loss, acc))  
    test_acc = sess.run(accuracy, feed_dict={X: X_test, Y: y_test_one_hot})  
    print("test_acc : {:.3f}".format(test_acc))

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  self._stop = np.ceil(X.shape[0]/self.batch_size).astype(np.int)
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



Epoch 0, loss : 2.7857, val_loss : 8.4334, acc : 0.794
Epoch 1, loss : 0.5005, val_loss : 3.6549, acc : 0.833
Epoch 2, loss : 0.2364, val_loss : 2.2367, acc : 0.845
Epoch 3, loss : 0.1332, val_loss : 1.5854, acc : 0.851
Epoch 4, loss : 0.0870, val_loss : 1.1859, acc : 0.853
Epoch 5, loss : 0.0617, val_loss : 0.9793, acc : 0.862
Epoch 6, loss : 0.0479, val_loss : 0.8601, acc : 0.876
Epoch 7, loss : 0.0392, val_loss : 0.8195, acc : 0.880
Epoch 8, loss : 0.0334, val_loss : 0.7861, acc : 0.885
Epoch 9, loss : 0.0290, val_loss : 0.7148, acc : 0.892
Epoch 10, loss : 0.0263, val_loss : 0.7145, acc : 0.896
Epoch 11, loss : 0.0240, val_loss : 0.7001, acc : 0.896
Epoch 12, loss : 0.0220, val_loss : 0.6713, acc : 0.902
Epoch 13, loss : 0.0199, val_loss : 0.6634, acc : 0.910
Epoch 14, loss : 0.0184, val_loss : 0.6619, acc : 0.908
Epoch 15, loss : 0.0174, val_loss : 0.6458, acc : 0.914
Epoch 16, loss : 0.0165, val_loss : 0.6312, acc : 0.915
Epoch 17, loss : 0.0156, val_loss : 0.6610, acc : 0.913
Ep