<a href="https://colab.research.google.com/github/dayoungcho/CAU_DL4AI/blob/main/week3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## The gears of neural networks: tensor operations

In [None]:
# naive relu

def naive_relu(x):
    """Return a copy of a rank-2 array with every negative entry replaced by 0.

    The work is done one element at a time in plain Python loops (this is the
    deliberately unvectorized reference version). The argument is not modified.
    """
    assert len(x.shape) == 2  # only rank-2 inputs are accepted
    clipped = x.copy()        # work on a copy so the caller's array is not overwritten
    n_rows, n_cols = clipped.shape
    for row in range(n_rows):
        for col in range(n_cols):
            if clipped[row, col] < 0:
                clipped[row, col] = 0
    return clipped

import numpy as np

# Small integer matrix with some negative entries to exercise naive_relu.
aa = np.array([[1,2,3,-1],[1,-1,-1,0]])
print(aa, '\n')
print(naive_relu(aa))  # negatives become 0; aa itself is left unchanged

[[ 1  2  3 -1]
 [ 1 -1 -1  0]] 

[[1 2 3 0]
 [1 0 0 0]]


In [None]:
# naive add

def naive_add(x, y):
    """Return the element-wise sum of two rank-2 arrays of identical shape.

    ``x`` is copied first and the copy is filled in entry by entry, so
    neither argument is modified.
    """
    assert len(x.shape) == 2
    assert x.shape == y.shape
    total = x.copy()
    n_rows, n_cols = total.shape
    for row in range(n_rows):
        for col in range(n_cols):
            total[row, col] = total[row, col] + y[row, col]
    return total

# Same sample matrix as before; adding it to itself doubles every entry.
aa = np.array([[1,2,3,-1],[1,-1,-1,0]])

print(aa, '\n')
print(naive_add(aa,aa))

[[ 1  2  3 -1]
 [ 1 -1 -1  0]] 

[[ 2  4  6 -2]
 [ 2 -2 -2  0]]


In [None]:
import time

x = np.random.random((20, 100))
y = np.random.random((20, 100))

# Time 1000 rounds of vectorized add + relu.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() is wall-clock time and can jump if the system
# clock is adjusted.
t0 = time.perf_counter()
for _ in range(1000):
    z = x + y
    z = np.maximum(z, 0.)
print("Took: {0:.2f} s".format(time.perf_counter() - t0))

Took: 0.01 s


In [None]:
# Same workload as the vectorized timing, but using the pure-Python
# element-wise helpers: this is much slower, which is why vectorized
# operations are preferred.
# perf_counter() is a monotonic clock intended for interval timing.
t0 = time.perf_counter()
for _ in range(1000):
    z = naive_add(x, y)
    z = naive_relu(z)
print("Took: {0:.2f} s".format(time.perf_counter() - t0))

Took: 3.37 s


In [None]:
def naive_add_matrix_and_vector(x, y):
    """Add the vector ``y`` to every row of the matrix ``x``, element by element.

    ``x`` must be rank-2, ``y`` rank-1, and ``y`` must have one entry per
    column of ``x``. A modified copy of ``x`` is returned; the inputs are not
    changed.
    """
    assert len(x.shape) == 2
    assert len(y.shape) == 1
    assert x.shape[1] == y.shape[0]
    shifted = x.copy()
    for row in range(shifted.shape[0]):
        # y has exactly one entry per column (asserted above).
        for col, offset in enumerate(y):
            shifted[row, col] = shifted[row, col] + offset
    return shifted

# Random (32, 10) matrix plus a length-10 vector; note this rebinds the
# notebook-level name y.
X = np.random.random((32, 10))
y = np.random.random((10,))
naive_add_matrix_and_vector(X,y).shape  # result keeps the matrix shape

(32, 10)

In [None]:
def naive_vector_dot(x, y):
    """Return the dot product of two equal-length rank-1 arrays.

    The sum of pairwise products is accumulated left to right, starting from
    the float ``0.``.
    """
    assert len(x.shape) == 1
    assert len(y.shape) == 1
    assert x.shape[0] == y.shape[0]
    total = 0.
    for x_i, y_i in zip(x, y):
        total = total + x_i * y_i
    return total

# Two random length-32 vectors (rebinding the notebook-level x and y).
x = np.random.random((32,))
y = np.random.random((32,))
naive_vector_dot(x,y).shape  # the dot product is a scalar, so its shape is ()

()

In [None]:
def naive_matrix_vector_dot(x, y):
    """Multiply a rank-2 matrix by a rank-1 vector with explicit loops.

    Entry ``i`` of the result is the sum over ``j`` of ``x[i, j] * y[j]``.
    The result is a new array from ``np.zeros`` with one entry per row of ``x``.
    """
    assert len(x.shape) == 2
    assert len(y.shape) == 1
    assert x.shape[1] == y.shape[0]
    n_rows, n_cols = x.shape
    out = np.zeros(n_rows)
    for row in range(n_rows):
        for col in range(n_cols):
            out[row] = out[row] + x[row, col] * y[col]
    return out

# (32, 10) matrix times a length-10 vector.
x = np.random.random((32,10))
y = np.random.random((10,))
naive_matrix_vector_dot(x,y).shape  # one output entry per matrix row

(32,)

In [None]:
def naive_matrix_vector_dot(x, y):
    """Matrix-vector product expressed as one vector dot product per row.

    Gives the same result as the explicit double-loop formulation: entry ``i``
    of the output is ``naive_vector_dot`` applied to row ``i`` of ``x`` and
    ``y``. Returns a new array with one entry per row of ``x``.

    Raises:
        AssertionError: if ``x`` is not rank-2, ``y`` is not rank-1, or the
            number of columns of ``x`` differs from the length of ``y``.
    """
    # Validate shapes up front, exactly like the double-loop version. Without
    # these checks a matrix with zero rows would skip the loop and silently
    # return an empty result even when y has the wrong length.
    assert len(x.shape) == 2
    assert len(y.shape) == 1
    assert x.shape[1] == y.shape[0]
    z = np.zeros(x.shape[0])
    for i in range(x.shape[0]):
        z[i] = naive_vector_dot(x[i, :], y)
    return z

In [None]:
def naive_matrix_dot(x, y):
    """Multiply two rank-2 matrices using one vector dot product per output cell.

    ``x`` has shape (a, b) and ``y`` has shape (b, c); the result is a new
    (a, c) array whose entry (i, j) is the dot product of row ``i`` of ``x``
    with column ``j`` of ``y``.
    """
    assert len(x.shape) == 2
    assert len(y.shape) == 2
    assert x.shape[1] == y.shape[0]
    n_rows, n_cols = x.shape[0], y.shape[1]
    product = np.zeros((n_rows, n_cols))
    for r in range(n_rows):
        for c in range(n_cols):
            product[r, c] = naive_vector_dot(x[r, :], y[:, c])
    return product

# (32, 10) times (10, 32): the inner dimensions match, and the outer ones
# give the result shape.
x = np.random.random((32,10))
y = np.random.random((10,32))
naive_matrix_dot(x,y).shape

(32, 32)

## Broadcasting

In [None]:
import numpy as np
# A rank-2 matrix and a rank-1 vector whose length equals the matrix's
# second dimension.
X = np.random.random((32, 10))
y = np.random.random((10,))
print(X.shape)
print(y.shape)

(32, 10)
(10,)


In [None]:
# Step 1: give y a leading axis so it becomes a single-row matrix.
# NOTE: y is rebound in place, so re-running this cell alone adds another
# leading axis each time.
y = np.expand_dims(y, axis=0)
print(y.shape)

# Step 2: repeat that row 32 times along axis 0.
Y = np.concatenate([y] * 32, axis=0)
print(Y.shape)   # now matches the shape of X

(1, 10)
(32, 10)


In [None]:
# reshaping

# A 3x2 matrix holding the values 0..5 in row order.
x = np.array([[0., 1.],
             [2., 3.],
             [4., 5.]])
print(x.shape)

# Same six values rearranged into a single column; x is rebound to the
# reshaped array.
x = x.reshape((6, 1))
print(x)

(3, 2)
[[0.]
 [1.]
 [2.]
 [3.]
 [4.]
 [5.]]


## Gradient-based optimization

In [None]:
import tensorflow as tf
x = tf.Variable(0.)  # the initial value has to be a float
# Operations executed inside the tape context are recorded for differentiation.
with tf.GradientTape() as tape:
    y = 2 * x + 3
# d(2x + 3)/dx
grad_of_y_wrt_x = tape.gradient(y, x)
grad_of_y_wrt_x

<tf.Tensor: shape=(), dtype=float32, numpy=2.0>

In [None]:
# Same computation as the scalar case, but with a random 2x2 variable.
x = tf.Variable(tf.random.uniform((2, 2)))
with tf.GradientTape() as tape:
    y = 2 * x + 3
# The gradient has the same shape as x.
grad_of_y_wrt_x = tape.gradient(y, x)
grad_of_y_wrt_x

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[2., 2.],
       [2., 2.]], dtype=float32)>

In [None]:
# W and b are variables; x is a plain random tensor, not a variable.
W = tf.Variable(tf.random.uniform((2, 2)))
b = tf.Variable(tf.zeros((2,)))
x = tf.random.uniform((2, 2))
with tf.GradientTape() as tape:
    y = tf.matmul(x, W) + b   # matmul: matrix multiplication
# Passing a list of sources returns a list with one gradient per source,
# in the same order.
grad_of_y_wrt_W_and_b = tape.gradient(y, [W, b])
grad_of_y_wrt_W_and_b

[<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
 array([[1.7545244, 1.7545244],
        [1.1769668, 1.1769668]], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([2., 2.], dtype=float32)>]

1. Let x and y be scalars. Find the partial derivatives of $f(x,y) = x^2y+xy+3y$ with respect to x and y.  
(Evaluate them at x = 2, y = 1.)  

2. Define x, W and b as follows:  
`x=tf.constant(np.array([1.,4.,3.]).reshape(1,3),dtype=tf.float32)`  
`W=tf.Variable(tf.random.uniform((3,2)),dtype=tf.float32)`  
`b=tf.Variable(tf.zeros((2,)),dtype=tf.float32)`  
  
  - Find the derivatives of $f(W,b)=xW+b$ with respect to W and b. (Hint: look up `tf.pow()`; it is useful for the cubic case below.)  
  
  - Find the derivatives of $f(W,b)=(xW+b)^3$ with respect to W and b.

In [None]:
# 1

# Evaluation point for exercise 1: x = 2, y = 1.
x = tf.Variable(2.)
y = tf.Variable(1.)

with tf.GradientTape() as tape:
    z = x*x*y + x*y + 3*y
# Despite its name, this holds the gradients of z: [dz/dx, dz/dy].
# Analytically dz/dx = 2xy + y = 5 and dz/dy = x^2 + x + 3 = 9 at (2, 1).
grad_of_x_wrt_x_y = tape.gradient(z, [x,y])
grad_of_x_wrt_x_y

[<tf.Tensor: shape=(), dtype=float32, numpy=5.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=9.0>]

In [None]:
# 2-1

# x is a constant (1, 3) row vector; W (3, 2) and b (2,) are the variables
# being differentiated against.
x=tf.constant(np.array([1.,4.,3.]).reshape(1,3),dtype=tf.float32)
W=tf.Variable(tf.random.uniform((3,2)),dtype=tf.float32)
b=tf.Variable(tf.zeros((2,)),dtype=tf.float32)

with tf.GradientTape() as tape:
    y = tf.matmul(x, W) + b
# One gradient per source, in the order [W, b].
grad_of_y_wrt_W_and_b = tape.gradient(y, [W, b])
grad_of_y_wrt_W_and_b

[<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
 array([[1., 1.],
        [4., 4.],
        [3., 3.]], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([1., 1.], dtype=float32)>]

In [None]:
# 2-2

# Reuses the x, W and b defined in the 2-1 cell, so that cell must run first.
with tf.GradientTape() as tape:
    y = (tf.matmul(x, W) + b)**3  # cube of the affine output
grad_of_y_wrt_W_and_b = tape.gradient(y, [W, b])
grad_of_y_wrt_W_and_b

[<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
 array([[ 80.20228 ,  31.518421],
        [320.8091  , 126.073685],
        [240.60684 ,  94.55527 ]], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([80.20228 , 31.518421], dtype=float32)>]

## Looking back at our first example

In [None]:
#  NaiveDense

class NaiveDense:
    """Minimal fully connected layer computing ``activation(inputs @ W + b)``.

    ``W`` is a variable of shape (input_size, output_size) initialised from
    ``tf.random.uniform`` with minval=0 and maxval=0.1; ``b`` is a variable of
    shape (output_size,) initialised to zeros.
    """

    def __init__(self, input_size, output_size, activation):
        self.activation = activation

        # Kernel: small random values.
        self.W = tf.Variable(
            tf.random.uniform((input_size, output_size), minval=0, maxval=1e-1)
        )
        # Bias: one zero per output unit.
        self.b = tf.Variable(tf.zeros((output_size,)))

    def __call__(self, inputs):
        """Apply the affine transform followed by the activation."""
        pre_activation = tf.matmul(inputs, self.W) + self.b
        return self.activation(pre_activation)

    @property
    def weights(self):
        """The layer's variables as the list ``[W, b]``."""
        return [self.W, self.b]

In [None]:
# Sequential

class NaiveSequential:
    """Chain of layers applied one after another.

    Each layer must be callable and expose a ``weights`` iterable.
    """

    def __init__(self, layers):
        self.layers = layers

    def __call__(self, inputs):
        """Feed ``inputs`` through every layer in order and return the final output."""
        outputs = inputs
        for apply_layer in self.layers:
            outputs = apply_layer(outputs)
        return outputs

    @property
    def weights(self):
        """Flat list of all layers' weights, in layer order."""
        return [w for layer in self.layers for w in layer.weights]

In [None]:
# batch generator

import math

class BatchGenerator:
    """Hands out consecutive fixed-size slices of paired images and labels.

    ``num_batches`` is the number of ``next()`` calls needed to cover the
    data once (the last batch may be shorter than ``batch_size``).
    """

    def __init__(self, images, labels, batch_size=128):
        assert len(images) == len(labels)
        self.images, self.labels = images, labels
        self.batch_size = batch_size
        self.index = 0  # start position of the next batch
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        """Return the next ``(images, labels)`` slice and advance the cursor."""
        window = slice(self.index, self.index + self.batch_size)
        batch = (self.images[window], self.labels[window])
        self.index = window.stop
        return batch

In [None]:
# to run one training step

learning_rate = 1e-3  # step size applied to every gradient in update_weights


def update_weights(gradients, weights):
    """Apply one plain gradient-descent step to each weight in place.

    Gradients and weights are paired positionally; each weight is decreased
    by its gradient scaled by the module-level ``learning_rate``.
    """
    for gradient, weight in zip(gradients, weights):
        step = gradient * learning_rate
        weight.assign_sub(step)


def one_training_step(model, images_batch, labels_batch):
    """Run one forward/backward pass on a batch and update the model's weights.

    Returns the batch's mean sparse categorical cross-entropy loss.
    """
    with tf.GradientTape() as tape:
        # The forward pass and the loss are computed inside the tape context
        # so they can be differentiated afterwards.
        outputs = model(images_batch)
        average_loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(labels_batch, outputs)
        )
    # Read the weights only after the forward pass has run.
    trainable = model.weights
    update_weights(tape.gradient(average_loss, trainable), trainable)
    return average_loss

In [None]:
# full training loop

def fit(model, images, labels, epochs, batch_size=128):
    """Train ``model`` with mini-batch gradient descent.

    Args:
        model: callable model exposing ``weights``, as expected by
            ``one_training_step``.
        images: training inputs, sliced batch by batch.
        labels: training targets, with the same length as ``images``.
        epochs: number of full passes over the data.
        batch_size: number of samples per batch (default 128).

    Prints the epoch number at the start of each epoch and the loss every
    100 batches.
    """
    for epoch_counter in range(epochs):
        print(f"Epoch {epoch_counter}")
        # A fresh generator per epoch restarts iteration from the first sample.
        # batch_size must be forwarded explicitly; otherwise BatchGenerator
        # falls back to its own default and the argument is silently ignored.
        batch_generator = BatchGenerator(images, labels, batch_size=batch_size)
        for batch_counter in range(batch_generator.num_batches):
            images_batch, labels_batch = batch_generator.next()
            loss = one_training_step(model, images_batch, labels_batch)
            if batch_counter % 100 == 0:
                print(f"loss at batch {batch_counter}: {loss:.2f}")

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
# Load the MNIST train/test splits bundled with Keras.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten each image into a vector of 28 * 28 values and convert to float32.
# Dividing by 255 presumably maps 8-bit pixel values into [0, 1] - confirm
# against the dataset documentation.
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype("float32") / 255

In [None]:
# classification model

# Two-layer classifier: 784 inputs -> 512 relu units -> 10 softmax outputs.
model = NaiveSequential([
    NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),
    NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax)
])
# Each NaiveDense contributes W and b, so two layers give four weight tensors.
assert len(model.weights) == 4

In [None]:
# model fitting

# Train on the flattened MNIST training images for 10 epochs.
fit(model, train_images, train_labels, epochs=10, batch_size=128)

Epoch 0
loss at batch 0: 7.25
loss at batch 100: 2.26
loss at batch 200: 2.25
loss at batch 300: 2.11
loss at batch 400: 2.22
Epoch 1
loss at batch 0: 1.92
loss at batch 100: 1.90
loss at batch 200: 1.86
loss at batch 300: 1.73
loss at batch 400: 1.84
Epoch 2
loss at batch 0: 1.60
loss at batch 100: 1.60
loss at batch 200: 1.53
loss at batch 300: 1.44
loss at batch 400: 1.52
Epoch 3
loss at batch 0: 1.34
loss at batch 100: 1.35
loss at batch 200: 1.26
loss at batch 300: 1.22
loss at batch 400: 1.29
Epoch 4
loss at batch 0: 1.14
loss at batch 100: 1.17
loss at batch 200: 1.06
loss at batch 300: 1.06
loss at batch 400: 1.12
Epoch 5
loss at batch 0: 1.00
loss at batch 100: 1.03
loss at batch 200: 0.91
loss at batch 300: 0.94
loss at batch 400: 1.00
Epoch 6
loss at batch 0: 0.89
loss at batch 100: 0.92
loss at batch 200: 0.80
loss at batch 300: 0.85
loss at batch 400: 0.91
Epoch 7
loss at batch 0: 0.80
loss at batch 100: 0.83
loss at batch 200: 0.72
loss at batch 300: 0.77
loss at batch 40

In [None]:
# evaluation

# Forward pass over the whole test set, then convert the result to NumPy.
predictions = model(test_images)
predictions = predictions.numpy()
# The predicted class is the index of the largest output in each row.
predicted_labels = np.argmax(predictions, axis=1)
# Boolean array: True where the prediction equals the true label; its mean
# is the accuracy.
matches = predicted_labels == test_labels
print(f"accuracy: {matches.mean():.2f}")

accuracy: 0.81
