In [3]:
import numpy as np
import tensorflow as tf

In [8]:
# Scalar case: build an array from a single number and display its shape.
x = np.array(2)
x.shape

()

In [16]:
# Vector case: a one-dimensional array holding two entries.
y = np.array([1,2])
y.shape

(2,)

In [25]:
# Matrix case: two rows of three integers each; the last line displays the shape.
z = np.array([[-1, 2, 3],
              [3, 4, 5]])
z.shape

(2, 3)

In [28]:
# Rank-3 case: two 2x3 integer matrices packed into a single array.
w = np.array([[[1,2,3],
               [4,5,6]],
              [[7,8,9],
               [10,11,12]]])
w.shape


(2, 2, 3)

In [23]:
def naive_relu(x):
    """Element-wise ReLU for a rank-2 array, written with explicit Python loops.

    Args:
        x: Array whose ``shape`` has exactly two entries.

    Returns:
        A copy of ``x`` in which every entry below zero has been replaced
        by 0. The argument itself is not modified.

    Raises:
        AssertionError: If ``x`` is not rank-2.
    """
    assert len(x.shape) == 2
    n_rows, n_cols = x.shape
    out = x.copy()
    for row in range(n_rows):
        for col in range(n_cols):
            # Only negative entries need rewriting; all others stay as they are.
            if out[row, col] < 0:
                out[row, col] = 0
    return out

In [26]:
# Apply the loop-based ReLU to `z`; the cell displays the returned copy.
naive_relu(z)

array([[0, 2, 3],
       [3, 4, 5]])

In [27]:
def naive_add(x, y):
    """Element-wise sum of two equally shaped rank-2 arrays, using Python loops.

    Args:
        x: Rank-2 array; left operand.
        y: Array with the same shape as ``x``; right operand.

    Returns:
        A copy of ``x`` with the matching entry of ``y`` added to each
        position. Neither argument is modified.

    Raises:
        AssertionError: If ``x`` is not rank-2 or the shapes differ.
    """
    assert len(x.shape) == 2
    assert x.shape == y.shape
    n_rows, n_cols = x.shape
    total = x.copy()
    for row in range(n_rows):
        for col in range(n_cols):
            total[row, col] = total[row, col] + y[row, col]
    return total

In [29]:
# A second 2x3 integer matrix.
v = np.array([[7, 8, 9],
              [10, 11, 12]])

In [30]:
# Add `z` and `v` entry by entry with the loop-based implementation.
naive_add(z, v)

array([[ 6, 10, 12],
       [13, 15, 17]])

In [31]:
import time

In [39]:
# Compare NumPy's vectorized add + ReLU against the loop-based versions on
# two random 20x100 matrices, repeating each variant 1000 times.
x = np.random.random((20, 100))
y = np.random.random((20, 100))

# time.perf_counter() is used for the measurements: it is the clock meant for
# timing short intervals and is unaffected by system clock adjustments.
t0 = time.perf_counter()
for _ in range(1000):
    z = x + y
    z = np.maximum(z, 0.)
t1 = time.perf_counter()
print("Vectorized version: %f seconds" % (t1 - t0))

t2 = time.perf_counter()
for _ in range(1000):
    z = naive_add(x, y)
    z = naive_relu(z)
t3 = time.perf_counter()
print("Naive version: %f seconds" % (t3 - t2))


Vectorized version: 0.012146 seconds
Naive version: 1.657449 seconds


In [42]:
# A is a random 32x10 matrix and b a random vector of length 10.
A = np.random.random((32, 10))
b = np.random.random((10,))

# Insert a new leading axis so that b becomes a 1x10 matrix.
b = np.expand_dims(b, axis=0)
b.shape


(1, 10)

In [44]:
# Stack 32 copies of b along axis 0 and display the resulting shape.
B = np.concatenate([b] * 32, axis=0)
B.shape

(32, 10)

In [45]:
def naive_add_matrix_and_vector(x, y):
    """Add a vector to every row of a matrix, using explicit Python loops.

    Args:
        x: Rank-2 array.
        y: Rank-1 array whose length equals the second dimension of ``x``.

    Returns:
        A copy of ``x`` where ``y[j]`` has been added to every entry in
        column ``j``. Neither argument is modified.

    Raises:
        AssertionError: If the ranks are wrong or the sizes do not line up.
    """
    assert len(x.shape) == 2
    assert len(y.shape) == 1
    assert x.shape[1] == y.shape[0]
    n_rows, n_cols = x.shape
    shifted = x.copy()
    for row in range(n_rows):
        for col in range(n_cols):
            shifted[row, col] = shifted[row, col] + y[col]
    return shifted

In [47]:
# Small worked example: add the vector [1, 2] to each row of a 2x2 matrix.
x = np.array([[1, 2],
              [3, 4]])
y = np.array([1, 2])
naive_add_matrix_and_vector(x, y)

array([[2, 4],
       [4, 6]])

In [48]:
# Element-wise maximum of a (64, 3, 32, 10) array and a (32, 10) array;
# the last line displays the shape of the result.
x = np.random.random((64, 3, 32, 10))
y = np.random.random((32, 10))
z = np.maximum(x, y)
z.shape

(64, 3, 32, 10)

In [51]:
# Dot product of two random length-32 vectors; both operands and the
# result are printed.
x = np.random.random((32,))
y = np.random.random((32,))
z = np.dot(x, y)
print(x)
print(y)
print(z)

[0.52105226 0.87666829 0.62926254 0.28950919 0.06325177 0.56998732
 0.92469323 0.10468054 0.79432369 0.88092596 0.61301091 0.10738974
 0.74350137 0.20137366 0.75637048 0.90654844 0.98305312 0.40684916
 0.43227564 0.42480658 0.88878975 0.70654758 0.03420572 0.22252645
 0.50590084 0.4364481  0.49629987 0.00845462 0.23803342 0.53845544
 0.367019   0.82094759]
[0.83587135 0.78280186 0.20623463 0.69804845 0.44612921 0.43711701
 0.66479639 0.01237058 0.12662672 0.83504204 0.15820763 0.66551139
 0.43918729 0.28834517 0.38485647 0.34032337 0.28550246 0.84057408
 0.39901496 0.84130446 0.80488805 0.06401842 0.99957512 0.89468941
 0.30148168 0.07837063 0.82727172 0.09777112 0.68091752 0.50284708
 0.09210093 0.85168429]
8.246292216780319


In [52]:
def naive_vector_dot(x, y):
    """Dot product of two vectors, computed with an explicit Python loop.

    Args:
        x: Rank-1 array.
        y: Rank-1 array with the same length as ``x``.

    Returns:
        The sum of ``x[i] * y[i]`` over all indices, accumulated starting
        from the float ``0.``.

    Raises:
        AssertionError: If either argument is not rank-1 or the lengths
            differ.
    """
    assert len(x.shape) == 1
    # y must be rank-1 as well; with a higher-rank y, y[i] would be a
    # sub-array and the accumulated result would no longer be a scalar.
    assert len(y.shape) == 1
    assert x.shape[0] == y.shape[0]
    z = 0.
    for i in range(x.shape[0]):
        z += x[i] * y[i]
    return z

In [53]:
# Worked example: 1*3 + 2*4.
x = np.array([1, 2])
y = np.array([3, 4])
naive_vector_dot(x, y)

11.0

In [54]:
def naive_matrix_vector_dot(x, y):
    """Matrix-vector product computed with two nested Python loops.

    Args:
        x: Rank-2 array.
        y: Rank-1 array whose length equals the second dimension of ``x``.

    Returns:
        An array created with ``np.zeros`` that has one entry per row of
        ``x``; entry ``i`` accumulates ``x[i, j] * y[j]`` over all ``j``.

    Raises:
        AssertionError: If the ranks are wrong or the sizes do not line up.
    """
    assert len(x.shape) == 2
    assert len(y.shape) == 1
    assert x.shape[1] == y.shape[0]
    n_rows, n_cols = x.shape
    result = np.zeros(n_rows)
    for row in range(n_rows):
        for col in range(n_cols):
            result[row] += x[row, col] * y[col]
    return result

In [56]:
# Worked example: multiply a 2x3 matrix by a length-3 vector.
x = np.array([[1, 2, 3],
              [3, 4, 5]])
y = np.array([1, 2, 3])
naive_matrix_vector_dot(x, y)

array([14., 26.])

In [57]:
def naive_matrix_vector_dot(x, y):
    """Matrix-vector product built on top of ``naive_vector_dot``.

    This definition reuses the name of the earlier loop-only version, so
    once this cell runs it is the one bound to ``naive_matrix_vector_dot``.
    It performs no shape checks of its own; each row of ``x`` is handed to
    ``naive_vector_dot`` together with ``y``.

    Args:
        x: Array indexed as ``x[i, :]`` to obtain row ``i``.
        y: Vector passed unchanged to ``naive_vector_dot`` for every row.

    Returns:
        An array created with ``np.zeros`` holding one dot product per
        row of ``x``.
    """
    n_rows = x.shape[0]
    result = np.zeros(n_rows)
    for row in range(n_rows):
        result[row] = naive_vector_dot(x[row, :], y)
    return result

In [58]:
# Call naive_matrix_vector_dot on the current `x` and `y`.
naive_matrix_vector_dot(x, y)

array([14., 26.])

In [None]:
def naive_matrix_dot(x, y):
    """Matrix-matrix product assembled from row-by-column dot products.

    Args:
        x: Rank-2 array.
        y: Rank-2 array whose first dimension equals the second dimension
            of ``x``.

    Returns:
        An array created with ``np.zeros`` of shape
        ``(x.shape[0], y.shape[1])``; entry ``(i, j)`` is
        ``naive_vector_dot`` of row ``i`` of ``x`` and column ``j`` of ``y``.

    Raises:
        AssertionError: If either argument is not rank-2 or the inner
            dimensions differ.
    """
    assert len(x.shape) == 2
    assert len(y.shape) == 2
    assert x.shape[1] == y.shape[0]
    n_rows, n_cols = x.shape[0], y.shape[1]
    product = np.zeros((n_rows, n_cols))
    for row in range(n_rows):
        for col in range(n_cols):
            product[row, col] = naive_vector_dot(x[row, :], y[:, col])
    return product


In [59]:
# Rank-3 example: two identical 2x3 integer matrices in one array.
x = np.array([[[1, 2, 3],
              [1, 2, 3]],
              [[1, 2, 3],
              [1, 2, 3]]])

In [62]:
# Show the size along each axis, then the number of axes.
print(x.shape)
print(x.ndim)

(2, 2, 3)
3


In [74]:
# Random 28x28 matrix; print() writes out the full array.
x = np.random.random((28, 28))
print(x)
    

[[1.81127383e-01 6.87794538e-01 5.19075762e-01 6.97183048e-01
  3.37500386e-01 9.93293940e-01 9.64005415e-01 9.02216669e-01
  5.76743229e-01 6.31765495e-01 2.63099575e-01 3.18775602e-01
  4.25042125e-01 1.05155552e-01 1.88612663e-01 5.57208423e-01
  2.46425075e-01 7.93102755e-01 2.82057801e-01 1.15777607e-01
  4.07900460e-01 9.24510489e-01 8.34997723e-01 9.41314813e-01
  5.04836368e-01 8.93473260e-02 7.02373964e-01 1.82491532e-01]
 [4.57644321e-01 3.84335644e-01 7.89254991e-02 5.43397204e-01
  8.36800730e-01 9.21423721e-01 9.69809577e-01 7.92079200e-01
  1.37217556e-01 2.53694934e-01 6.86584181e-01 7.14011778e-01
  9.09879825e-02 1.08822668e-01 9.54296229e-01 2.90582198e-01
  7.92587031e-01 1.24847130e-01 7.85111302e-01 5.38405000e-01
  2.94311103e-02 9.27633329e-01 7.42301033e-01 8.61127411e-01
  2.60341969e-01 3.81835257e-01 2.36249244e-01 5.66380011e-01]
 [9.05112643e-01 3.09236768e-01 1.41953499e-02 8.78253379e-01
  8.84028137e-01 9.06382083e-01 9.87489243e-01 1.69502362e-01
  3.14

In [76]:
class NaiveDense:
    """Minimal dense layer: ``activation(inputs @ W + b)``.

    Args:
        input_size: Number of rows of the weight matrix ``W``.
        output_size: Number of columns of ``W`` and length of the bias ``b``.
        activation: Callable applied to the affine output.
    """

    def __init__(self, input_size, output_size, activation):
        self.activation = activation

        # W starts with values drawn uniformly from [0, 0.1).
        self.W = tf.Variable(
            tf.random.uniform((input_size, output_size), minval=0, maxval=1e-1)
        )

        # The bias starts at zero.
        self.b = tf.Variable(tf.zeros((output_size,)))

    def __call__(self, inputs):
        """Run the layer on ``inputs`` and return the activated output."""
        pre_activation = tf.matmul(inputs, self.W) + self.b
        return self.activation(pre_activation)

    @property
    def weights(self):
        """The layer's variables as the list ``[W, b]``."""
        return [self.W, self.b]
    


In [77]:
class NaiveSequential:
    """Chain of layers applied one after another.

    Args:
        layers: Sequence of callables, each exposing a ``weights`` list.
    """

    def __init__(self, layers):
        self.layers = layers

    def __call__(self, inputs):
        """Feed ``inputs`` through every layer in order and return the result."""
        activations = inputs
        for apply_layer in self.layers:
            activations = apply_layer(activations)
        return activations

    @property
    def weights(self):
        """A new flat list with the weights of all layers, in layer order."""
        return [weight for layer in self.layers for weight in layer.weights]

In [78]:
# Two stacked dense layers: 28*28 inputs -> 512 units (ReLU) -> 10 units (softmax).
model = NaiveSequential([
    NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),
    NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax)
])
# Each NaiveDense contributes W and b, so two layers give four weights.
assert len(model.weights) == 4

In [79]:
import math

In [82]:
class BatchGenerator:
    """Hands out consecutive slices of paired images and labels.

    Args:
        images: Sliceable sequence of samples.
        labels: Sliceable sequence with the same length as ``images``.
        batch_size: Number of samples per slice (default 128).

    Raises:
        AssertionError: If ``images`` and ``labels`` differ in length.
    """

    def __init__(self, images, labels, batch_size=128):
        assert len(images) == len(labels)
        self.images, self.labels = images, labels
        self.batch_size = batch_size
        # Start position of the next slice.
        self.index = 0
        # Round up so a final, shorter batch is counted too.
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        """Return the next ``(images, labels)`` slice and advance the cursor."""
        window = slice(self.index, self.index + self.batch_size)
        batch = (self.images[window], self.labels[window])
        self.index = window.stop
        return batch


In [83]:
def one_training_step(model, images_batch, labels_batch):
    """Run one forward/backward pass on a batch and update the model weights.

    Args:
        model: Callable producing predictions; must expose ``weights``.
        images_batch: Inputs passed to ``model``.
        labels_batch: Integer class labels for the batch, passed to
            ``sparse_categorical_crossentropy``.

    Returns:
        The mean of the per-sample losses for this batch.
    """
    # The forward pass and the loss are computed inside the tape so they
    # can be differentiated afterwards.
    with tf.GradientTape() as tape:
        outputs = model(images_batch)
        batch_loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(labels_batch, outputs)
        )
    grads = tape.gradient(batch_loss, model.weights)
    update_weights(grads, model.weights)
    return batch_loss


In [84]:
# Factor by which update_weights scales each gradient before subtracting it.
learning_rate = 1e-3


In [85]:
def update_weights(gradients, weights):
    """Subtract each gradient, scaled by the step size, from its weight in place.

    The step size is read from the module-level ``learning_rate``.

    Args:
        gradients: Iterable of gradients, paired with ``weights`` by position.
        weights: Iterable of objects providing ``assign_sub``.
    """
    for gradient, weight in zip(gradients, weights):
        step = gradient * learning_rate
        weight.assign_sub(step)


In [86]:
def fit(model, images, labels, epochs, batch_size=128):
    """Train ``model`` with mini-batches for a number of epochs.

    Args:
        model: Model passed to ``one_training_step``.
        images: Training inputs.
        labels: Training targets, same length as ``images``.
        epochs: Number of full passes over the data.
        batch_size: Number of samples per training step (default 128).

    Prints the epoch number at the start of each epoch and the loss of
    every 100th batch.
    """
    for epoch_counter in range(epochs):
        print(f"Epoch {epoch_counter}")
        # A fresh generator per epoch restarts from the first sample.
        # batch_size is forwarded so the caller's value is actually used.
        batch_generator = BatchGenerator(images, labels, batch_size=batch_size)
        for batch_counter in range(batch_generator.num_batches):
            images_batch, labels_batch = batch_generator.next()
            loss = one_training_step(model, images_batch, labels_batch)
            if batch_counter % 100 == 0:
                print(f"loss at batch {batch_counter}: {loss:.2f}")

In [87]:
from tensorflow.keras.datasets import mnist
# Load MNIST as separate (images, labels) pairs for training and testing.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()



In [88]:
# Flatten every image to a vector of 28 * 28 values. The sample count is
# inferred (-1) rather than hard-coded, so the cell works for any number of
# images.
# Dividing by 255 assumes pixel values in 0..255 (TODO confirm for the loaded data).
# NOTE: this cell reassigns its own inputs; running it twice rescales twice.
train_images = train_images.reshape((-1, 28 * 28))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((-1, 28 * 28))
test_images = test_images.astype("float32") / 255

In [89]:
# Train for 10 epochs on the training set with batches of 128 samples.
fit(model, train_images, train_labels, epochs=10, batch_size=128)

Epoch 0
loss at batch 0: 6.56
loss at batch 100: 2.24
loss at batch 200: 2.20
loss at batch 300: 2.09
loss at batch 400: 2.20
Epoch 1
loss at batch 0: 1.89
loss at batch 100: 1.89
loss at batch 200: 1.82
loss at batch 300: 1.71
loss at batch 400: 1.81
Epoch 2
loss at batch 0: 1.57
loss at batch 100: 1.59
loss at batch 200: 1.50
loss at batch 300: 1.43
loss at batch 400: 1.50
Epoch 3
loss at batch 0: 1.32
loss at batch 100: 1.35
loss at batch 200: 1.24
loss at batch 300: 1.21
loss at batch 400: 1.27
Epoch 4
loss at batch 0: 1.12
loss at batch 100: 1.17
loss at batch 200: 1.04
loss at batch 300: 1.05
loss at batch 400: 1.10
Epoch 5
loss at batch 0: 0.98
loss at batch 100: 1.03
loss at batch 200: 0.90
loss at batch 300: 0.93
loss at batch 400: 0.98
Epoch 6
loss at batch 0: 0.87
loss at batch 100: 0.93
loss at batch 200: 0.80
loss at batch 300: 0.84
loss at batch 400: 0.89
Epoch 7
loss at batch 0: 0.79
loss at batch 100: 0.84
loss at batch 200: 0.72
loss at batch 300: 0.77
loss at batch 40

In [90]:
# Run the model on the test images and convert the result to a NumPy array.
predictions = model(test_images).numpy()
# The predicted class of each sample is the index of its largest score.
predicted_labels = predictions.argmax(axis=1)
# Boolean array marking correct predictions; its mean is the accuracy.
matches = (predicted_labels == test_labels)
print(f"accuracy: {matches.mean():.2f}")

accuracy: 0.82
