<a href="https://colab.research.google.com/github/jonitorta/Ejercicios_Machine_learning./blob/main/First_neural_net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [85]:
import tensorflow as tf
import math

In [86]:
#Here we create a dense layer
class NaiveDense:
  """Fully connected layer that computes ``activation(inputs @ W + b)``.

  Attributes:
    activation: Callable applied to the affine output ``inputs @ W + b``.
    W: ``tf.Variable`` weight matrix of shape ``(input_size, output_size)``.
    b: ``tf.Variable`` bias vector built from ``tf.zeros(output_size)``.
  """

  def __init__(self, input_size, output_size, activation):
    """Create the layer's variables.

    Args:
      input_size: Number of columns expected in each input row.
      output_size: Number of columns produced in each output row.
      activation: Callable applied to the result of ``inputs @ W + b``.
    """
    self.activation = activation
    # Weights start as small random values drawn uniformly from [0, 0.1).
    self.W = tf.Variable(
        tf.random.uniform((input_size, output_size), minval = 0, maxval = 1e-1)
    )
    # The bias starts at zero.
    self.b = tf.Variable(tf.zeros(output_size))

  def __call__(self, inputs):
    """Apply the layer to ``inputs`` and return the activated result."""
    affine = tf.matmul(inputs, self.W) + self.b
    return self.activation(affine)

  @property
  def weights(self):
    """Return the layer's variables as the list ``[W, b]``."""
    return [self.W, self.b]

In [87]:
class NaiveSequential:
  """Chain of layers applied one after another to form a network."""

  def __init__(self, layers):
    """Keep the ordered list of layers.

    Each layer is called with the previous layer's output and is read
    through its ``weights`` attribute.
    """
    self.layers = layers

  def __call__(self, inputs):
    """Feed ``inputs`` through every layer in order and return the result."""
    activations = inputs
    for current_layer in self.layers:
      activations = current_layer(activations)
    return activations

  @property
  def weights(self):
    """Return one flat list with the weights of every layer, in layer order."""
    return [w for current_layer in self.layers for w in current_layer.weights]

In [88]:
# Build the network: 28*28 inputs -> 512 units (ReLU) -> 10 units (softmax).
model = NaiveSequential(
    [
        NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),
        NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax),
    ]
)
# Two layers, each contributing a weight matrix and a bias vector.
assert len(model.weights) == 4

In [89]:
class BatchGenerator:
  """Serve consecutive fixed-size slices of a paired images/labels dataset."""

  def __init__(self, images, labels, batch_size = 128):
    """Prepare batching over ``images`` and ``labels``.

    Args:
      images: Sliceable sequence of samples.
      labels: Sliceable sequence with the same length as ``images``.
      batch_size: Number of samples per batch (default 128).
    """
    assert len(images) == len(labels)
    self.images, self.labels = images, labels
    self.batch_size = batch_size
    # Position of the first sample of the next batch.
    self.index = 0
    # Round up so a final, smaller batch is still counted.
    self.num_batches = math.ceil(len(images) / batch_size)

  def next(self):
    """Return the next ``(images, labels)`` batch and advance the cursor.

    The last batch may be shorter than ``batch_size``; calling again after
    the data is exhausted yields empty slices.
    """
    window = slice(self.index, self.index + self.batch_size)
    self.index = window.stop
    return self.images[window], self.labels[window]

In [90]:
def one_training_step(model, images_batch, labels_batch):
  """Run one gradient-descent update on a single batch and return its mean loss.

  The forward pass and the loss are recorded on a gradient tape. The gradient
  of the mean loss with respect to ``model.weights`` is then computed and
  handed to ``update_weights``, which modifies the weights.
  """
  with tf.GradientTape() as recorder:
    # Operations run inside this context are recorded for differentiation.
    outputs = model(images_batch)
    # Sparse categorical cross-entropy between the labels and the model outputs.
    sample_losses = tf.keras.losses.sparse_categorical_crossentropy(
        labels_batch, outputs
    )
    mean_loss = tf.reduce_mean(sample_losses)
  grads = recorder.gradient(mean_loss, model.weights)
  update_weights(grads, model.weights)
  return mean_loss

In [91]:
learning_rate = 0.001  # Factor each gradient is multiplied by in update_weights.

In [92]:
def update_weights(gradients, weights):
  """Apply one gradient-descent step to every weight, in place.

  Each entry of ``weights`` is decreased by its matching entry of
  ``gradients`` multiplied by the module-level ``learning_rate``.
  """
  for gradient, weight in zip(gradients, weights):
    step = gradient * learning_rate
    weight.assign_sub(step)  # in-place equivalent of ``weight -= step``

In [93]:
def fit(model, images, labels, epochs, batch_size = 128):
  """Train ``model`` with mini-batch gradient descent.

  Args:
    model: Network passed to ``one_training_step`` for every batch.
    images: Training samples, batched by ``BatchGenerator``.
    labels: Training labels, same length as ``images``.
    epochs: Number of full passes over the data.
    batch_size: Number of samples per batch (default 128).

  Prints the epoch number at the start of each epoch and the loss of every
  100th batch.
  """
  for epoch_counter in range(epochs):
    print(f"Epoch {epoch_counter}")
    # Forward batch_size: previously the argument was accepted but ignored,
    # so BatchGenerator always fell back to its own default.
    batch_generator = BatchGenerator(images, labels, batch_size=batch_size)
    for batch_counter in range(batch_generator.num_batches):
      images_batch, labels_batch = batch_generator.next()  # Take one batch of data
      loss = one_training_step(model, images_batch, labels_batch)  # Train on it
      if batch_counter % 100 == 0:
        print(f"loss at batch {batch_counter}: {loss:.2f}")

In [94]:
# Load the MNIST dataset through Keras as a (train, test) pair.
train, test = tf.keras.datasets.mnist.load_data(path="mnist.npz")

In [95]:
# Split each dataset pair into its images and labels.
train_images, train_labels = train
test_images, test_labels = test

In [96]:
# Flatten every 28x28 image into a 784-element row, convert to float32 and
# divide by 255 (presumably mapping 8-bit pixel values into [0, 1]).
# -1 lets NumPy infer the number of samples instead of hard-coding
# 60000 / 10000, so the cell also works on a subset of the data.
train_images = train_images.reshape((-1, 28 * 28))
train_images = train_images.astype("float32") / 255
test_images = test_images.reshape((-1, 28 * 28))
test_images = test_images.astype("float32") / 255

In [97]:
# Train the network on the training set for 10 epochs.
fit(
    model=model,
    images=train_images,
    labels=train_labels,
    epochs=10,
    batch_size=128,
)

Epoch 0
loss at batch 0: 3.81
loss at batch 100: 2.24
loss at batch 200: 2.21
loss at batch 300: 2.10
loss at batch 400: 2.18
Epoch 1
loss at batch 0: 1.92
loss at batch 100: 1.87
loss at batch 200: 1.83
loss at batch 300: 1.71
loss at batch 400: 1.78
Epoch 2
loss at batch 0: 1.59
loss at batch 100: 1.57
loss at batch 200: 1.51
loss at batch 300: 1.43
loss at batch 400: 1.47
Epoch 3
loss at batch 0: 1.33
loss at batch 100: 1.33
loss at batch 200: 1.24
loss at batch 300: 1.21
loss at batch 400: 1.25
Epoch 4
loss at batch 0: 1.13
loss at batch 100: 1.15
loss at batch 200: 1.05
loss at batch 300: 1.05
loss at batch 400: 1.09
Epoch 5
loss at batch 0: 0.99
loss at batch 100: 1.01
loss at batch 200: 0.91
loss at batch 300: 0.93
loss at batch 400: 0.97
Epoch 6
loss at batch 0: 0.88
loss at batch 100: 0.90
loss at batch 200: 0.81
loss at batch 300: 0.84
loss at batch 400: 0.89
Epoch 7
loss at batch 0: 0.80
loss at batch 100: 0.82
loss at batch 200: 0.73
loss at batch 300: 0.77
loss at batch 40

In [98]:
# Evaluate on the test set: take the highest-scoring class for each sample
# and compare it with the true label.
import numpy as np

predictions = model(test_images).numpy()
predicted_labels = np.argmax(predictions, axis=1)
matches = predicted_labels == test_labels
print(f"accuracy: {np.mean(matches):.2f}")

accuracy: 0.81
