# Polar Express

Stefano Volpe #0000969766

University of Bologna

Introduction to Machine Learning

a.y. 2022/23

## Imports

In [13]:
import numpy as np
import tensorflow as tf

from keras.activations import elu, gelu, relu, sigmoid, softmax, softsign, \
  swish, tanh
from keras.callbacks import EarlyStopping
from keras.layers import Concatenate, Dense, Input, Normalization
from keras.losses import CategoricalCrossentropy
from keras.models import Model
from keras.optimizers import Nadam
from keras.utils import plot_model

## Generator

In [14]:
def polar_generator(batchsize, grid = (10, 10), noise = .002, flat = False):
  """Endlessly yield random batches of noisy polar coordinates and cell maps.

  For every batch, `batchsize` points are drawn uniformly from the unit
  square. Each point is converted to polar coordinates (with Gaussian noise
  added) and paired with a one-hot map marking the grid cell it falls in.

  Args:
    batchsize: number of points in each yielded batch.
    grid: pair (rows, columns) describing the grid laid over the unit square.
    noise: standard deviation of the Gaussian noise added to rho and theta.
    flat: when True, each map is flattened to grid[0] * grid[1] entries.

  Yields:
    ((theta, rho), maps), where theta and rho are arrays of length
    `batchsize` and maps has shape (batchsize, grid[0], grid[1]), or
    (batchsize, grid[0] * grid[1]) when `flat` is True.
  """
  n_rows, n_cols = grid[0], grid[1]
  sample_index = np.arange(batchsize)
  while True:
    x = np.random.rand(batchsize)
    y = np.random.rand(batchsize)
    # One-hot encode the grid cell containing each point.
    rows = (x * n_rows).astype(int)
    cols = (y * n_cols).astype(int)
    maps = np.zeros((batchsize, n_rows, n_cols))
    maps[sample_index, rows, cols] = 1
    # Each noise term is a single scalar draw, so the same offset is shared
    # by every sample of the batch. rho's noise is drawn before theta's.
    rho_noise = np.random.normal(scale = noise)
    rho = np.sqrt(x ** 2 + y ** 2) + rho_noise
    theta_noise = np.random.normal(scale = noise)
    # The denominator is clamped so that x == 0 does not divide by zero.
    theta = np.arctan(y / np.maximum(x, .00001)) + theta_noise
    if flat:
      maps = maps.reshape((batchsize, n_rows * n_cols))
    yield ((theta, rho), maps)

## Dataset

The project requirements ask for a validation set of at least 20000 samples. So that it is one fourth the size of the training set (a good general rule of thumb), 1000000 samples were chosen.

In [15]:
# Number of samples in each dataset.
training_set_size, validation_set_size = 4_000_000, 1_000_000

# A single next() call on a generator configured with the desired batch size
# produces a whole dataset at once (maps are flattened to one row per sample).
(training_theta, training_rho), training_maps = next(
  polar_generator(training_set_size, flat = True)
)
# The validation set is drawn with its own size, not the training one.
(validation_theta, validation_rho), validation_maps = next(
  polar_generator(validation_set_size, flat = True)
)

## Metrics

The project requirements ask us to compute the categorical accuracy of the model with our own implementation, rather than with Keras's.

In [20]:
def argmax_axis_1(input: tf.Tensor) -> tf.Tensor:
  """Return the index of the largest entry along axis 1 of `input`.

  Args:
    input: tensor with at least two axes.

  Returns:
    A tensor of indices (not a Python int), one per slice along axis 1.
  """
  return tf.argmax(input, axis = 1)

def my_categorical_accuracy(y_true : tf.Tensor, y_pred : tf.Tensor) -> tf.Tensor:
  """Fraction of samples whose predicted category equals the true one.

  Args:
    y_true: ground-truth scores, one row per sample; the true category of a
      row is the index of its largest entry.
    y_pred: predicted scores, laid out like `y_true`.

  Returns:
    A scalar float64 tensor holding the mean of the per-sample matches.
  """
  # The right categories (according to our ground truth)
  true_categories = argmax_axis_1(y_true)
  # The categories our model predicts with the most confidence
  predicted_categories = argmax_axis_1(y_pred)
  # Element-wise equality: True where the prediction is correct
  hits = tf.equal(true_categories, predicted_categories)
  # Since True converts to 1.0 and False to 0.0, the arithmetic mean of the
  # converted values is exactly the accuracy
  return tf.reduce_mean(tf.cast(hits, tf.float64))

## Model

In [18]:
def makeNetwork() -> Model:
  """Assemble the two-input network mapping (theta, rho) to 100 scores.

  Each scalar input goes through its own Normalization layer (adapted on the
  module-level `training_theta` / `training_rho` arrays) and its own chain of
  Dense layers. The two branches are concatenated and fed to a final chain of
  Dense layers ending in a 100-way softmax (presumably one score per cell of
  the default 10x10 grid).

  Returns:
    The uncompiled Keras model taking the inputs in [theta, rho] order.
  """
  def dense_stack(tensor, spec):
    # Chain one Dense layer per (units, activation) pair, in the given order.
    for units, activation in spec:
      tensor = Dense(units, activation = activation)(tensor)
    return tensor

  # Theta branch
  theta_input = Input(shape = (1,))
  theta_normalizer = Normalization(axis = None)
  theta_normalizer.adapt(training_theta)
  theta_branch = dense_stack(
    theta_normalizer(theta_input),
    ((2, softsign), (4, tanh), (4, sigmoid))
  )

  # Rho branch
  rho_input = Input(shape = (1,))
  rho_normalizer = Normalization(axis = None)
  rho_normalizer.adapt(training_rho)
  rho_branch = dense_stack(
    rho_normalizer(rho_input),
    ((4, softsign), (4, swish), (4, tanh), (4, elu))
  )

  # Merge the branches and classify
  merged = Concatenate()([theta_branch, rho_branch])
  scores = dense_stack(
    merged,
    ((8, swish), (16, relu), (4, gelu), (100, softmax))
  )
  return Model([theta_input, rho_input], scores)

# A model assembled from Input layers already knows its input shapes, so no
# explicit build() call is needed (and (None, 2) would not describe its two
# separate (None, 1) inputs anyway).
network = makeNetwork()
network.summary(show_trainable = False)
network.compile(
  Nadam(),
  CategoricalCrossentropy(),
  metrics = [my_categorical_accuracy]
)
# Kept as the last expression of the cell so that the notebook renders the
# diagram it returns.
plot_model(
  network,
  show_shapes = True,
  show_dtype = True,
  show_layer_activations = True,
)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 normalization_2 (Normalization  (None, 1)           3           ['input_3[0][0]']                
 )                                                                                                
                                                                                                  
 normalization_1 (Normalization  (None, 1)           3           ['input_2[0][0]']            

## Training and evaluation

In [None]:
batch_size = 4096
epochs = 150
verbose = 2

# Stop when the validation loss has not improved for 4 consecutive epochs and
# roll the model back to the weights of its best epoch, rather than keeping
# the ones from the last (non-improving) epoch.
early_stopping = EarlyStopping(
  monitor = 'val_loss',
  patience = 4,
  restore_best_weights = True
)

# Inputs are passed as lists in the same [theta, rho] order used when the
# model was defined. Keyword arguments keep the call independent of the
# positional order of fit()'s parameters.
history = network.fit(
  [training_theta, training_rho],
  training_maps,
  batch_size = batch_size,
  epochs = epochs,
  verbose = verbose,
  callbacks = [early_stopping],
  validation_data = ([validation_theta, validation_rho], validation_maps)
)

<bound method Model.fit of <keras.engine.functional.Functional object at 0x7fa1f3d15730>>
