# Task 2

## Objectives

Develop and train a CNN to tell the time from images of clocks.

The training set consists of 18000 grayscale 150x150 images.
The labels for each sample are represented by two integers 18000x2. That correspond to the hour and minute displayed by the clock.

The model should be created using these different solutions:

- Regression
- Classification
- Multi-head outputs
- Label transformation (optional)

In [45]:
import numpy as np
import tensorflow as tf
from sympy import cos, sin, pi
from pathlib import Path
from time import strftime

np.random.seed(42)

In [39]:
# Setup tensorboard directory
def get_run_logdir(root_logdir="logs"):
    return Path(root_logdir) / strftime("run_%Y_%m_%d_%H_%M_%S")

run_logdir = get_run_logdir()

In [40]:
X = np.load("./data/tell-the-time/images.npy")
y = np.load("./data/tell-the-time/labels.npy")

In [41]:
size = y.shape[0]
train_size = int(0.8 * size)
X_train, y_train = X[:train_size,:], y[:train_size,:]
X_test, y_test = X[train_size:,:], y[train_size:, :]

## Approaches

### Regression Model

#### Preprocess data

In [42]:
def preprocess_labels_regression(y):
    labels = [float(y_[0] + y_[1] / 60.0) for y_ in y]
    return np.array(labels)

In [43]:
y_train_regression = preprocess_labels_regression(y_train)
y_test_regression = preprocess_labels_regression(y_test)

In [49]:
# Create model
regression_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(150,150,1)),
    tf.keras.layers.Normalization(),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding="same", activation='relu'),
    tf.keras.layers.Conv2D(filters=64, kernel_size=(3,3), padding="same", activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2,2)),
    tf.keras.layers.Conv2D(filters=128, kernel_size=(3,3), padding="same", activation='relu'),
    tf.keras.layers.Conv2D(filters=128, kernel_size=(3,3), padding="same", activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(3, 3), strides=(2,2)),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2,2)),
    tf.keras.layers.Conv2D(filters=256, kernel_size=(3,3), padding="same", activation='relu'),
    tf.keras.layers.Conv2D(filters=256, kernel_size=(3,3), padding="same", activation='relu'),
    tf.keras.layers.Conv2D(filters=256, kernel_size=(3,3), padding="same", activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation='relu'),
    tf.keras.layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation='relu'),
    tf.keras.layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation='relu'),
    tf.keras.layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation='relu'),
    tf.keras.layers.Conv2D(filters=512, kernel_size=(3,3), padding="same", activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4096, activation='relu', 
                          kernel_regularizer=tf.keras.regularizers.L2(5e-4), 
                          kernel_initializer= tf.keras.initializers.GlorotNormal()),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(4096, activation='relu',
                          kernel_regularizer=tf.keras.regularizers.L2(5e-4),
                          kernel_initializer= tf.keras.initializers.GlorotNormal()),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(4096, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
], name="ttt-vgg16-regressor")

In [50]:
# Compile the model
sdg_momentum = tf.keras.optimizers.SGD(momentum=0.9, learning_rate=0.01)
regression_model.compile(optimizer='adam', loss='mse', metrics=["RootMeanSquaredError"])

In [None]:
checkpoints_cb = tf.keras.callbacks.ModelCheckpoint('./checkpoints/ttt-vgg16-regressor/checkpoint', 
                                                    save_best_only=True, 
                                                    save_weights_only=True)

early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor="val_root_mean_squared_error", factor=0.1)

history = regression_model.fit(X_train, y_train_regression, epochs=50, callbacks=[reduce_lr, checkpoints_cb], validation_split=0.1)

In [54]:
regression_model.evaluate(X_test, y_test_regression)

2023-10-16 00:01:39.712063: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.




[60.409095764160156, 7.708273887634277]

In [55]:
regression_model.save("./models/ttt-vgg16-regressor", save_format='tf')



INFO:tensorflow:Assets written to: ./models/ttt-vgg16-regressor/assets


INFO:tensorflow:Assets written to: ./models/ttt-vgg16-regressor/assets


### Classification Model

### Multi-head Model

### Label Transformation

In [10]:
def encode_min(minute):
    angle = pi/2  - ((pi / 30) * minute)
    return np.array([float(cos(angle)), float(sin(angle))])

In [11]:
def encode_hour(hour):
    angle = pi/2  - ((pi / 6) * hour)
    return np.array([float(cos(angle)), float(sin(angle))])

In [12]:
minute_angles = np.array([encode_min(min) for min in range(1, 61)])
hour_angles = np.array([encode_hour(hour) for hour in range(0, 12)])

In [13]:
def decode_hour(cosX, sinX):
    x = np.array([cosX, sinX])
    distances = [np.square(angle[0] - x[0]) + np.square(angle[1] - x[1]) for angle in hour_angles]
    return np.argmin(distances)

In [14]:
def decode_min(cosX, sinX):
    x = np.array([cosX, sinX])
    distances = [np.square(angle[0] - x[0]) + np.square(angle[1] - x[1]) for angle in minute_angles]
    return np.argmin(distances)

In [15]:
encoded_hours = np.array([encode_hour(hour) for hour in y[:,0]])
encoded_minutes = np.array([encode_min(minute) for minute in y[:,1]])

In [16]:
y = np.hstack((encoded_hours, encoded_minutes))