In [2]:
import numpy as np
import os
import PIL
import PIL.Image
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as k
from tensorflow.keras import layers as kl

from capsnet import nn, layers, losses
from capsnet.layers import ConvCaps2D, DenseCaps

In [3]:
# Input pipeline configuration: batch size, then image height and width in pixels.
batch_size, img_height, img_width = 12, 32, 32

In [None]:
# Training subset of a seeded 80/20 split of the images under data_dir,
# loaded as grayscale batches with one-hot ("categorical") labels.
data_dir = 'D:/Projects/Research/MalayalamOCR/Handwritten-Dataset/dataset_u4/'
train_ds = tf.keras.utils.image_dataset_from_directory(
    directory=data_dir,
    label_mode='categorical',
    color_mode='grayscale',
    batch_size=batch_size,
    image_size=(img_height, img_width),
    seed=124,
    validation_split=0.2,
    subset="training",
)

In [None]:
# Validation subset of the same split; seed and validation_split match the
# training cell so the two subsets come from one consistent partition.
val_ds = tf.keras.utils.image_dataset_from_directory(
    directory=data_dir,
    label_mode='categorical',
    color_mode='grayscale',
    batch_size=batch_size,
    image_size=(img_height, img_width),
    seed=124,
    validation_split=0.2,
    subset="validation",
)

In [None]:
train_ds = train_ds.as_numpy_iterator()

In [None]:
train_ds.shape

In [None]:
# Carve the first fifth of the validation batches off as a test set and keep
# the remainder for validation.
# NOTE: `val_ds` is rebound from itself, so re-running this cell shrinks it again.
val_batches = tf.data.experimental.cardinality(val_ds)
test_batches = val_batches // 5
test_dataset = val_ds.take(test_batches)
val_ds = val_ds.skip(test_batches)

In [None]:
# Class labels exposed by the directory-based dataset. This attribute is read
# from the Dataset returned by image_dataset_from_directory, so `train_ds` must
# not have been replaced by an iterator or a mapped/cached dataset before this cell.
class_names = train_ds.class_names


In [None]:
# normalization_layer = tf.keras.layers.Rescaling(1./255)
# train_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))

In [None]:
# Cache both datasets after the first pass and let tf.data pick the prefetch depth.
AUTOTUNE = tf.data.AUTOTUNE

train_ds, val_ds = (
    dataset.cache().prefetch(buffer_size=AUTOTUNE)
    for dataset in (train_ds, val_ds)
)

In [None]:
num_classes = len(class_names)
# Keras image tensors are (height, width, channels), matching the
# image_size=(img_height, img_width) used when loading the data. The original
# order (width, height) only worked because the images are square.
IMG_SHAPE = (img_height, img_width, 1)

In [4]:
# NOTE(review): the star import hides which names come from lib.utils; prefer
# importing the loader explicitly once it is confirmed nothing else is needed.
from lib.utils import *
# Load the in-memory train / test / validation splits as (X, y) pairs.
# Presumably 32x32 images, judging by the loader name; confirm in lib.utils.
(X_train, y_train), (X_test, y_test), (X_val, y_val) = load_32x32_min_train_test_val_data()

  dataset = np.array(dataset)


## CapsNet


In [5]:
def get_model(name, input_shape, num_classes) -> k.Model:
    """Build the capsule network selected by ``name``.

    Args:
        name: Architecture key, ``"original"`` or ``"deepcaps"``. It is also
            forwarded to the builder, which uses it as the Keras model name.
        input_shape: Shape of one input image, forwarded to the builder.
        num_classes: Number of output classes, forwarded to the builder.

    Returns:
        The model produced by ``original_model`` or ``deep_caps_model``.

    Raises:
        ValueError: If ``name`` is not a supported architecture key.
    """
    if name == "original":
        return original_model(name, input_shape, num_classes)
    elif name == "deepcaps":
        return deep_caps_model(name, input_shape, num_classes)
    else:
        # Raise instead of calling sys.exit(1): `sys` is not imported by the
        # notebook's import cell, and exiting the interpreter from a helper
        # gives the caller no usable error message.
        raise ValueError(
            f"unknown model name {name!r}; expected 'original' or 'deepcaps'")


def original_model(name, input_shape, num_classes) -> k.Model:
    """Build the two-output capsule model registered under "original".

    The input is rescaled by 1/255, passed through a 9x9 ReLU convolution,
    a strided ConvCaps2D layer and a DenseCaps layer with 3 routing
    iterations; both capsule layers are followed by ``nn.squash``.

    Args:
        name: Name given to the resulting Keras model.
        input_shape: Shape of one input image; also the reconstruction target shape.
        num_classes: Number of capsules in the DenseCaps layer.

    Returns:
        A Keras model with outputs ``[pred, recon]``: ``nn.norm`` of the class
        capsules, and the output of ``fully_connected_decoder``.
    """
    inputs = kl.Input(shape=input_shape, name='input')
    scaled = tf.keras.layers.Rescaling(1./255)(inputs)

    # Plain convolution for feature extraction.
    features = kl.Conv2D(
        filters=256, kernel_size=(9, 9), strides=(1, 1),
        activation='relu', name='conv')(scaled)

    # Move into the capsule domain.
    primary_caps = ConvCaps2D(
        filters=32, filter_dims=8, kernel_size=(9, 9), strides=(2, 2),
        name='conv_caps_2d')(features)
    primary_caps = kl.Lambda(nn.squash)(primary_caps)

    # Class capsules obtained by dynamic routing.
    class_caps = DenseCaps(
        caps=num_classes, caps_dims=16, routing_iter=3,
        name='dense_caps')(primary_caps)
    class_caps = kl.Lambda(nn.squash)(class_caps)

    pred = kl.Lambda(nn.norm, name='pred')(class_caps)
    recon = fully_connected_decoder(input_shape)(class_caps)
    return k.Model(inputs=inputs, outputs=[pred, recon], name=name)


def fully_connected_decoder(target_shape):
    """Create a dense reconstruction decoder for images of ``target_shape``.

    Args:
        target_shape: Shape of the image to reconstruct; its element count
            sets the width of the last dense layer.

    Returns:
        A function mapping a capsule tensor to a reconstruction tensor. It
        applies ``nn.MaskCID``, two ReLU dense layers (512 and 1024 units), a
        sigmoid dense layer with one unit per target element, and a reshape
        to ``target_shape`` (layer name ``recon``).
    """
    # Dense expects a plain Python int for `units`. The original passed the
    # tensor returned by tf.reduce_prod, which only works while Keras happens
    # to coerce it; compute the size once with NumPy instead.
    flat_size = int(np.prod(target_shape))

    def decoder(input_tensor):
        # NOTE(review): MaskCID is a project layer; presumably it masks all
        # but one class capsule. Confirm in capsnet.nn.
        nl = nn.MaskCID(name="dc_masking")(input_tensor)
        nl = kl.Dense(512, activation='relu', name="dc_dense_1")(nl)
        nl = kl.Dense(1024, activation='relu', name="dc_dense_2")(nl)
        nl = kl.Dense(flat_size, activation='sigmoid', name="dc_dense_3")(nl)
        nl = kl.Reshape(target_shape, name='recon')(nl)
        return nl

    return decoder


def deep_caps_model(name, input_shape, num_classes) -> k.Model:
    """Build the two-output capsule model registered under "deepcaps".

    The input is rescaled by 1/255, passed through a 3x3 'same' ReLU
    convolution and two ``dense_caps_block`` stages with stride (2, 2), then
    flattened into ``num_classes`` capsules by ``layers.FlattenCaps``.

    Args:
        name: Name given to the resulting Keras model.
        input_shape: Shape of one input image; also the reconstruction target shape.
        num_classes: Number of capsules produced by the flatten layer.

    Returns:
        A Keras model with outputs ``[pred, recon]``: ``nn.norm`` of the class
        capsules, and the output of ``conv_decoder``.
    """
    kernel_size = (3, 3)
    inputs = kl.Input(shape=input_shape, name='input')
    scaled = tf.keras.layers.Rescaling(1./255)(inputs)

    # Plain convolution for feature extraction.
    features = kl.Conv2D(
        filters=128, kernel_size=kernel_size, strides=(1, 1),
        activation='relu', padding='same', name='conv')(scaled)

    # Two capsule blocks, each with stride (2, 2).
    block_1 = dense_caps_block(
        filters=16, filter_dims=8, kernel_size=kernel_size,
        strides=(2, 2), routing_iter=3)(features)
    block_2 = dense_caps_block(
        filters=16, filter_dims=16, kernel_size=kernel_size,
        strides=(2, 2), routing_iter=3)(block_1)

    # Collapse to one capsule per class.
    class_caps = layers.FlattenCaps(caps=num_classes, name='cap1_flatten')(block_2)
    pred = kl.Lambda(nn.norm, name='pred')(class_caps)
    recon = conv_decoder(target_shape=input_shape)(class_caps)
    return k.Model(inputs=inputs, outputs=[pred, recon], name=name)


def dense_caps_block(filters, filter_dims, kernel_size, strides, routing_iter):
    """Create a capsule block: ConvCaps2D, then ConvCaps3D, concatenated and squashed.

    Args:
        filters: Passed to both capsule convolutions.
        filter_dims: Passed to both capsule convolutions.
        kernel_size: Kernel size for both capsule convolutions.
        strides: Strides of the ConvCaps2D layer (the ConvCaps3D layer uses (1, 1)).
        routing_iter: Routing iterations passed to the ConvCaps3D layer.

    Returns:
        A function that applies the block to an input tensor.
    """
    def block(il):
        strided = layers.ConvCaps2D(
            filters, filter_dims, kernel_size, strides, padding='same')(il)
        routed = layers.ConvCaps3D(
            filters, filter_dims, routing_iter, kernel_size, (1, 1),
            padding='same')(strided)
        # Join the strided and routed capsules along the last axis, then squash.
        merged = kl.Concatenate(axis=-1)([strided, routed])
        return kl.Lambda(nn.squash)(merged)

    return block


def conv_decoder(target_shape):
    """Create a transposed-convolution reconstruction decoder.

    The decoder starts from a ``W_S x W_S x D`` seed and doubles the spatial
    size ``N`` times with stride-2 transposed convolutions, where ``N`` is
    the largest exponent such that ``W`` is divisible by ``2**N`` and
    ``W // 2**N`` is still greater than 4. For ``W = 32`` this gives
    ``N = 2`` and ``W_S = 8``.

    Args:
        target_shape: ``(height, width, channels)`` of the image to
            reconstruct. Must be square with an even side of at least 10.

    Returns:
        A function mapping a capsule tensor to the reconstruction tensor
        (final layer name ``recon``).

    Raises:
        ValueError: If the target is not square, or its side is odd or
            smaller than 10. The final layer always upsamples by 2, so in
            those cases the output could not match the target size.
    """
    conv_params = {'kernel_size': (3, 3), 'strides': (2, 2),
                   'activation': 'relu', 'padding': 'same'}
    W, H, D = target_shape[0], target_shape[1], target_shape[2]
    if W != H:
        raise ValueError(
            f"conv_decoder only supports square targets; got {tuple(target_shape)}")

    # Count how many times the side can be halved while staying above 4.
    N = 0
    while W // (2 ** (N + 1)) > 4 and W % (2 ** (N + 1)) == 0:
        N = N + 1
    if N < 1:
        raise ValueError(
            "conv_decoder needs an even target side of at least 10 pixels; "
            f"got {tuple(target_shape)}")
    W_S = W // (2 ** N)

    def decoder(input_tensor):
        nl = nn.MaskCID(name="dc_masking")(input_tensor)
        nl = kl.Dense(W_S * W_S * D, name="dc_dense")(nl)
        nl = kl.BatchNormalization(momentum=0.8, name="dc_batch_norm")(nl)
        nl = kl.Reshape((W_S, W_S, D), name="dc_reshape")(nl)
        # N - 1 intermediate upsamplings with shrinking filter counts...
        for i in range(N - 1):
            nl = kl.Conv2DTranspose(
                filters=64 * (N - i), **conv_params, name=f"decoder_dconv_{i + 1}")(nl)
        # ...and one final upsampling back to D channels.
        nl = kl.Conv2DTranspose(filters=D, **conv_params, name="recon")(nl)
        return nl

    return decoder

In [6]:
# Adam settings (learning rate, beta_1, beta_2, epsilon) and the epoch budget.
lr, b1, b2, ep = 0.0005, 0.9, 0.999, 1e-07
EPOCHS = 20

In [14]:
  # Build the deep capsule network and compile it with two losses: margin loss
  # on the 'pred' output (weight 1) and MSE on the reconstruction (weight 5e-3).
  # NOTE(review): the learning rate is hard-coded to 0.001 here, while the
  # hyperparameter cell defines lr/b1/b2/ep that are not used; num_classes and
  # input_shape are likewise literals. Confirm which values are intended.
  model = get_model(name='deepcaps', input_shape=(32,32,1), num_classes=121)
  adam = k.optimizers.Adam(learning_rate=0.001, clipnorm=1.0, clipvalue=0.5)
  model.compile(
      optimizer=adam,
      loss=[lambda a, b: losses.margin_loss(a, b, 0.9, 0.01), 'mse'],
      loss_weights=[1, 5e-3],
      metrics={'pred': 'acc'},
  )
  model.summary()

Model: "deepcaps"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 32, 32, 1)]  0                                            
__________________________________________________________________________________________________
rescaling_1 (Rescaling)         (None, 32, 32, 1)    0           input[0][0]                      
__________________________________________________________________________________________________
conv (Conv2D)                   (None, 32, 32, 128)  1280        rescaling_1[0][0]                
__________________________________________________________________________________________________
conv_caps2d_2 (ConvCaps2D)      (None, 16, 16, 16, 8 9344        conv[0][0]                       
___________________________________________________________________________________________

In [12]:
# Keep only the best weights seen during training.
# The model is compiled with metrics={'pred': 'acc'}, so Keras logs the
# validation metric as 'val_pred_acc' (the training log shows 'pred_acc').
# Monitoring 'val_accuracy' would never find a value and nothing would be saved.
checkpoint_filepath = 'models/checkpoints/vggnet/checkpoint'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_pred_acc',
    mode='max',
    save_best_only=True)


In [15]:
# Train on the in-memory arrays. The model has two outputs, so the targets are
# [labels, images]: the images double as the reconstruction target.
# Validation (and therefore best-checkpoint selection) uses the validation
# split; the original used X_test/y_test here, which leaks the test set into
# model selection while X_val/y_val were loaded and never used.
checkpoint = k.callbacks.ModelCheckpoint(checkpoint_filepath, save_best_only=True)
history = model.fit(X_train, [y_train, X_train],
              batch_size=20,
              epochs=100,
              validation_data=(X_val, (y_val, X_val)),
              callbacks=[checkpoint])

Epoch 1/100




INFO:tensorflow:Assets written to: models/checkpoints/vggnet\checkpoint\assets


INFO:tensorflow:Assets written to: models/checkpoints/vggnet\checkpoint\assets


Epoch 2/100
 193/2057 [=>............................] - ETA: 1:32 - loss: 0.0066 - pred_loss: 0.0064 - recon_loss: 0.0281 - pred_acc: 0.0067

KeyboardInterrupt: 

In [None]:

# Train from the tf.data pipeline instead of the in-memory arrays.
# NOTE(review): the model has two outputs (pred, recon) but these datasets
# yield (image, label) pairs only; confirm the targets before running.
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=val_ds,
    callbacks=[model_checkpoint_callback],
)

In [None]:
# The model has two outputs, so evaluate() reports the total loss, one loss
# per output and the 'pred' accuracy. Unpacking that into two names fails, so
# ask for a dict and pick the entries by name.
eval_metrics = model.evaluate(val_ds, return_dict=True)
loss_p = eval_metrics['loss']
accuracy = eval_metrics['pred_acc'] * 100
# The loss is not a percentage; only the accuracy gets a '%' suffix.
print(f"Loss: {loss_p:.2f}")
print(f"Accuracy: {accuracy:.2f}%")

In [None]:
import datetime

# Timestamped output folder for this run's artefacts (plots, history, weights).
today = datetime.datetime.now()
folder_name = f'{today.hour}-{today.minute}_{today.day:02d}-{today.month}-{today.year}'
path = os.path.join('results','model_summary','vggnet',folder_name)
# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(path, exist_ok=True)
model_folder = path

In [None]:
import pickle

# Persist the training curves. os.path.join replaces the hand-written
# '\history.pickle' (an invalid escape sequence that only forms a path on
# Windows), and the context manager closes the file even if dump() fails.
with open(os.path.join(model_folder, 'history.pickle'), 'wb') as history_file:
    pickle.dump(history.history, history_file)

In [None]:
# Save the whole model as a single HDF5 file named after the class count, the
# timestamp and the integer validation accuracy.
# NOTE(review): the file name hard-codes 'vggnet_224x224', but the model built
# in this notebook is the 'deepcaps' network on 32x32 inputs; confirm the name.
model.save(f'models/model_{num_classes}_vggnet_224x224_{today.hour}-{today.minute}_{today.day:02d}-{today.month}-{today.year}-acc{int(accuracy)}.h5')

In [None]:
# Save as a directory inside the run folder. os.path.join replaces '\model',
# which relied on an invalid escape sequence and a Windows-only separator.
model.save(os.path.join(model_folder, 'model'))

In [None]:
# Save only the weights into the run folder; the file name carries the class
# count and the integer accuracy computed by the evaluation cell.
model.save_weights(model_folder+f'/model_{num_classes}-acc{int(accuracy)}.h5')

In [None]:
def _history_series(history_dict, *candidate_keys):
    """Return a copy of the first series found under ``candidate_keys``."""
    for key in candidate_keys:
        if key in history_dict:
            return list(history_dict[key])
    raise KeyError(
        f"none of {candidate_keys} in history; available: {sorted(history_dict)}")


# The model is compiled with metrics={'pred': 'acc'}, so its history holds
# 'pred_acc' / 'val_pred_acc'; the plain 'accuracy' keys are tried first for
# single-output models. Copies are taken so that later concatenation cannot
# mutate history.history in place.
acc = _history_series(history.history, 'accuracy', 'pred_acc')
val_acc = _history_series(history.history, 'val_accuracy', 'val_pred_acc')

loss = list(history.history['loss'])
val_loss = list(history.history['val_loss'])

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.ylim([min(plt.ylim()),1])
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
# The compiled loss is margin loss plus weighted MSE, not cross entropy.
plt.ylabel('Loss')
plt.ylim([0,1.0])
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
# os.path.join instead of '\combined.png' (invalid escape, Windows-only).
plt.savefig(os.path.join(model_folder, 'combined.png'))
plt.show()

In [None]:
# Training vs. validation loss for the first training run.
plt.plot(history.history['loss'],label="Loss")
plt.plot(history.history['val_loss'],label="Val Loss")
plt.legend(loc="upper left")
# os.path.join instead of '\loss.png' (invalid escape, Windows-only separator).
plt.savefig(os.path.join(model_folder, 'loss.png'))
plt.show()

In [None]:
# Training vs. validation accuracy. The two-output model logs its accuracy as
# 'pred_acc' (metrics={'pred': 'acc'}); fall back to it when the plain
# 'accuracy' key is absent.
acc_key = 'accuracy' if 'accuracy' in history.history else 'pred_acc'
plt.plot(history.history[acc_key],label="Accuracy")
plt.plot(history.history[f'val_{acc_key}'],label="Val Accuracy")
plt.legend(loc="upper left")
plt.savefig(os.path.join(model_folder, 'accuracy.png'))
plt.show()

### Fine-tuning

In [None]:
# Keep a second name for the current model.
# NOTE: this is an alias, not a copy. `og_model` and `model` are the same
# object, so any later training or recompilation affects both.
og_model = model

In [None]:
# NOTE(review): `basemodel` is not defined in any visible cell of this
# notebook; confirm where it comes from before running.
print("Number of layers in the base model: ", len(basemodel.layers))

In [None]:
# Mark the base model trainable before selectively freezing its first layers.
# NOTE(review): `basemodel` is not defined in any visible cell.
basemodel.trainable = True 

In [None]:
# Fine-tune from this layer index onwards.
fine_tune_at = 10

# Freeze every layer before index `fine_tune_at`; later layers stay trainable.
# NOTE(review): `basemodel` is not defined in any visible cell.
for layer in basemodel.layers[:fine_tune_at]:
  layer.trainable =  False

In [None]:
# Adam settings for the fine-tuning phase: lower learning rate, same betas and epsilon.
lr, b1, b2, ep = 1e-05, 0.9, 0.999, 1e-07

In [None]:
# Checkpoint the best fine-tuned weights. The accuracy metric is attached to
# the 'pred' output, so the validation key is 'val_pred_acc'.
checkpoint_filepath = 'models/checkpoints/vggnet'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_pred_acc',
    mode='max',
    save_best_only=True)
# Recompile with the fine-tuning learning rate but the SAME objective as the
# first compile (margin loss on 'pred', weighted MSE on the reconstruction).
# The original used SparseCategoricalCrossentropy, which does not fit one-hot
# labels and would also have been applied to the image reconstruction output.
# `decay=0.0` is dropped: it was a no-op, and newer Keras optimizers reject it.
model.compile(
  optimizer=tf.keras.optimizers.Adam(learning_rate=lr, beta_1=b1, beta_2=b2, epsilon=ep),
  loss=[lambda a, b: losses.margin_loss(a, b, 0.9, 0.01), 'mse'],
  loss_weights=[1, 5e-3],
  metrics={'pred': 'acc'})

In [None]:
model.summary()

In [None]:
fine_tune_epochs = 4
# `initial_epoch` is the number of epochs already completed. history.epoch
# holds zero-based indices, so its last element is one short and would train
# one extra epoch; use the length instead. Basing the total on the epochs
# actually run also keeps this correct when the first run did not last
# exactly EPOCHS epochs.
initial_epoch = len(history.epoch)
total_epochs = initial_epoch + fine_tune_epochs
history_fine = model.fit(train_ds,
                         epochs=total_epochs,
                         initial_epoch=initial_epoch,
                         callbacks=[model_checkpoint_callback],
                         validation_data=val_ds)

In [None]:
# Restore the weights saved at `checkpoint_filepath` by the checkpoint
# callback (configured with save_best_only=True).
model.load_weights(checkpoint_filepath)

In [None]:
def _acc_key(history_dict):
    """Name of the accuracy series in a Keras history dict."""
    return 'accuracy' if 'accuracy' in history_dict else 'pred_acc'


# Rebuild the combined curves from the two histories instead of using `+=`.
# The in-place form extended the lists stored inside history.history and
# appended the fine-tuning values again every time the cell was re-run.
base_key, fine_key = _acc_key(history.history), _acc_key(history_fine.history)
acc = history.history[base_key] + history_fine.history[fine_key]
val_acc = history.history[f'val_{base_key}'] + history_fine.history[f'val_{fine_key}']

loss = history.history['loss'] + history_fine.history['loss']
val_loss = history.history['val_loss'] + history_fine.history['val_loss']

In [None]:

# Combined curves across both phases; a vertical line marks where fine-tuning starts.
plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.ylim([0.8, 1])
plt.plot([EPOCHS-1,EPOCHS-1],
          plt.ylim(), label='Start Fine Tuning')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.ylim([0, 1.0])
plt.plot([EPOCHS-1,EPOCHS-1],
         plt.ylim(), label='Start Fine Tuning')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
# os.path.join instead of '\combined_finetuned.png' (invalid escape, Windows-only).
plt.savefig(os.path.join(model_folder, 'combined_finetuned.png'))
plt.show()

In [None]:
# A two-output model reports more than (loss, accuracy), so unpacking the
# result into two names fails. Request a dict and pick the values by name.
test_metrics = model.evaluate(test_dataset, return_dict=True)
loss_finetuned = test_metrics['loss']
# The metric name depends on how the model was compiled; take the first match.
_acc_names = [key for key in ('pred_acc', 'pred_accuracy', 'accuracy', 'acc')
              if key in test_metrics]
if not _acc_names:
    raise KeyError(f"no accuracy metric in {sorted(test_metrics)}")
accuracy_finetuned = test_metrics[_acc_names[0]] * 100
print('Test accuracy :', accuracy_finetuned)

In [None]:
def predict_word(num):
    """Turn a class index into the text it stands for.

    The entry ``class_names[num]`` is read as space-separated decimal
    Unicode code points; each one is converted with ``chr`` and the
    characters are joined into a single string.

    Args:
        num: Index into the module-level ``class_names`` sequence.

    Returns:
        The decoded string.
    """
    code_points = class_names[num].split(' ')
    return ''.join(chr(int(code)) for code in code_points)

In [None]:
# Take one batch from the test split.
image_batch, label_batch = next(test_dataset.as_numpy_iterator())
# The model returns [pred, recon]. Keep only the class scores, so that the
# following `.argmax` cell receives an array rather than a list of outputs.
predictions, _reconstructions = model.predict_on_batch(image_batch)

In [None]:
# Reduce the scores along the last axis to the index of the largest value.
# NOTE: `predictions` is rebound from itself, so re-running this cell alone
# applies argmax to the already-reduced indices.
predictions = predictions.argmax(axis=-1)

In [None]:
from matplotlib.font_manager import FontProperties
from pathlib import Path
# Point to the font file with an absolute path.
# NOTE(review): this path is Windows-specific; adjust on other systems.
nirm = Path('c:/Windows/Fonts/kartika.ttf')

# Configure the font used for the predicted Malayalam labels in plot titles.
mal_font = FontProperties(fname=nirm)

In [None]:
# 3x3 grid of test images, each titled with its predicted text.
plt.figure(figsize=(10, 10))
for i in range(9):
  ax = plt.subplot(3, 3, i + 1)
  plt.imshow(image_batch[i].astype("uint8"))
  pred_word = predict_word(predictions[i])
  plt.title(pred_word,fontproperties=mal_font)
  plt.axis("off")
# Save once, after every panel is drawn and its axes are hidden. The original
# rewrote the file on each iteration, before hiding the current panel's axes.
plt.savefig(os.path.join(model_folder, 'prediction.jpg'))

In [None]:
# Save the fine-tuned weights. The file name records the accuracy measured
# AFTER fine-tuning (accuracy_finetuned); the original embedded the
# pre-fine-tuning `accuracy`, mislabelling the artefact.
model.save_weights(model_folder+f'/model_{num_classes}_vggnet_finetuned_{today.hour}-{today.minute}_{today.day:02d}-{today.month}-{today.year}-acc{int(accuracy_finetuned)}.h5')

In [None]:
# Save the fine-tuned model as a directory. os.path.join replaces
# f'\model_finetuned' (invalid escape, Windows-only separator, needless f-string).
model.save(os.path.join(model_folder, 'model_finetuned'))

In [None]:
classes = np.array(class_names)

In [None]:
# Store the class-name array next to the model so predictions can be decoded
# later. os.path.join replaces '\class_names.npy' (invalid escape, Windows-only).
with open(os.path.join(model_folder, 'class_names.npy'), 'wb') as f:
    np.save(f, classes)

In [None]:
# Persist the fine-tuning curves; the context manager closes the file, and
# os.path.join replaces '\history_fine.pickle' (invalid escape, Windows-only).
with open(os.path.join(model_folder, 'history_fine.pickle'), 'wb') as history_fine_file:
    pickle.dump(history_fine.history, history_fine_file)

In [None]:
# Record the run configuration and results, one entry per line. The 'Classes'
# entry now ends with a newline (it previously ran into the 'epsilon' entry),
# and the context manager closes the file even if a write fails.
with open(os.path.join(model_folder, "config.txt"), "w") as f:
    f.writelines([
        f'LR: {lr}\n',
        f'Beta 1:{b1}\n',
        f'Beta 2:{b2}\n',
        f'Classes: {num_classes}\n',
        f'epsilon:{ep}\n',
        f'epochs:{EPOCHS}\n',
        f'accuracy: {accuracy}\n',
        f'accuracy finetuned: {accuracy_finetuned}\n',
    ])