In [3]:
import os

# TF_CPP_MIN_LOG_LEVEL controls TensorFlow's native (C++) logging and has to be
# in the environment before `import tensorflow` runs; assigning it afterwards
# does not silence the messages emitted while the library loads.
# "2" filters out INFO and WARNING messages, leaving errors visible.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

# The remaining imports keep their original relative order so that any name
# exported by `from utils import *` is shadowed (or not) exactly as before.
import tensorflow as tf
import numpy as np
from utils import *
import pickle
from model import ImageCaptioningModel
from sklearn.model_selection import train_test_split
from dataset import DataGenerator
import warnings

# Suppress Python-level warnings for the whole notebook session.
warnings.filterwarnings("ignore")

# Only let ERROR-level records through TensorFlow's Python logger.
tf.get_logger().setLevel('ERROR')

In [4]:
# Batch size passed to each DataGenerator built in this notebook.
BATCH_SIZE = 16

# All data, embedding and model paths are resolved relative to the directory
# the kernel was started in.
CWD = os.getcwd()

In [5]:
# --- Inputs -----------------------------------------------------------------
# The GloVe file name encodes the vector width ("100d"); keep it in sync with
# embedding_dim if either is changed.
embedding_dim = 100
embeddings_path = os.path.join(CWD, 'glove.6B/glove.6B.100d.txt')
captions_path = os.path.join(CWD, 'data/captions.txt')

# --- Pretrained embeddings and captions -------------------------------------
embeddings_index = load_embeddings(embeddings_path)
caption_dict = preprocess_captions(captions_path)

# --- Tokenizer --------------------------------------------------------------
# The filter set below does not contain '<' or '>', so those characters are
# kept inside tokens.
# NOTE(review): presumably this lets marker tokens such as <start>/<end>
# survive tokenization - confirm against preprocess_captions.
tokenizer = Tokenizer(
    filters='!"#$%&()*+,-./:;=?@[\\]^_`{|}~\t\n'
)
vocab = get_vocab(caption_dict)
tokenizer.fit_on_texts(vocab)

# NOTE(review): the extra slot presumably reserves index 0 for padding - confirm.
vocab_size = 1 + len(vocab)

# --- Derived model inputs ---------------------------------------------------
embedding_matrix = create_embedding_matrix(
    tokenizer,
    vocab_size,
    embeddings_index,
    embedding_dim,
)
max_seq_length = get_max_length(caption_dict)

In [7]:
# Load the pickled feature dictionary stored as encodings.pkl in the working
# directory.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open an encodings.pkl that you produced yourself or otherwise trust.
features_path = os.path.join(CWD, 'encodings.pkl')
with open(features_path, 'rb') as f:
    # The result is later indexed with the same keys as caption_dict when the
    # train/val/test feature splits are built.
    features_dict = pickle.load(f)

In [8]:
# Sanity check: display the embedding matrix dimensions.
# presumably (vocab_size, embedding_dim) - confirm against create_embedding_matrix
embedding_matrix.shape

(4083, 100)

In [9]:
# Build the captioning model from the vocabulary size, the maximum caption
# length and the embedding matrix prepared earlier in the notebook.
model = ImageCaptioningModel(
    vocab_size,
    max_seq_length,
    embedding_matrix,
    embedding_dim,
)

In [10]:
# Split the caption keys into train / validation / test sets.
# First hold out 20% for test, then take 25% of the remaining 80% (i.e. 20% of
# the total) for validation, giving a 60 / 20 / 20 split. The fixed
# random_state keeps both splits reproducible.
keys = list(caption_dict.keys())
train_keys, test_keys = train_test_split(
    keys,
    test_size=0.2,
    random_state=42,
)
train_keys, val_keys = train_test_split(
    train_keys,
    test_size=0.25,
    random_state=42,
)

In [11]:
def _subset(mapping, wanted_keys):
    """Return a new dict containing only the entries of ``mapping`` named in ``wanted_keys``."""
    return {key: mapping[key] for key in wanted_keys}


# Captions for each split.
train_captions = _subset(caption_dict, train_keys)
val_captions = _subset(caption_dict, val_keys)
test_captions = _subset(caption_dict, test_keys)

# Features for each split, selected with the same keys as the captions.
train_features = _subset(features_dict, train_keys)
val_features = _subset(features_dict, val_keys)
test_features = _subset(features_dict, test_keys)


In [12]:
def _build_generator(captions, features):
    """Create a DataGenerator for one split using the shared tokenizer and size settings."""
    return DataGenerator(
        captions,
        features,
        tokenizer,
        max_seq_length,
        vocab_size,
        batch_size=BATCH_SIZE,
    )


# One generator per split; only the captions/features differ between them.
train_data_generator = _build_generator(train_captions, train_features)
val_data_generator = _build_generator(val_captions, val_features)
test_data_generator = _build_generator(test_captions, test_features)

In [14]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as an additional metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Disabled alternative: load a previously saved model from <CWD>/model instead
# of using the freshly compiled one.
# model = tf.keras.models.load_model(os.path.join(CWD, 'model'))

In [12]:
# Checkpoint to <CWD>/model, writing only when the monitored validation loss
# reaches a new minimum.
model_path = os.path.join(CWD, "model")
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=model_path,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=0,
)

# Disabled callbacks, kept for reference (neither is passed to model.fit):
#
# earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',min_delta = 0, patience = 5, verbose = 0, restore_best_weights=True)
#
# learning_rate_reduction = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
#                                             patience=3,
#                                             verbose=0,
#                                             factor=0.5,
#                                             min_lr=1e-6)

In [13]:
NUM_EPOCHS = 50

# Train inside an explicit CPU device scope, validating on the validation
# generator each epoch and checkpointing through the `checkpoint` callback.
# NOTE(review): the reason for pinning to '/cpu:0' is not visible here -
# confirm it is intentional before running on a GPU machine.
with tf.device('/cpu:0'):
    history = model.fit(
        train_data_generator,
        validation_data=val_data_generator,
        epochs=NUM_EPOCHS,
        callbacks=[checkpoint],
    )

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
  7/304 [..............................] - ETA: 3:40 - loss: 2.1381 - accuracy: 0.4341