<a href="https://colab.research.google.com/github/deyabhishek79/MtechProject/blob/main/Disney-Image-Captioning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
from os import listdir
from pickle import dump
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.layers import Input, Reshape, Concatenate
import numpy as np
import string
import glob
from tensorflow.keras.models import Model

In [2]:
def load_image(path):
    """Read an image file and turn it into a single-image batch for VGG16.

    The image is resized to 224x224 while loading, converted to an array,
    given a leading batch axis, and run through the VGG16 ``preprocess_input``.

    Args:
        path: Location of the image file to read.

    Returns:
        The preprocessed image as a NumPy array with a leading batch axis.
    """
    pixels = img_to_array(load_img(path, target_size=(224,224)))
    # prepend the batch axis, then apply the VGG16 input preprocessing
    batch = preprocess_input(np.expand_dims(pixels, axis=0))
    return np.asarray(batch)

In [19]:

def extract_features(directory,is_attention=False):
  """Run every image matched by a glob pattern through VGG16 and collect the outputs.

  Args:
    directory: Glob pattern selecting the image files (for example
      '/data/images/*jpg'). Despite its name it is passed straight to
      glob.glob, so a bare directory path matches only the directory itself.
    is_attention: When True, each image yields the final pooling output
      reshaped to 49x512 (context vectors for attention). When False, each
      image yields the output of the last hidden dense layer ('fc2').

  Returns:
    dict mapping image id (file name without directory, cut at its first '.')
    to the array returned by model.predict for that image.
  """
  import os  # local import: the notebook's import cell only brings in os.listdir

  base_model = VGG16()
  # In tf.keras `model.layers` returns a fresh list on every access, so the
  # old `model.layers.pop()` idiom does NOT remove the classifier and
  # `layers[-1]` would still be the 1000-way 'predictions' output. Select the
  # wanted layers by name instead.
  if is_attention:
    # 7x7x512 pooling output flattened to 49 locations x 512 channels
    outputs = Reshape([49,512])(base_model.get_layer('block5_pool').output)
  else:
    outputs = base_model.get_layer('fc2').output
  model = Model(inputs=base_model.inputs, outputs=outputs)
  # summary() prints by itself and returns None, so it is not wrapped in print()
  model.summary()

  features = dict()
  for name in glob.glob(directory):
    base_name = os.path.basename(name)
    # ignore README (glob returns full paths, so compare the file name only)
    if base_name == 'README.md':
      continue
    image = load_image(name)
    # extract features
    feature = model.predict(image, verbose=0)
    # image id = file name up to its first '.', without the directory part, so
    # it lines up with the ids parsed from the caption and image-list files
    image_id = base_name.split('.')[0]
    features[image_id] = feature
    print('>%s' % name)
  return features

In [4]:

def load_doc(filename):
  """Read a whole text file into memory.

  Args:
    filename: Path of the file to read.

  Returns:
    The complete contents of the file as one string.
  """
  # the context manager closes the handle even if read() raises
  with open(filename, 'r') as file:
    return file.read()

In [5]:
def load_descriptions(doc):
  """Parse caption text into a mapping of image id -> list of captions.

  Every usable line is split on whitespace. The first token is the image
  identifier (cut at its first '.'), and the remaining tokens are re-joined
  with single spaces to form the caption.

  Args:
    doc: Full text of the captions file, one caption per line.

  Returns:
    dict mapping each image id to its captions, in the order they appear.
  """
  mapping = dict()
  for line in doc.split('\n'):
    tokens = line.split()
    # Skip lines shorter than two characters, and whitespace-only lines:
    # those split into no tokens and would otherwise fail on tokens[0].
    if len(line) < 2 or not tokens:
      continue
    # first token is the image id (file-name suffix removed), rest is the caption
    image_id = tokens[0].split('.')[0]
    image_desc = ' '.join(tokens[1:])
    mapping.setdefault(image_id, []).append(image_desc)
  return mapping

In [6]:

def clean_descriptions(descriptions):
  """Normalise every caption in ``descriptions`` in place.

  Each caption is split on whitespace; every token is lower-cased and stripped
  of ASCII punctuation. Only tokens longer than one character that are purely
  alphabetic are kept, and they are re-joined with single spaces into the
  same list slot.

  Args:
    descriptions: dict mapping an image id to a list of caption strings. The
      lists are modified in place.

  Returns:
    None.
  """
  # translation table that deletes every punctuation character
  strip_punct = str.maketrans('', '', string.punctuation)
  for captions in descriptions.values():
    for idx, caption in enumerate(captions):
      # lower-case first, then remove punctuation from each token
      words = (token.lower().translate(strip_punct) for token in caption.split())
      # drop one-letter leftovers (hanging 's' and 'a') and tokens containing digits
      captions[idx] = ' '.join(w for w in words if len(w) > 1 and w.isalpha())

In [7]:

def to_vocabulary(descriptions):
  """Collect the distinct words used across all captions.

  Args:
    descriptions: dict mapping an image id to a list of caption strings.

  Returns:
    set of the whitespace-separated tokens found in any caption.
  """
  vocabulary = set()
  for captions in descriptions.values():
    for caption in captions:
      vocabulary.update(caption.split())
  return vocabulary

In [8]:
def save_descriptions(descriptions, filename):
  """Write captions to a text file, one '<image id> <caption>' entry per line.

  Args:
    descriptions: dict mapping an image id to a list of caption strings.
    filename: Path of the file to create or overwrite.

  Returns:
    None. Lines are joined with '\\n' and no trailing newline is written.
  """
  lines = [key + ' ' + desc
           for key, desc_list in descriptions.items()
           for desc in desc_list]
  # the context manager flushes and closes the file even if write() raises
  with open(filename, 'w') as file:
    file.write('\n'.join(lines))

In [9]:
# Mount Google Drive at /content/drive; the image dataset used further down is
# read from /content/drive/MyDrive.
# NOTE(review): google.colab is presumably only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [22]:

# extract features from all images

# NOTE: despite its name this is a glob pattern; extract_features hands it to glob.glob
directory = '/content/drive/MyDrive/Images_Dataset_temp/*jpg'
features = extract_features(directory)
print('Extracted Features: %d' % len(features))
# save to file; the context manager makes sure the pickle is flushed and closed
with open('/content/drive/MyDrive/Images_Dataset_temp/features.pkl', 'wb') as features_file:
  dump(features, features_file)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
>/content/drive/MyDrive/Images_Dataset_temp/3138562460_44227a35cf.jpg
>/content/drive/MyDrive/Images_Dataset_temp/2969380952_9f1eb7f93b.jpg
>/content/drive/MyDrive/Images_Dataset_temp/3358682439_be4b83544c.jpg
>/content/drive/MyDrive/Images_Dataset_temp/447722389_4b51b7e13d.jpg
>/content/drive/MyDrive/Images_Dataset_temp/3516299821_8f0375d221.jpg
>/content/drive/MyDrive/Images_Dataset_temp/2891240104_6755281868.jpg
>/content/drive/MyDrive/Images_Dataset_temp/508958120_afe274f726.jpg
>/content/drive/MyDrive/Images_Dataset_temp/333973142_abcd151002.jpg
>/content/drive/MyDrive/Images_Dataset_temp/2374247382_023a86b9ca.jpg
>/content/drive/MyDrive/Images_Dataset_temp/3626642428_3396568c3c.jpg
>/content/drive/MyDrive/Images_Dataset_temp/3412450683_7da035f2de.jpg
>/content/drive/MyDrive/Images_Dataset_temp/2260649048_ae45d17e68.jpg
>/content/drive/MyDrive/Images_Dataset_temp/2043427251_83b746da8e.jpg
>/content/drive/MyDrive/Imag

In [26]:
# NOTE(review): this install fails (see the error output of this cell): pip finds
# no distribution named load_data. `load_data` looks like a project-local module
# rather than a published package; TODO confirm and drop this cell.
!pip install load_data

[31mERROR: Could not find a version that satisfies the requirement load_data (from versions: none)[0m
[31mERROR: No matching distribution found for load_data[0m


In [25]:
import load_data as ld
import generate_model as gen
from tensorflow.keras.callbacks import ModelCheckpoint
from pickle import dump

ModuleNotFoundError: ignored

In [None]:
from pickle import load
import argparse

def load_doc(filename):
  """Read a whole text file into memory.

  Args:
    filename: Path of the file to read.

  Returns:
    The complete contents of the file as one string.
  """
  # the context manager closes the handle even if read() raises
  with open(filename, 'r') as file:
    return file.read()

def load_set(filename):
  """Read a list of photo file names and return the set of photo identifiers.

  Args:
    filename: Path of a text file with one photo file name per line.

  Returns:
    set containing, for every non-empty line, the text before its first '.'.
  """
  return {
    entry.split('.')[0]
    for entry in load_doc(filename).split('\n')
    if len(entry) >= 1  # skip empty lines
  }

def train_test_split(dataset, n_train=100, n_test=100):
  """Deterministically carve a train and a test subset out of ``dataset``.

  The identifiers are sorted so the same input always produces the same
  split. The first ``n_train`` sorted items form the training set and the
  next ``n_test`` items form the test set; anything beyond that is left out.

  Args:
    dataset: Iterable of sortable identifiers.
    n_train: Number of leading items placed in the training set (default 100).
    n_test: Number of following items placed in the test set (default 100).

  Returns:
    (train, test) tuple of sets. Either set is smaller than requested when
    ``dataset`` does not hold enough items.
  """
  # order keys so the split is consistent
  ordered = sorted(dataset)
  return set(ordered[:n_train]), set(ordered[n_train:n_train + n_test])

def load_clean_descriptions(filename, dataset):
  """Load the cleaned captions for a chosen set of images.

  Each line of the file is split on whitespace: the first token is the image
  id and the rest is the caption. Captions of images in ``dataset`` are
  wrapped as 'startseq <caption> endseq'.

  Args:
    filename: Path of the descriptions file ('<image id> <caption>' per line).
    dataset: Collection of image ids to keep.

  Returns:
    dict mapping each kept image id to its list of wrapped captions.
  """
  doc = load_doc(filename)
  descriptions = dict()
  for line in doc.split('\n'):
    tokens = line.split()
    # Blank lines (e.g. a trailing newline or an empty file) have no tokens;
    # skip them instead of failing on tokens[0].
    if not tokens:
      continue
    image_id, image_desc = tokens[0], tokens[1:]
    # skip images not in the set
    if image_id not in dataset:
      continue
    # wrap description in the start/end marker tokens
    desc = 'startseq ' + ' '.join(image_desc) + ' endseq'
    descriptions.setdefault(image_id, []).append(desc)
  return descriptions

def load_photo_features(filename, dataset):
  """Load pickled photo features and keep only the requested images.

  Args:
    filename: Path of a pickle file holding a mapping of image id -> feature.
    dataset: Iterable of image ids to keep.

  Returns:
    dict mapping every id in ``dataset`` to its stored feature.

  Raises:
    KeyError: if an id in ``dataset`` is missing from the pickled mapping.
  """
  # NOTE(security): unpickling can execute arbitrary code; only load feature
  # files that come from a trusted source.
  # The context manager closes the file once the pickle has been read.
  with open(filename, 'rb') as handle:
    all_features = load(handle)
  # filter features
  return {k: all_features[k] for k in dataset}

def prepare_dataset(data='dev'):
  """Load photo features and wrapped captions for one of the dataset splits.

  Args:
    data: Which split to prepare.
      'dev'   - load the dev image list and divide it with train_test_split.
      'train' - use the train image list for training and the dev image list
                for testing.
      'test'  - load only the test image list.

  Returns:
    ((train_features, train_descriptions), (test_features, test_descriptions)).
    For data='test' both training entries are None.

  Raises:
    AssertionError: if ``data`` is not 'dev', 'train' or 'test'.
  """
  assert data in ['dev', 'train', 'test']

  descriptions_file = 'models/descriptions.txt'
  features_file = 'models/features.pkl'
  dev_list = 'Flickr8k_text/Flickr_8k.devImages.txt'

  if data == 'test':
    # evaluation only: this split has no training half
    test_ids = load_set('Flickr8k_text/Flickr_8k.testImages.txt')
    print('Dataset: %d' % len(test_ids))
    test_descriptions = load_clean_descriptions(descriptions_file, test_ids)
    print('Descriptions: test=%d' % len(test_descriptions))
    test_features = load_photo_features(features_file, test_ids)
    print('Photos: test=%d' % len(test_features))
    return (None, None), (test_features, test_descriptions)

  if data == 'dev':
    # dev set (1K): both halves are carved out of the same image list
    dev_ids = load_set(dev_list)
    print('Dataset: %d' % len(dev_ids))
    train_ids, test_ids = train_test_split(dev_ids)
  else:
    # training dataset (6K), evaluated against the dev image list
    train_ids = load_set('Flickr8k_text/Flickr_8k.trainImages.txt')
    test_ids = load_set(dev_list)
    print('Dataset: %d' % len(train_ids))

  # descriptions
  train_descriptions = load_clean_descriptions(descriptions_file, train_ids)
  test_descriptions = load_clean_descriptions(descriptions_file, test_ids)
  print('Descriptions: train=%d, test=%d' % (len(train_descriptions), len(test_descriptions)))

  # photo features
  train_features = load_photo_features(features_file, train_ids)
  test_features = load_photo_features(features_file, test_ids)
  print('Photos: train=%d, test=%d' % (len(train_features), len(test_features)))

  return (train_features, train_descriptions), (test_features, test_descriptions)

if __name__ == '__main__':
  # Command-line entry point: -t/--train selects the 6K training set,
  # otherwise the dev set is prepared.
  parser = argparse.ArgumentParser(description='Generate dataset features')
  parser.add_argument("-t", "--train", action='store_const', const='train',
    default = 'dev', help="Use large 6K training set")
  # parse_known_args tolerates arguments this parser does not define. A
  # notebook kernel is typically started with its own flags in sys.argv, which
  # parse_args() would reject and exit on.
  args, _unrecognized = parser.parse_known_args()
  prepare_dataset(args.train)

In [None]:
def train_model(weight = None, epochs = 10):
  """Train the captioning model on the 'train' split and save the results.

  The tokenizer and its index -> word lookup are pickled under models/, a
  checkpoint is written whenever val_loss improves, and the final model and
  weights are saved once training ends (best effort).

  Args:
    weight: Optional path to weights loaded into the model before training.
    epochs: Number of training epochs.

  Returns:
    None.
  """
  # load dataset
  data = ld.prepare_dataset('train')
  train_features, train_descriptions = data[0]
  test_features, test_descriptions = data[1]

  # prepare tokenizer
  tokenizer = gen.create_tokenizer(train_descriptions)
  # save the tokenizer (context managers make sure each pickle file is closed)
  with open('models/tokenizer.pkl', 'wb') as tokenizer_file:
    dump(tokenizer, tokenizer_file)
  # index_word dict: inverse of tokenizer.word_index
  index_word = {v: k for k, v in tokenizer.word_index.items()}
  # save dict
  with open('models/index_word.pkl', 'wb') as index_word_file:
    dump(index_word, index_word_file)

  vocab_size = len(tokenizer.word_index) + 1
  print('Vocabulary Size: %d' % vocab_size)

  # determine the maximum sequence length
  max_length = gen.max_length(train_descriptions)
  print('Description Length: %d' % max_length)

  # generate model
  model = gen.define_model(vocab_size, max_length)

  # Check if pre-trained weights to be used
  if weight is not None:
    model.load_weights(weight)

  # define checkpoint callback
  filepath = 'models/model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
  checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                save_best_only=True, mode='min')

  # one step per image id in each split
  # NOTE(review): assumes gen.data_generator yields one batch per image - TODO confirm
  steps = len(train_descriptions)
  val_steps = len(test_descriptions)
  # create the data generator
  train_generator = gen.data_generator(train_descriptions, train_features, tokenizer, max_length)
  val_generator = gen.data_generator(test_descriptions, test_features, tokenizer, max_length)

  # fit model; Model.fit accepts generators directly, and fit_generator is
  # deprecated in tf.keras
  model.fit(train_generator, epochs=epochs, steps_per_epoch=steps, verbose=1,
        callbacks=[checkpoint], validation_data=val_generator, validation_steps=val_steps)

  # Saving stays best effort, but only ordinary errors are caught (a bare
  # except would also swallow KeyboardInterrupt) and the cause is reported.
  try:
    model.save('models/wholeModel.h5', overwrite=True)
    model.save_weights('models/weights.h5',overwrite=True)
  except Exception as err:
    print("Error in saving model.")
    print(repr(err))
  print("Training complete...\n")

# Kick off a 20-epoch training run (no pre-trained weights) when this code is
# executed as the main module.
if __name__ == '__main__':
    train_model(epochs=20)