In [0]:
from google.colab import files
files.upload()

In [0]:
!mkdir /root/.kaggle
!mv kaggle.json /root/.kaggle/
#download dataset
!kaggle datasets download -d hsankesara/flickr-image-dataset
!unzip -q flickr-image-dataset.zip
!mv flickr30k_images/results.csv .
!mkdir images
!mv flickr30k_images/flickr30k_images/* images/

#download glove
!wget -q http://nlp.stanford.edu/data/glove.6B.zip
!unzip -q glove.6B.zip -d glove
  
!rm -r flickr30k_images flickr-image-dataset.zip glove.6B.zip

Downloading flickr-image-dataset.zip to /content
100% 4.07G/4.08G [00:53<00:00, 51.1MB/s]
100% 4.08G/4.08G [00:53<00:00, 82.1MB/s]


In [0]:
import pandas as pd
import gc
import string

data = pd.read_csv('results.csv', sep='|')
data.columns = [ col.strip() for col in data.columns ]
data = data.dropna()

table = str.maketrans('', '', string.punctuation,)
def preprocesCaption( caption):
  caption = caption.lower() #to lower case
  caption = caption.translate(table) #remove punctuations
  caption = caption.split() # convert to words
  caption = [ w for w in caption if len(w) > 1 ] #remove dangling 'a' and 's'
  caption = [ w for w in caption if w.isalpha() ] #keep only words with alphabets
  return ' '.join(caption)

In [0]:
from tqdm import tqdm
from collections import defaultdict

images = defaultdict(list)
for img in tqdm(data.image_name.unique()):
  for comment in data[data.image_name == img ].comment.values:
    images[img].append( '<seq_start> '+preprocesCaption(comment)+' <seq_end>' )
print(len(images))
del data; gc.collect()

100%|██████████| 31783/31783 [05:04<00:00, 108.83it/s]


31783


7

In [0]:
import numpy as np
import os

GLOVE_PATH = os.path.join('glove', 'glove.6B.100d.txt')
EMBEDDING_DIM = 100

embeddings_index = {}
with open(GLOVE_PATH) as f:
  for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
embeddings_index['<unk>'] = np.zeros(EMBEDDING_DIM, dtype='float32')
embeddings_index['<seq_start>'] = np.zeros(EMBEDDING_DIM, dtype='float32')
embeddings_index['<seq_end>'] = np.ones(EMBEDDING_DIM, dtype='float32')
len(embeddings_index)

400003

In [0]:
from keras.preprocessing.sequence import pad_sequences

ind_to_word = dict(enumerate( embeddings_index.keys()  ))
word_to_ind = { w:i for i,w in ind_to_word.items() }

images_caption = defaultdict(list)
MAX_SEQ_LEN = 20

for img in images.keys():
  arr = []
  for caption in images[img]:
    arr.append( [ word_to_ind.get( word, word_to_ind['<unk>'] ) for word in caption.split() ] )
  images_caption[img] = pad_sequences( arr, maxlen = MAX_SEQ_LEN, padding='post', value=word_to_ind['<seq_end>'] )

Using TensorFlow backend.


In [0]:
from sklearn.model_selection import train_test_split
VALIDATION_SPLIT = 0.2

train, test = train_test_split( list(images_caption.keys()), test_size = VALIDATION_SPLIT )
len(train), len(test)

(25426, 6357)

In [0]:
from keras.preprocessing.image import img_to_array, array_to_img, load_img
from random import choice, sample

def create_batch( data, batch_size=128 ):
  c = np.zeros( (batch_size, 512) )
  s = np.zeros( (batch_size, 512) )
  while True:
    batch = [  ]
    imgs = sample( data, batch_size )
    cap = np.zeros( (batch_size,MAX_SEQ_LEN, EMBEDDING_DIM) )
    i=0
    for img in imgs:
      caption = choice( images_caption[img] )
      for j in range(MAX_SEQ_LEN):
        cap[ i, j, :] = embeddings_index[ ind_to_word[ caption[j] ] ]
      imgs[i] = img_to_array( load_img( os.path.join('images',imgs[i]), target_size=(224,224) ) )
      i+=1
    yield [np.array(imgs)/255,s,c], list(np.swapaxes(cap,0,1))

In [0]:
from keras.applications import VGG19
IMG_DIM = (224,224,3)

vgg = VGG19(weights='imagenet',include_top=True, input_shape=IMG_DIM)
vgg.layers.pop()
vgg.trainable=False
for layer in vgg.layers:
  layer.trainable=False

W0614 10:16:26.617032 139867797530496 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0614 10:16:26.668680 139867797530496 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0614 10:16:26.686076 139867797530496 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0614 10:16:26.750047 139867797530496 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.



Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels.h5


W0614 10:16:33.859360 139867797530496 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0614 10:16:33.861089 139867797530496 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:181: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.



In [0]:
from keras.layers import LSTM, Dense, Input, RepeatVector
from keras.models import Model

s0 = Input( (512,) )
c0 = Input( (512,) )

s=s0
c=c0

context = RepeatVector(MAX_SEQ_LEN)(vgg.layers[-1].output)

lstm = LSTM(512,return_state=True)
outputlayer = Dense(EMBEDDING_DIM, activation='softmax')

outputs=[]
for _ in range(MAX_SEQ_LEN):
  s, _ ,c = lstm( context, initial_state=[s,c] )
  out = outputlayer(s)
  outputs.append(out)

model = Model( [vgg.input,s0,c0], outputs )

In [0]:
from keras.optimizers import Adam
traingen = create_batch(train)
valgen = create_batch(test)

model.compile( Adam( 0.0004 ), loss='categorical_crossentropy')

In [0]:
model.fit_generator( traingen, steps_per_epoch = 500, epochs=10, validation_data=valgen, validation_steps=100, 
                    use_multiprocessing=True, verbose=1, workers=4 )



Epoch 1/10
Epoch 2/10
 64/500 [==>...........................] - ETA: 30:55 - loss: 3787.2914 - dense_1_loss: 486.3795

Process ForkPoolWorker-54:
Process ForkPoolWorker-53:
Process ForkPoolWorker-50:
Process ForkPoolWorker-52:
Process ForkPoolWorker-55:
Process ForkPoolWorker-49:
Process ForkPoolWorker-48:
Process ForkPoolWorker-51:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/p

Epoch 1/10


  File "/usr/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
KeyboardInterrupt
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
KeyboardInterrupt
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)


Epoch 1/10


KeyboardInterrupt


KeyboardInterrupt: ignored

In [0]:
"https://keras.io/examples/pretrained_word_embeddings/"

'https://keras.io/examples/pretrained_word_embeddings/'

In [0]:
"https://github.com/hlamba28/Automatic-Image-Captioning/blob/master/Automatic%20Image%20Captioning.ipynb"
"https://towardsdatascience.com/image-captioning-with-keras-teaching-computers-to-describe-pictures-c88a46a311b8"