In [0]:
# Mount Google Drive into the Colab filesystem; the tokenizer and model files
# loaded further down live under /content/drive/My Drive/.
from google.colab import drive
drive.mount("/content/drive/")

In [0]:
# reverse lookup: vocabulary index -> word
def word_for_id(integer, tokenizer):
    """Return the word stored under ``integer`` in ``tokenizer.word_index``.

    ``tokenizer.word_index`` is scanned in iteration order and the first word
    whose index equals ``integer`` is returned; ``None`` is returned when no
    entry matches.
    """
    matching_words = (
        candidate
        for candidate, position in tokenizer.word_index.items()
        if position == integer
    )
    return next(matching_words, None)

In [0]:
# greedy caption decoding for a single image
def generate_desc(model, tokenizer, photo, max_length):
    """Generate a caption for ``photo`` one word at a time.

    Starting from the seed token ``'startseq'``, each step encodes the caption
    so far with ``tokenizer``, pads it to ``max_length``, asks ``model`` for the
    next-word scores and keeps the highest-scoring word (via ``np.argmax``).

    Decoding stops after ``max_length`` steps, when the predicted index has no
    word in the tokenizer, or right after ``'endseq'`` has been appended.

    Returns the space-separated caption, including the leading ``'startseq'``
    and, if it was predicted, the trailing ``'endseq'``.
    """
    # the caption is kept as a token list and joined whenever text is needed
    tokens = ['startseq']
    for _ in range(max_length):
        # encode the caption so far and pad it to the fixed input length
        encoded = tokenizer.texts_to_sequences([' '.join(tokens)])[0]
        padded = pad_sequences([encoded], maxlen=max_length)
        # score the next word and keep the arg-max index
        scores = model.predict([photo, padded], verbose=0)
        next_word = word_for_id(np.argmax(scores), tokenizer)
        # an index without a vocabulary entry ends decoding
        if next_word is None:
            break
        tokens.append(next_word)
        # the end marker is kept in the output, then decoding stops
        if next_word == 'endseq':
            break
    return ' '.join(tokens)

In [0]:
import IPython
from IPython.display import HTML, display, Javascript
from google.colab.output import eval_js
from base64 import b64decode
import numpy as np
import io
from PIL import Image


def take_photo(filename='photo.jpg', quality=0.8):
  """Capture one webcam frame in the Colab output cell and return it as an array.

  A JavaScript helper ``takePhoto`` is displayed in the cell output. It shows a
  live video element with a "Capture" button, waits for a click, draws the
  current frame to a canvas and returns it as a JPEG data URL.

  Args:
    filename: not used by this function body; nothing is written to disk here.
    quality: value forwarded to ``canvas.toDataURL('image/jpeg', quality)``.

  Returns:
    ``np.asarray`` of the PIL image decoded from the captured JPEG bytes.
  """
  capture_script = Javascript('''
    async function takePhoto(quality) {
      const div = document.createElement('div');
      const capture = document.createElement('button');
      capture.textContent = 'Capture';
      div.appendChild(capture);

      const video = document.createElement('video');
      video.style.display = 'block';
      const stream = await navigator.mediaDevices.getUserMedia({video: true});

      document.body.appendChild(div);
      div.appendChild(video);
      video.srcObject = stream;
      await video.play();

      // Resize the output to fit the video element.
      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

      // Wait for Capture to be clicked.
      await new Promise((resolve) => capture.onclick = resolve);

      const canvas = document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      canvas.getContext('2d').drawImage(video, 0, 0);
      stream.getVideoTracks()[0].stop();
      div.remove();
      return canvas.toDataURL('image/jpeg', quality);
    }
    ''')
  display(capture_script)
  # Run the helper in the browser; the result is a "<header>,<base64 payload>" data URL.
  data_url = eval_js(f'takePhoto({quality})')
  encoded_jpeg = data_url.split(',')[1]
  jpeg_stream = io.BytesIO(b64decode(encoded_jpeg))
  return np.asarray(Image.open(jpeg_stream))

In [0]:
from keras.models import load_model,Model
from pickle import load
import matplotlib.pyplot as plt
from keras.applications.inception_v3 import InceptionV3,preprocess_input
from keras.preprocessing.image import load_img,img_to_array,save_img
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [0]:
# capture a webcam photo and run it through the feature extractor
def extract_features(model):
  """Take a photo and compute its feature vector with ``model``.

  The captured frame is written to ``photo.jpg``, reloaded at 299x299,
  converted to an array, given a leading batch axis of size 1, passed through
  ``preprocess_input`` and finally through ``model.predict``.

  Returns:
    ``(feature, img_array)``: the prediction of ``model`` for the prepared
    image, and the original captured frame as returned by ``take_photo``.
  """
  captured_frame = take_photo()

  # round-trip through disk so the image can be reloaded at the target size
  save_img("photo.jpg", captured_frame)
  resized = img_to_array(load_img("photo.jpg", target_size=(299, 299)))

  # add the batch axis in front of the three image axes, then normalise
  batch = preprocess_input(resized.reshape((1, *resized.shape[:3])))

  return model.predict(batch, verbose=0), captured_frame


In [0]:
# Build the image encoder: a full InceptionV3 whose output is taken from the
# layer just before its final (classification) layer.
#
# The previous code called `layers.pop()` and then used `layers[-1]`. That only
# works when `Model.layers` exposes the model's internal list; on Keras builds
# where the property returns a fresh list, the pop is a silent no-op and
# `layers[-1]` is still the classifier. Indexing `layers[-2]` directly selects
# the same layer in both cases and does not mutate the base model.
base_inception = InceptionV3()
feature_extracter = Model(
    inputs=base_inception.inputs,
    outputs=base_inception.layers[-2].output,
)

In [0]:
# load the tokenizer; the `with` block guarantees the file handle is closed
# (the previous `load(open(...))` form left it open).
# NOTE(review): unpickling can execute arbitrary code — only load a tokenizer
# file that you created or otherwise trust.
with open('/content/drive/My Drive/tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = load(tokenizer_file)
# pre-define the max sequence length (from training)
max_length = 34
# load the model
model = load_model("/content/drive/My Drive/image_captioning_model_inception_2.h5")

In [0]:
# load and prepare the photograph
photo, img = extract_features(feature_extracter)
# generate description (always begins with 'startseq')
description = generate_desc(model, tokenizer, photo, max_length)

description = description.split()
# Drop the leading 'startseq' marker. The trailing token is removed only when
# it really is 'endseq': generation can also stop at the length limit or on an
# unmapped index, and in that case the last token is a real caption word that
# an unconditional [1:-1] slice would throw away.
caption_words = description[1:]
if caption_words and caption_words[-1] == 'endseq':
    caption_words = caption_words[:-1]
output_text = " ".join(caption_words)

print(output_text)
plt.imshow(img)

plt.show()

<IPython.core.display.Javascript object>