In [3]:
!pip install opencv-python

Collecting opencv-python
  Downloading https://files.pythonhosted.org/packages/96/30/99bd865802cd5f425c42efd2ee4e10bd3bc605640008f03e3c72a1dbe320/opencv_python-4.0.0.21-cp36-cp36m-win_amd64.whl (30.4MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.0.0.21


In [1]:
# -*- coding: utf-8 -*-
import cv2
import os
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.utils import CustomObjectScope
from tensorflow.keras.initializers import glorot_uniform
from pickle import load
from numpy import argmax
from tensorflow.keras.preprocessing.sequence import pad_sequences

def similarity(frame1, frame2):
    """Score how alike two BGR frames are from their grayscale histograms.

    Each frame is converted to grayscale and reduced to a 256-bin intensity
    histogram over the range [0, 256). The two histograms are then compared
    with OpenCV's correlation method (``cv2.HISTCMP_CORREL``, numeric id 0).

    Returns:
        The value produced by ``cv2.compareHist`` for the two histograms.
    """
    histograms = []
    for frame in (frame1, frame2):
        # Convert the frame to grayscale before building its histogram.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        histograms.append(cv2.calcHist([gray], [0], None, [256], [0, 256]))
    return cv2.compareHist(histograms[0], histograms[1], cv2.HISTCMP_CORREL)

def extract_features(frame):
    """Compute VGG16 features for a single OpenCV frame.

    Args:
        frame: image array as returned by ``cv2.VideoCapture.read`` (OpenCV
            delivers frames in BGR channel order).

    Returns:
        The output of ``model_vgg16.predict`` for a batch containing the one
        preprocessed 224x224 frame.
    """
    # Keras' VGG16 preprocess_input expects RGB input (it performs the
    # RGB -> BGR conversion itself). Feeding the raw BGR frame would swap the
    # red and blue channels seen by the network, so convert first.
    # NOTE(review): assumes the captioning model was trained on features
    # extracted from RGB images (e.g. loaded via load_img) -- confirm.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # VGG16 takes 224x224 inputs.
    resized_image = cv2.resize(rgb_frame, (224, 224))
    image = img_to_array(resized_image)
    # Add a leading batch dimension of size 1 for the model.
    image = image.reshape((1,) + image.shape)
    # Apply the VGG-specific input preprocessing.
    image = preprocess_input(image)
    return model_vgg16.predict(image, verbose=0)

# reverse lookup: vocabulary index -> word
def word_for_id(integer, tokenizer):
    """Return the word whose index in ``tokenizer.word_index`` equals ``integer``.

    The mapping is scanned in its iteration order and the first matching word
    is returned; ``None`` is returned when no entry carries that index.
    """
    matches = (
        word
        for word, index in tokenizer.word_index.items()
        if index == integer
    )
    return next(matches, None)
 
# build a caption for a photo, one predicted word at a time
def generate_desc(model, tokenizer, photo, max_length):
    """Greedily generate a caption for ``photo``.

    Starting from the seed token ``'startseq'``, the text produced so far is
    integer-encoded with ``tokenizer``, padded to ``max_length`` and passed to
    ``model.predict`` together with ``photo``. The arg-max of the prediction
    is mapped back to a word via ``word_for_id`` and appended.

    Generation stops after ``max_length`` steps, when the predicted index has
    no word, or right after ``'endseq'`` has been appended.

    Returns:
        The space-separated caption, including the leading ``'startseq'`` and
        the trailing ``'endseq'`` when one was predicted.
    """
    words = ['startseq']
    for _ in range(max_length):
        # Encode and pad everything generated so far.
        encoded = tokenizer.texts_to_sequences([' '.join(words)])[0]
        padded = pad_sequences([encoded], maxlen=max_length)
        # Pick the most probable next word.
        prediction = model.predict([photo, padded], verbose=0)
        next_word = word_for_id(argmax(prediction), tokenizer)
        if next_word is None:
            # The predicted index is not in the vocabulary: give up.
            break
        words.append(next_word)
        if next_word == 'endseq':
            break
    return ' '.join(words)


# Working directory from which the relative paths 'model_19.h5' and
# 'tokenizer.pkl' below are resolved.
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
PROJECT_DIR = '/Users/Adrien Delpierre/Documents/Projet'
os.chdir(PROJECT_DIR)

# Image feature extractor: VGG16 with its final layer dropped, so the model
# outputs the activations of the second-to-last layer.
model_vgg16 = VGG16()
model_vgg16 = Model(inputs=model_vgg16.inputs, outputs=model_vgg16.layers[-2].output)

# Caption model. The custom object scope makes the name 'GlorotUniform'
# resolvable while the saved model is being deserialised.
with CustomObjectScope({'GlorotUniform': glorot_uniform()}):
    model = load_model('model_19.h5')

# Tokenizer used to encode/decode captions.
# NOTE: unpickling can execute arbitrary code; only load a tokenizer.pkl that
# comes from a trusted source.
# The context manager makes sure the file handle is closed after loading.
with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = load(tokenizer_file)
# pre-define the max sequence length (from training)
max_length = 40

# Text display settings (not referenced by the cells shown in this notebook).
font                   = cv2.FONT_HERSHEY_SIMPLEX
bottomSideOfText       = (200,350)
topLeftCornerOfText    = (350, 10)
fontScale              = 1
fontColor              = (0,255,70)
lineType               = 2

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [2]:
# A caption is generated when the histogram similarity between the current
# frame and the reference frame drops below this value.
SIMILARITY_THRESHOLD = 0.7
# The reference frame is replaced by the current frame every this many frames.
REFERENCE_REFRESH_INTERVAL = 15

cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError('Could not open video capture device 0')

    # Request a 224x224 capture size (property ids 3 and 4) and read the
    # reported frame rate (property id 5).
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 224)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 224)
    frameRate = cap.get(cv2.CAP_PROP_FPS)

    # The first frame serves as the initial reference for change detection.
    ok, prev_frame = cap.read()
    if not ok:
        raise RuntimeError('Could not read an initial frame from the camera')

    compteur = 1

    while True:
        # Capture frame-by-frame; a failed read yields no image, so stop
        # instead of passing None on to OpenCV.
        ok, frame = cap.read()
        if not ok:
            print('Frame capture failed; stopping.')
            break

        # Compare against the reference frame.
        similar = similarity(frame, prev_frame)

        # The scene changed enough: describe the new frame.
        if similar < SIMILARITY_THRESHOLD:
            frame_features = extract_features(frame)
            # generate description
            description = generate_desc(model, tokenizer, frame_features, max_length)
            print(description)
            print(similar)

        # Display the resulting frame
        cv2.imshow('frame', frame)
        if compteur % REFERENCE_REFRESH_INTERVAL == 0:
            prev_frame = frame
        compteur += 1
        # Press 'q' in the preview window to quit.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close the window, including when an
    # exception or a kernel interrupt ends the loop.
    cap.release()
    cv2.destroyAllWindows()

startseq endseq
0.6623630672569446
startseq endseq
0.6843072688658745
startseq a painted wood dog startseq a two shirt on pool while startseq over with a bar and a dog startseq a two shirt on a of shirt on a of shirt on a of shirt on a of shirt on
0.6847647871777065
startseq endseq
0.6872869112317306
startseq endseq
0.6966511121405158
