In [1]:
from numpy import argmax
from pickle import load
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
from nltk.translate.bleu_score import corpus_bleu
import numpy as np

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [12]:
def _load_split(id_file, split):
    """Load the ids, descriptions and three feature sets for one data split.

    The same five loading steps were previously written out twice (once for
    the training split, once for the test split); they are shared here so
    the two splits cannot drift apart.

    Args:
        id_file: path of the text file passed to ``load_set``.
        split: label ('train' or 'test') used in the progress messages.

    Returns:
        Tuple ``(ids, descriptions, c2d, c3d, semantic)`` in that order.
    """
    ids = load_set(id_file)
    print('Dataset: %d' % len(ids))

    # descriptions
    descriptions = load_descriptions('data/descriptions_processed.txt', ids)
    print('Descriptions: %s=%d' % (split, len(descriptions)))

    #load c2d features
    c2d = load_video_features('data/msvd_resnet152_features.pkl', ids)
    print('C2D: %s=%d' % (split, len(c2d)))

    #load c3d features
    c3d = load_video_features('data/msvd_c3d_features.pkl', ids)
    print('C3D: %s=%d' % (split, len(c3d)))

    #load semantic features
    semantic = load_video_features('data/msvd_semantic_features.pkl', ids)
    print('Semantic: %s=%d' % (split, len(semantic)))

    return ids, descriptions, c2d, c3d, semantic


#load the ids for a particular class
filename = 'data/msvd_classes/cook_train_ID.txt'
(train,
 train_descriptions,
 train_c2d_features,
 train_c3d_features,
 train_semantic_features) = _load_split(filename, 'train')

# load test set
filename = 'data/msvd_test_ID.txt'
(test,
 test_descriptions,
 test_c2d_features,
 test_c3d_features,
 test_semantic_features) = _load_split(filename, 'test')


Dataset: 1196
Descriptions: test=1196
Dataset: 369
Descriptions: test=369
C2D: test=369
C3D: test=369
Semantic: test=369
C2D: train=369
C2D: train=369


In [4]:
# Ordered list rather than a set: a set cannot be indexed (`domains[i]` raised
# TypeError) and has no stable order, while evaluate_model addresses the
# per-domain models and weights by integer position.
domains=['actions','animal','baby','cook','music','ride','simpleactions']
#load all the models
# Maps the integer position of a domain in `domains` to its loaded model.
model = {}
for i, name in enumerate(domains):
    # NOTE(review): the domain name and 'model_49.h5' are joined with no
    # separator, exactly as in the original expression - confirm this matches
    # the file names on disk.
    filename = 'domain_specific_models/'+name+'model_49.h5'
    model[i] = load_model(filename)

actions
animal
baby
cook
music
ride
simpleactions


In [44]:
# given the word id return the word
def word_for_id(integer, tokenizer):
    """Look up the word stored under a given index in ``tokenizer.word_index``.

    Args:
        integer: the index to search for.
        tokenizer: object exposing a ``word_index`` mapping of word -> index.

    Returns:
        The first word (in the mapping's iteration order) whose index equals
        ``integer``, or ``None`` when no entry matches.
    """
    matching_words = (
        token
        for token, token_id in tokenizer.word_index.items()
        if token_id == integer
    )
    return next(matching_words, None)

# predict one word at each time step
def generate_desc(model, tokenizer,c2d,c3d,sem, length,class_weight):
    """Greedily decode one caption, picking the highest-scoring word each step.

    Decoding starts from the token 'startseq' and stops after ``length``
    steps, when 'endseq' is produced, or when the predicted index has no
    entry in the tokenizer vocabulary.

    Args:
        model: object whose ``predict`` is called with
            ``[c2d batch, c3d batch, semantic batch, padded sequence]``.
        tokenizer: provides ``texts_to_sequences`` and ``word_index``.
        c2d: resnet152 feature of the video.
        c3d: c3d feature of the video.
        sem: semantic feature of the video.
        length: maximum number of decoding steps; also the padded input length.
        class_weight: not used by this function; kept so existing callers
            that pass it keep working.

    Returns:
        Tuple ``(in_text, prob)``: the generated text (beginning with
        'startseq') and the sum of the predicted scores of the chosen words.
    """
    # The video features do not change between steps, so wrap each one in a
    # batch of size one once instead of rebuilding the arrays every iteration.
    #resnet152 feature
    c2d1 = np.array([c2d])
    #c3d feature
    c3d1 = np.array([c3d])
    #semantic feature
    sem1 = np.array([sem])

    # initial token in the sentence
    in_text = 'startseq'
    prob = 0
    # continue till either end sequence is reached or maximum length of the sentence
    for _ in range(length):
        # convert the input descriptions to their corresponding integers
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # pad input to the maximum sequence length
        sequence = pad_sequences([sequence], maxlen=length)
        # predict the word probabilities
        yhat1 = model.predict([c2d1, c3d1, sem1, sequence], verbose=0)

        # argmax is taken over the flattened prediction and then used to index
        # row 0 - assumes predict returns a single row here (TODO confirm).
        yhat = argmax(yhat1)
        # The score is accumulated before the vocabulary lookup, so it is
        # counted even when the index turns out to have no word.
        prob += yhat1[0][yhat]

        word = word_for_id(yhat, tokenizer)
        if word is None:
            break

        in_text += ' ' + word
        if word == 'endseq':
            break

    return in_text, prob

# evaluate the skill of the model
def evaluate_model(model, descriptions, test_c2d, test_c3d, test_sem, tokenizer, max_length, filename,all_class_weights,domain_semantics,gt_video_cls,selection='weighted'):
    """Caption every video with each domain model, keep one caption, score BLEU.

    For each video a caption is generated with every domain model; one of
    them is kept according to ``selection``, written to ``filename`` and
    compared with the reference descriptions.

    Relies on two module-level names that are not parameters: ``domains``
    (only its length is used) and ``class_map`` (maps a domain position to a
    key of ``tokenizer``).

    Args:
        model: per-domain models, indexed by integer domain position.
        descriptions: mapping of video key -> list of reference sentences.
        test_c2d, test_c3d, test_sem: mappings of video key -> feature.
        tokenizer: container of tokenizers, indexed by ``class_map[i]``.
        max_length: maximum caption length passed to ``generate_desc``.
        filename: path of the text file the chosen captions are written to.
        all_class_weights: mapping of video key -> per-domain weights.
        domain_semantics: not used by this function; kept for compatibility.
        gt_video_cls: mapping of video key -> per-domain ground-truth scores.
        selection: how the caption kept for a video is chosen:
            'weighted' (default) - highest generation score multiplied by
            the domain's class weight; 'oracle' - domain with the highest
            ``gt_video_cls`` value; 'class_weight' - domain with the highest
            class weight.

    Returns:
        numpy array of four floats: BLEU-1 to BLEU-4.

    Raises:
        ValueError: if ``selection`` is not one of the three names above.

    NOTE(review): the original never assigned the caption it wrote out, so
    every line was empty and BLEU was computed on the bare 'startseq' token.
    It computed all three scores above but used none of them; 'weighted' is
    the default because that product was computed and then overwritten
    before use - confirm this is the intended strategy.
    """
    if selection not in ('weighted', 'oracle', 'class_weight'):
        raise ValueError("selection must be 'weighted', 'oracle' or 'class_weight', got %r" % (selection,))

    actual, predicted = list(), list()
    lines = list()

    #for every video generate descriptions
    for co, (key, desc_list) in enumerate(descriptions.items(), start=1):
        print(co)
        print(key)

        # Stays at the bare start token when no domain scores above zero.
        sent = 'startseq'
        best_score = 0
        # `len(domains)` is an int and cannot be iterated directly.
        for i in range(len(domains)):
            yhat, prob_new = generate_desc(model[i], tokenizer[class_map[i]], test_c2d[key], test_c3d[key], test_sem[key], max_length, all_class_weights[key])

            if selection == 'weighted':
                score = prob_new * all_class_weights[key][i]
            elif selection == 'oracle':
                score = gt_video_cls[key][i]
            else:
                score = all_class_weights[key][i]

            # Strict comparison: on ties the earliest domain is kept.
            if best_score < score:
                best_score = score
                sent = yhat

        # Keep only the text between the start and end markers for the file.
        after_start = sent.split('startseq')
        caption = after_start[1].split('endseq')
        lines.append('beam_size_1'+'\t'+key + '\t' + caption[0])

        references = [d.split() for d in desc_list]
        actual.append(references)
        predicted.append(sent.split())

    data = '\n'.join(lines)
    # Context manager closes the file even if the write fails.
    with open(filename, 'w') as file:
        file.write(data)

    bleu = np.zeros(4)
    # calculate BLEU score
    bleu[0] = corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0))
    bleu[1] = corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0))
    # NOTE(review): these weights sum to 0.9 rather than 1; left unchanged so
    # the numbers stay comparable with earlier runs.
    bleu[2] = corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0))
    bleu[3] = corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25))
    print('BLEU-1: %f' % bleu[0])
    print('BLEU-2: %f' % bleu[1])
    print('BLEU-3: %f' % bleu[2])
    print('BLEU-4: %f' % bleu[3])
    return bleu

In [None]:
# Maximum caption length handed to evaluate_model.
max_length=49
# Text file that receives the generated captions.
filename_new = 'generated_captions.txt'
# Keyword arguments spell out which notebook variable feeds which parameter.
evaluate_model(
    model=model,
    descriptions=test_descriptions,
    test_c2d=test_c2d_features,
    test_c3d=test_c3d_features,
    test_sem=test_semantic_features,
    tokenizer=all_tokenizer,
    max_length=max_length,
    filename=filename_new,
    all_class_weights=class_features,
    domain_semantics=domain_semantics,
    gt_video_cls=gt_video_cls,
)