## Model Testing

In [1]:
%matplotlib inline
import os
import math
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import configuration as config

from utils.vocabulary import Vocabulary
from utils.caption_generator import CaptionGenerator
from model import MemeModel
from keras.models import load_model
from keras.preprocessing import image
from utils.yanan_lm import text_processing, tokenize_unigram
from utils.yanan_lm import unigram_V, unigrams_prob
from utils.yanan_lm import ngram_prob, perplexity, add_k_smoothing

Using TensorFlow backend.


## Prepare Meme Captioner

In [2]:
# Location of the trained captioning checkpoints; passed to
# build_graph_from_config, which resolves the latest checkpoint inside it.
checkpoint_path = 'model5.12/train/'
# Word-count file given both to MemeModel and to Vocabulary.
vocab_file = 'batches/word_count.txt'
# Dataset directory forwarded to MemeModel as `dataset_dir`.
dataset_dir = 'batches/part-0-to-11960/'
# Model file forwarded to MemeModel as `model_file`
# (presumably a saved Keras model, judging by the .h5 suffix — confirm).
model_file = 'small-conv/model.h5'

In [3]:
#model = load_model('/media/memes/inception_log3.0/fine_inception.h5')
#model.summary()

In [4]:
def build_model(dataset_dir, image_format='jpeg'):
    """Create a MemeModel in 'inference' mode and build it.

    The vocabulary file and model file are read from the notebook-level
    names `vocab_file` and `model_file`; only the dataset directory and the
    image format are parameters.

    Args:
        dataset_dir: forwarded to MemeModel as `dataset_dir`.
        image_format: forwarded to `MemeModel.build`.

    Returns:
        The MemeModel instance after `build` has been called on it.
    """
    init_kwargs = {'model_file': model_file, 'dataset_dir': dataset_dir}
    meme_model = MemeModel('inference', vocab_file, **init_kwargs)
    meme_model.build(image_format)
    return meme_model

def feed_image(sess, encoded_image):
    """Feed an image into the graph and return the fetched initial LSTM state.

    Args:
        sess: object exposing `run(fetches=..., feed_dict=...)`.
        encoded_image: value fed to the "image_feed:0" tensor.

    Returns:
        Whatever `sess.run` yields for "lstm/initial_state:0".
    """
    feeds = {"image_feed:0": encoded_image}
    return sess.run(fetches="lstm/initial_state:0", feed_dict=feeds)

def inference_step(sess, input_feed, state_feed):
    """Run one decoding step of the LSTM.

    Args:
        sess: object exposing `run(fetches=..., feed_dict=...)`.
        input_feed: value fed to "input_feed:0".
        state_feed: value fed to "lstm/state_feed:0".

    Returns:
        A 3-tuple `(softmax_output, state_output, None)`; the third slot is
        always None.
    """
    fetch_names = ["softmax:0", "lstm/state:0"]
    feeds = {"input_feed:0": input_feed, "lstm/state_feed:0": state_feed}
    step_result = sess.run(fetches=fetch_names, feed_dict=feeds)
    softmax_output, state_output = step_result
    return softmax_output, state_output, None

# Creates a function that restores a model from checkpoint
def create_restore_fn(checkpoint_path, saver):
    """Return a callable that restores `saver`'s variables into a session.

    Args:
        checkpoint_path: either a checkpoint file, or a directory, in which
            case the latest checkpoint inside it is used.
        saver: object exposing `restore(sess, path)`.

    Returns:
        A function `_restore_fn(sess)` that restores the resolved checkpoint
        into `sess` and logs before and after doing so.

    Raises:
        ValueError: if `checkpoint_path` is a directory in which no
            checkpoint is found.
    """
    if tf.gfile.IsDirectory(checkpoint_path):
        # Keep the directory under its own name: latest_checkpoint yields a
        # falsy value when nothing is found, and the error below must report
        # the directory that was searched rather than that empty result.
        checkpoint_dir = checkpoint_path
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
        if not checkpoint_path:
            raise ValueError("No checkpoint file found in: %s" % checkpoint_dir)

    def _restore_fn(sess):
        tf.logging.info("Loading model from checkpoint: %s", checkpoint_path)
        saver.restore(sess, checkpoint_path)
        tf.logging.info("Successfully loaded checkpoint: %s",
                        os.path.basename(checkpoint_path))

    return _restore_fn

# Builds the inference graph and the matching checkpoint-restore function.
def build_graph_from_config(data_dir, checkpoint_path, image_format='jpeg'):
    """Build the inference model and a function that restores its weights.

    Args:
        data_dir: dataset directory forwarded to `build_model`.
        checkpoint_path: checkpoint file or directory forwarded to
            `create_restore_fn`.
        image_format: forwarded to `build_model`.

    Returns:
        A pair `(restore_fn, model)`: the callable produced by
        `create_restore_fn` and the model produced by `build_model`.
    """
    tf.logging.info("Building model.")
    inference_model = build_model(data_dir, image_format)
    # The variable list is collected only after the model has been built.
    all_variables = tf.global_variables()
    restore_fn = create_restore_fn(checkpoint_path, tf.train.Saver(all_variables))
    return restore_fn, inference_model

In [5]:
# Reset the default TensorFlow graph before building, so that re-running this
# cell builds the model into a fresh graph.
tf.reset_default_graph()
# restore_fn: callable that loads the checkpoint into a session.
# mememodel: the built MemeModel returned by build_model.
restore_fn, mememodel = build_graph_from_config(dataset_dir,
                                                checkpoint_path,
                                                image_format='jpeg')

INFO:tensorflow:Building model.
Initializing the model's parameters...
Mapping image embeddings...
(1, 128)
Building the LSTM model...
(1, 100)
Setting up the global step tensor...


In [6]:
# Create the vocabulary from the same word-count file that MemeModel was
# given; it is used later to turn generated token ids back into words.
vocab = Vocabulary(vocab_file)

INFO:tensorflow:Initializing vocabulary from file: batches/word_count.txt
INFO:tensorflow:Created vocabulary with 7412 words


In [7]:
# Print the layer-by-layer summary of the image model held in mememodel.model
# (the same object whose predict() output is later passed to beam search).
mememodel.model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 148, 148, 32)      896       
_________________________________________________________________
conv2 (Conv2D)               (None, 146, 146, 16)      4624      
_________________________________________________________________
max_pool1 (MaxPooling2D)     (None, 73, 73, 16)        0         
_________________________________________________________________
droput1 (Dropout)            (None, 73, 73, 16)        0         
_________________________________________________________________
flatten (Flatten)            (None, 85264)             0         
_________________________________________________________________
fc1 (Dense)                  (None, 128)               10913920  
__________

In [8]:
# Open the session shared by every captioning cell below. It is never closed
# in this notebook, so it lives until the kernel is restarted.
sess = tf.InteractiveSession()
# Load the checkpoint weights into the session.
restore_fn(sess)

INFO:tensorflow:Loading model from checkpoint: model5.12/train/model.ckpt-1000000
INFO:tensorflow:Restoring parameters from model5.12/train/model.ckpt-1000000
INFO:tensorflow:Successfully loaded checkpoint: model.ckpt-1000000


In [9]:
# Wire the two session helpers and the vocabulary into the caption generator.
# max_caption_length=8 bounds the generated caption length
# (presumably counted in tokens including start/end markers — confirm in
# utils.caption_generator).
generator = CaptionGenerator(feed_image, 
                             inference_step, 
                             vocab,
                             max_caption_length=8)

## Prepare Ngram tester

In [10]:
# Path of the training text used to fit the n-gram language model.
training_set = 'evaldataset/meme.train.txt'
# Threshold handed to unigram_V (presumably the minimum count below which a
# token is replaced by an unknown marker — confirm in utils.yanan_lm).
unk_threshold = 5
# Smoothing-mode label passed as the last argument of perplexity().
ADD_K_SMOOTHING = 'add_k_smoothing'
# NOTE(review): 'liner' looks like a misspelling of 'linear'. The string is
# left untouched because it may be compared against inside utils.yanan_lm.
# Neither this label nor NO_SMOOTHING is used by the cells visible here.
LINER_INT = 'liner interpolation'
NO_SMOOTHING = 'no smoothing'

In [11]:
# Tokenization
# NOTE: this call relies on the text_processing imported from utils.yanan_lm,
# which is given a file path here. A later cell defines a different
# text_processing(text, STOP_token) that shadows the import, so re-running
# this cell after that one (without restarting the kernel) would process the
# path string itself instead of the training file.
train_text = text_processing(training_set)
train_token = tokenize_unigram(train_text)

In [12]:
# unigram_V returns two values: the unigram counts and the training token
# sequence used by the n-gram models below (presumably with tokens rarer than
# unk_threshold replaced by an unknown marker — confirm in utils.yanan_lm).
unigram_count, replaced_tokens_train = unigram_V(train_token, unk_threshold)
# The language-model vocabulary is the set of distinct keys in unigram_count.
vocabulary = set(unigram_count.keys())

In [13]:
# generate unigram probability dict
# unigrams_prob is handed a copy, so unigram_count itself is still available
# unchanged for the bigram/trigram and smoothing calls in later cells
# (presumably because unigrams_prob may modify its argument — confirm).
uni_prob_dict = unigram_count.copy()
unigrams_prob_dict = unigrams_prob(uni_prob_dict)

# V is the vocabulary size passed to add_k_smoothing in the evaluation cells.
V = len(vocabulary)
print("Vocabulary length", V)

Vocabulary length 5722


In [14]:
# generate trigram probability dict
# (first argument 3 selects the trigram model; both calls share the same
# training tokens and unigram counts)
trigram_prob_dict = ngram_prob(3, replaced_tokens_train, unigram_count)

# generate bigram probability dict
bigram_prob_dict = ngram_prob(2, replaced_tokens_train, unigram_count)

# NOTE(review): neither dict is referenced by the evaluation cells visible in
# this notebook, which build add-k smoothed trigram tables instead.

In [15]:
# Caption pre-processing
def text_processing(text, STOP_token='_STOP_'):
    """Mark line ends with a stop token and blank out punctuation.

    Every newline in `text` is preceded by a space and `STOP_token`; after
    that, each character from a fixed punctuation set is replaced by a single
    space. The set contains neither the underscore nor the apostrophe, so the
    default stop token and contractions pass through unchanged.

    NOTE: this definition shadows the `text_processing` imported from
    utils.yanan_lm at the top of the notebook.

    Args:
        text: caption text, one caption per line.
        STOP_token: marker inserted before each newline.

    Returns:
        The processed string.
    """
    puncts = '!"#$%&()*+,-./:;<=>?@[\\]^`{|}~'
    to_space = str.maketrans(puncts, ' ' * len(puncts))
    with_stops = text.replace('\n', ' ' + STOP_token + '\n')
    return with_stops.translate(to_space)

## Caption and Testing

In [16]:
testdir = 'evaldataset/new_memes/'
# Keep the directory and its file list under separate names so `testdir`
# always refers to the directory string.
test_files = [os.path.join(testdir, t) for t in os.listdir(testdir)]
k_ls = (0.0000001,0.000001,0.00001,0.0001,0.01,0.1,1)
basestring = '{} / {:.2f}'

# One add-k smoothed trigram table per k. The arguments do not depend on the
# image being scored, so the tables are built once here rather than once per
# image inside the loop.
# NOTE(review): assumes add_k_smoothing and perplexity leave their inputs
# unmodified — confirm in utils.yanan_lm.
tri_addk_prob_dicts = [
    add_k_smoothing(3, replaced_tokens_train, unigram_count, k, V)
    for k in k_ls
]

count = 0      # running sum of the per-image average perplexities
n_scored = 0   # number of images actually captioned and scored
for filename in test_files:
    # os.listdir can also return sub-directories; only regular files can be
    # loaded as images.
    if not os.path.isfile(filename):
        continue

    # Caption image
    img = image.load_img(filename, target_size=(150, 150))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    preds = mememodel.model.predict(x)
    captions = generator.beam_search(sess, preds)

    # Decode every candidate to text, dropping the first and last token ids
    # (presumably the start/end markers — confirm against CaptionGenerator).
    caption_texts = [
        ' '.join(vocab.id_to_word(w) for w in caption.sentence[1:-1])
        for caption in captions
    ]
    # The first candidate is the one reported next to the score.
    output = caption_texts[0] if caption_texts else ''
    candidates = ''.join(text + '\n' for text in caption_texts)

    # Score all candidates together and average the perplexity over every k.
    dev_text = text_processing(candidates)
    perps = sum(perplexity(dev_text, 3, prob_dict, ADD_K_SMOOTHING)
                for prob_dict in tri_addk_prob_dicts)
    avg_perp = perps / len(k_ls)
    count += avg_perp
    n_scored += 1
    print(filename, basestring.format(output, avg_perp))

# Average over the images actually scored instead of a hard-coded 20.
print('total avg:', count / n_scored if n_scored else float('nan'))

evaldataset/new_memes/12.png.jpg twat bigbangtheory bigbangtheory bigbangtheory bigbangtheory bigbangtheory / 58.65
evaldataset/new_memes/1.jpg neighbors neighbors neighbors puto afternoon afternoon / 1014.00
evaldataset/new_memes/18.png escort escort escort escort escort escort / 58.65
evaldataset/new_memes/5.jpg count count count count legalize legalize / 6.18
evaldataset/new_memes/11.png.jpg verga verga verga verga kisses читать / 58.65
evaldataset/new_memes/6.jpg bangga tapi rise ref universal universal / 58.65
evaldataset/new_memes/8.jpg reported reported anonymous duke everytime everytime / 1014.00
evaldataset/new_memes/9.png.jpg marco marco nach nach truth truth / 58.65
evaldataset/new_memes/19.png meet meet meet gerd gerd calvin / 1014.00
evaldataset/new_memes/3.jpg touch je je je je je / 58.65
evaldataset/new_memes/17.png theory theory theory sales sales sales / 1014.00
evaldataset/new_memes/20.png gunna gunna gunna gunna not afternoon / 58.65
evaldataset/new_memes/15.png.jpg 

In [17]:
testdir = 'evaldataset/old_memes/'
# Keep the directory and its file list under separate names so `testdir`
# always refers to the directory string.
test_files = [os.path.join(testdir, t) for t in os.listdir(testdir)]
k_ls = (0.0000001,0.000001,0.00001,0.0001,0.01,0.1,1)
basestring = '{} / {:.2f}'

# One add-k smoothed trigram table per k. The arguments do not depend on the
# image being scored, so the tables are built once here rather than once per
# image inside the loop.
# NOTE(review): assumes add_k_smoothing and perplexity leave their inputs
# unmodified — confirm in utils.yanan_lm.
tri_addk_prob_dicts = [
    add_k_smoothing(3, replaced_tokens_train, unigram_count, k, V)
    for k in k_ls
]

count = 0      # running sum of the per-image average perplexities
n_scored = 0   # number of images actually captioned and scored
for filename in test_files:
    # os.listdir can also return sub-directories; only regular files can be
    # loaded as images.
    if not os.path.isfile(filename):
        continue

    # Caption image
    img = image.load_img(filename, target_size=(150, 150))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    preds = mememodel.model.predict(x)
    captions = generator.beam_search(sess, preds)

    # Decode every candidate to text, dropping the first and last token ids
    # (presumably the start/end markers — confirm against CaptionGenerator).
    caption_texts = [
        ' '.join(vocab.id_to_word(w) for w in caption.sentence[1:-1])
        for caption in captions
    ]
    # The first candidate is the one reported next to the score.
    output = caption_texts[0] if caption_texts else ''
    candidates = ''.join(text + '\n' for text in caption_texts)

    # Score all candidates together and average the perplexity over every k.
    dev_text = text_processing(candidates)
    perps = sum(perplexity(dev_text, 3, prob_dict, ADD_K_SMOOTHING)
                for prob_dict in tri_addk_prob_dicts)
    avg_perp = perps / len(k_ls)
    count += avg_perp
    n_scored += 1
    print(filename, basestring.format(output, avg_perp))

# Average over the images actually scored instead of a hard-coded 20.
print('total avg:', count / n_scored if n_scored else float('nan'))

evaldataset/old_memes/20.jpg frat frat hiatus hiatus eh eh / 6.18
evaldataset/old_memes/10.jpg notice touch ahead ahead ahead ahead / 75.64
evaldataset/old_memes/12.jpg jackass conditions conditions conditions calvin calvin / 58.65
evaldataset/old_memes/1.jpg tower ahead ahead gerd ahead gerd / 1014.00
evaldataset/old_memes/7.jpg spock spock spock afternoon afternoon afternoon / 1014.00
evaldataset/old_memes/5.jpg dances pvp pvp pvp mine pvp / 554.64
evaldataset/old_memes/18.jpg feet feet wait rails rails rails / 58.65
evaldataset/old_memes/6.jpg feelings feelings feelings feelings forgive forgive / 58.65
evaldataset/old_memes/14.jpg teachers teachers teachers teachers teachers teachers / 58.65
evaldataset/old_memes/13.jpg grandma grandma grandma grandma grandma grandma / 58.65
evaldataset/old_memes/15.jpg wished wished wished peyton peyton rustled / 58.65
evaldataset/old_memes/2.jpg 4s 4s 4s 4s bronze bronze / 58.65
evaldataset/old_memes/8.jpg pt pt pt pt clothes clothes / 1014.00
eva

In [18]:
testdir = 'evaldataset/no_memes/'
# Keep the directory and its file list under separate names so `testdir`
# always refers to the directory string.
test_files = [os.path.join(testdir, t) for t in os.listdir(testdir)]
k_ls = (0.0000001,0.000001,0.00001,0.0001,0.01,0.1,1)
basestring = '{} / {:.2f}'

# One add-k smoothed trigram table per k. The arguments do not depend on the
# image being scored, so the tables are built once here rather than once per
# image inside the loop.
# NOTE(review): assumes add_k_smoothing and perplexity leave their inputs
# unmodified — confirm in utils.yanan_lm.
tri_addk_prob_dicts = [
    add_k_smoothing(3, replaced_tokens_train, unigram_count, k, V)
    for k in k_ls
]

count = 0      # running sum of the per-image average perplexities
n_scored = 0   # number of images actually captioned and scored
for filename in test_files:
    # os.listdir can also return sub-directories; only regular files can be
    # loaded as images.
    if not os.path.isfile(filename):
        continue

    # Caption image
    img = image.load_img(filename, target_size=(150, 150))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    preds = mememodel.model.predict(x)
    captions = generator.beam_search(sess, preds)

    # Decode every candidate to text, dropping the first and last token ids
    # (presumably the start/end markers — confirm against CaptionGenerator).
    caption_texts = [
        ' '.join(vocab.id_to_word(w) for w in caption.sentence[1:-1])
        for caption in captions
    ]
    # The first candidate is the one reported next to the score.
    output = caption_texts[0] if caption_texts else ''
    candidates = ''.join(text + '\n' for text in caption_texts)

    # Score all candidates together and average the perplexity over every k.
    dev_text = text_processing(candidates)
    perps = sum(perplexity(dev_text, 3, prob_dict, ADD_K_SMOOTHING)
                for prob_dict in tri_addk_prob_dicts)
    avg_perp = perps / len(k_ls)
    count += avg_perp
    n_scored += 1
    print(filename, basestring.format(output, avg_perp))

# Average over the images actually scored instead of a hard-coded 20.
print('total avg:', count / n_scored if n_scored else float('nan'))

evaldataset/no_memes/COCO_test2014_000000437409.jpg queue queue queue queue je cultural / 58.65
evaldataset/no_memes/COCO_test2014_000000581919.jpg tower tower tower sort sort reaction / 58.65
evaldataset/no_memes/COCO_test2014_000000437984.jpg shizzle argument argument argument argument argument / 6.18
evaldataset/no_memes/COCO_test2014_000000581911.jpg choir choir choir choir sandy calvin / 1014.00
evaldataset/no_memes/COCO_test2014_000000291121.jpg accept theory theory theory nach admit / 58.65
evaldataset/no_memes/COCO_test2014_000000581645.jpg nach nach nach not not not / 1014.00
evaldataset/no_memes/COCO_test2014_000000437560.jpg armor armor armor wished jeans jeans / 58.65
evaldataset/no_memes/COCO_test2014_000000438020.jpg bangga bangga fps fps fps fps / 58.65
evaldataset/no_memes/COCO_test2014_000000581923.jpg jacket since since since since since / 58.65
evaldataset/no_memes/COCO_test2014_000000291429.jpg memegeneratornet memegeneratornet memegeneratornet memegeneratornet meme

| Datos | Perplejidad promedio vs. corpus de evaluación, Modelo `A` | Perplejidad promedio vs. corpus de evaluación, Modelo `B` |
|------:|:----------------:|:---------:|
| Nuevos Memes | 331.14 | 74.66 |
| Memes Evaluación | 241.78 | 47.19 |
| No Memes | 342.68 | 30.17 |

| Imagen | Leyenda Modelo `A` / Perplejidad  | Leyenda Modelo `B` / Perplejidad |
|:------:|:----------------:|:-------------:|
| <img width="50" alt="50" src="evaldataset/no_memes/COCO_test2014_000000437409.jpg"> | ask ask ask burnt seats fora / 1751.52 |  |
| <img width="50" alt="50" src="evaldataset/no_memes/COCO_test2014_000000581919.jpg"> | musica musica starve victoria victoria victoria / 1751.52 |  |
| <img width="50" alt="50" src="evaldataset/no_memes/COCO_test2014_000000437984.jpg"> | facebook facebook 100000 100000 100000 matter / 1751.52 |  |
| <img width="50" alt="50" src="evaldataset/no_memes/COCO_test2014_000000581911.jpg"> | kanji kanji matter matter matter matter / 80.89 |  |


| Imagen | Leyenda 1 | Leyenda 2 | Leyenda 3 |
|:------:|:----------------:|:-------------:|:-------------:|
| <img width="150" alt="200" src="sample/one-does-not-simply.jpg"> | one does not simply have one | one does not simply have a meme | one does not simply have one one |
| <img width="150" alt="200" src="sample/bender.jpg"> | i got a meme | i have a meme | i am a good |

| Imagen | Leyenda 1 | Leyenda 2 | Leyenda 3 |
|:------:|:----------------:|:-------------:|:-------------:|
| <img width="150" alt="200" src="test/cat_eyes.jpg"> | thesis thesis thesis thesis tony banner banner banner | thesis thesis thesis thesis tony banner banner banner | thesis thesis thesis thesis banner tony banner banner |
| <img width="150" alt="200" src="sample/guy-bathroom.jpg"> | imperialist imperialist imperialist gates gates attack | imperialist imperialist imperialist gates gates chad | imperialist imperialist imperialist gates attack chad |

| Imagen | Leyenda 1 | Leyenda 2 | Leyenda 3 |
|:------:|:----------------:|:-------------:|:-------------:|
| <img width="150" alt="200" src="test/cat_eyes.jpg"> | trains trains trains trains trains carefully | trains trains trains trains carefully carefully | trains trains trains trains trains carefully |
| <img width="150" alt="200" src="sample/bender.jpg"> | kurt virgin born doing bed bed dreams dreams | kurt virgin born doing bed bed chill confused | kurt virgin born doing bed bed dreams dreams |

| Imagen | Leyenda 1 | Leyenda 2 | Leyenda 3 |
|:------:|:----------------:|:-------------:|:-------------:|
| <img width="150" alt="100" src="sample/es-bakans.jpg"> | blacks trade vacation vs pure pure | blacks trade vacation vacation pure pure | blacks trade vacation vs pure magic |
| <img width="150" alt="200" src="sample/bender.jpg"> | avoid avoid surrender easily easily easily | avoid avoid surrender easily easily easily | avoid avoid avoid surrender easily easily |