#  Evaluation Demo

### Imports

In [1]:
import io
import base64
import glob
import os

import gzip
import json
import numpy as np
import torch
from IPython.display import HTML
from skvideo.io import FFmpegReader, ffprobe, vwrite
from torch.autograd import Variable
from ptcap.trainers import DataParallelWrapper
from ptcap.scores import ( caption_accuracy, first_token_accuracy, token_accuracy)
from ptcap.data.annotation_parser import JsonParser
from collections import OrderedDict
from collections import Counter, namedtuple



In [2]:
int2label = {
     0: 'Approaching [something] with your camera',
     1: 'Attaching [something] to [something]',
     2: 'Bending [something] so that it deforms',
     3: 'Bending [something] until it breaks',
     4: 'Burying [something] in [something]',
     5: 'Closing [something]',
     6: 'Covering [something] with [something]',
     7: 'Digging [something] out of [something]',
     8: 'Dropping [something] behind [something]',
     9: 'Dropping [something] in front of [something]',
     10: 'Dropping [something] into [something]',
     11: 'Dropping [something] next to [something]',
     12: 'Dropping [something] onto [something]',
     13: 'Failing to put [something] into [something] because [something] does not fit',
     14: 'Folding [something]',
     15: 'Hitting [something] with [something]',
     16: 'Holding [something]',
     17: 'Holding [something] behind [something]',
     18: 'Holding [something] in front of [something]',
     19: 'Holding [something] next to [something]',
     20: 'Holding [something] over [something]',
     21: 'Laying [something] on the table on its side, not upright',
     22: 'Letting [something] roll along a flat surface',
     23: 'Letting [something] roll down a slanted surface',
     24: 'Letting [something] roll up a slanted surface, so it rolls back down',
     25: 'Lifting [something] up completely without letting it drop down',
     26: 'Lifting [something] up completely, then letting it drop down',
     27: 'Lifting [something] with [something] on it',
     28: 'Lifting a surface with [something] on it but not enough for it to slide down',
     29: 'Lifting a surface with [something] on it until it starts sliding down',
     30: 'Lifting up one end of [something] without letting it drop down',
     31: 'Lifting up one end of [something], then letting it drop down',
     32: 'Moving [part] of [something]',
     33: 'Moving [something] across a surface until it falls down',
     34: 'Moving [something] across a surface without it falling down',
     35: 'Moving [something] and [something] away from each other',
     36: 'Moving [something] and [something] closer to each other',
     37: 'Moving [something] and [something] so they collide with each other',
     38: 'Moving [something] and [something] so they pass each other',
     39: 'Moving [something] away from [something]',
     40: 'Moving [something] away from the camera',
     41: 'Moving [something] closer to [something]',
     42: 'Moving [something] down',
     43: 'Moving [something] towards the camera',
     44: 'Moving [something] up',
     45: 'Moving away from [something] with your camera',
     46: 'Opening [something]',
     47: 'Picking [something] up',
     48: 'Piling [something] up',
     49: 'Plugging [something] into [something]',
     50: 'Plugging [something] into [something] but pulling it right out as you remove your hand',
     51: 'Poking [something] so it slightly moves',
     52: 'Poking [something] so lightly that it doesn’t or almost doesn’t move',
     53: 'Poking [something] so that it falls over',
     54: 'Poking [something] so that it spins around',
     55: 'Poking a hole into [some substance]',
     56: 'Poking a hole into [something soft]',
     57: 'Poking a stack of [something] so the stack collapses',
     58: 'Poking a stack of [something] without the stack collapsing',
     59: 'Pouring [something] into [something]',
     60: 'Pouring [something] into [something] until it overflows',
     61: 'Pouring [something] onto [something]',
     62: 'Pouring [something] out of [something]',
     63: 'Pretending or failing to wipe [something] off of [something]',
     64: 'Pretending or trying and failing to twist [something]',
     65: 'Pretending to be tearing [something that is not tearable]',
     66: 'Pretending to close [something] without actually closing it',
     67: 'Pretending to open [something] without actually opening it',
     68: 'Pretending to pick [something] up',
     69: 'Pretending to poke [something]',
     70: 'Pretending to pour [something] out of [something], but [something] is empty',
     71: 'Pretending to put [something] behind [something]',
     72: 'Pretending to put [something] into [something]',
     73: 'Pretending to put [something] next to [something]',
     74: 'Pretending to put [something] on a surface',
     75: 'Pretending to put [something] onto [something]',
     76: 'Pretending to put [something] underneath [something]',
     77: 'Pretending to scoop [something] up with [something]',
     78: 'Pretending to spread “air” onto [something]',
     79: 'Pretending to sprinkle "air" onto [something]',
     80: 'Pretending to squeeze [something]',
     81: 'Pretending to take [something] from [somewhere]',
     82: 'Pretending to take [something] out of [something]',
     83: 'Pretending to throw [something]',
     84: 'Pretending to turn [something] upside down',
     85: 'Pulling [something] from behind of [something]',
     86: 'Pulling [something] from left to right',
     87: 'Pulling [something] from right to left',
     88: 'Pulling [something] onto [something]',
     89: 'Pulling [something] out of [something]',
     90: 'Pulling two ends of [something] but nothing happens',
     91: 'Pulling two ends of [something] so that it gets stretched',
     92: 'Pulling two ends of [something] so that it separates into two pieces',
     93: 'Pushing [something] from left to right',
     94: 'Pushing [something] from right to left',
     95: 'Pushing [something] off of [something]',
     96: 'Pushing [something] onto [something]',
     97: 'Pushing [something] so it spins',
     98: "Pushing [something] so that it almost falls off but doesn't",
     99: 'Pushing [something] so that it falls off the table',
     100: 'Pushing [something] so that it slightly moves',
     101: 'Pushing [something] with [something]',
     102: 'Putting [number of] [something] onto [something]',
     103: 'Putting [something similar to other things that are already on the table]',
     104: 'Putting [something that cannot actually stand upright] upright on the table, so it falls on its side',
     105: 'Putting [something] and [something] on the table',
     106: 'Putting [something] behind [something]',
     107: 'Putting [something] in front of [something]',
     108: 'Putting [something] into [something]',
     109: 'Putting [something] next to [something]',
     110: 'Putting [something] on a flat surface without letting it roll',
     111: 'Putting [something] on a surface',
     112: 'Putting [something] on the edge of [something] so it is not supported and falls down',
     113: 'Putting [something] onto [something else that cannot support it] so it falls down',
     114: 'Putting [something] onto [something]',
     115: 'Putting [something] onto a slanted surface but it doesn’t glide down',
     116: 'Putting [something] that can’t roll onto a slanted surface, so it slides down',
     117: 'Putting [something] that can’t roll onto a slanted surface, so it stays where it is',
     118: 'Putting [something] underneath [something]',
     119: 'Putting [something] upright on the table',
     120: 'Putting [something], [something] and [something] on the table',
     121: 'Removing [something], revealing [something] behind',
     122: 'Rolling [something] on a flat surface',
     123: 'Scooping [something] up with [something]',
     124: 'Show a shadow of [something] that is moving. ',
     125: 'Show a shadow of [something], making sure the shadow is not moving. ',
     126: 'Showing [something] behind [something]',
     127: 'Showing [something] next to [something]',
     128: 'Showing [something] on top of [something]',
     129: 'Showing [something] to the camera',
     130: 'Showing a photo of [something] to the camera',
     131: 'Showing a shadow of [something] that is moving.',
     132: 'Showing a shadow of [something], making sure the shadow is not moving.',
     133: 'Showing that [something] is empty',
     134: 'Showing that [something] is inside [something]',
     135: 'Spilling [something] behind [something]',
     136: 'Spilling [something] next to [something]',
     137: 'Spilling [something] onto [something]',
     138: 'Spinning [something] so it continues spinning',
     139: 'Spinning [something] that quickly stops spinning',
     140: 'Spreading [something] onto [something]',
     141: 'Sprinkling [something] onto [something]',
     142: 'Squeezing [something]',
     143: 'Stacking [number of] [something]',
     144: 'Stuffing [something] into [something]',
     145: 'Taking [one of many similar things on the table]',
     146: 'Taking [something] from [somewhere]',
     147: 'Taking [something] out of [something]',
     148: 'Tearing [something] into two pieces',
     149: 'Tearing [something] just a little bit',
     150: 'Throwing [something]',
     151: 'Throwing [something] against [something]',
     152: 'Throwing [something] in the air and catching it',
     153: 'Throwing [something] in the air and letting it fall',
     154: 'Throwing [something] onto a surface',
     155: 'Tilting [something] with [something] on it slightly so it doesn’t fall down',
     156: 'Tilting [something] with [something] on it until it falls off',
     157: 'Tipping [something] over',
     158: 'Tipping [something] with [something in it] over, so [something in it] falls out',
     159: 'Touching (without moving) [part] of [something]',
     160: 'Trying but failing to attach [something] to [something] because it doesn’t stick',
     161: 'Trying to bend [something unbendable] so nothing happens',
     162: 'Trying to pour [something] into [something], but missing so it spills next to it',
     163: 'Turning [something] upside down',
     164: 'Turning the camera downwards while filming [something]',
     165: 'Turning the camera left while filming [something]',
     166: 'Turning the camera right while filming [something]',
     167: 'Turning the camera upwards while filming [something]',
     168: 'Twisting (wringing) [something] wet until water comes out',
     169: 'Twisting [something]',
     170: 'Uncovering [something]',
     171: 'Unfolding [something]',
     172: 'Wiping [something] off of [something]',
     173: '[Something] being deflected from [something]',
     174: '[Something] colliding with [something] and both are being deflected',
     175: '[Something] colliding with [something] and both come to a halt',
     176: '[Something] falling like a feather or paper',
     177: '[Something] falling like a rock'
}

In [3]:
stop_words = ['A', 'AN', 'THE', '<END>']

def safe_div(x,y):
    if y == 0:
        return 0
    return x / y
def fscore(precision, recall, beta=1):
    numerator = (1.0 + (beta ** 2)) * precision * recall
    denominator = ((beta ** 2) * precision) + recall
    return {"fscore": safe_div(numerator, denominator)}
class LCS(object):
    """
    The main functionality of this class is to compute the LCS (Lowest Common
    Subsequence) between a caption and prediction. By default, it returns the
    precision and recall values calculated based on the LCS between a prediction
    and a caption.
    """
    def __init__(self, functions_list, tokenizer):
        """
        Initializes functions_list and tokenizer.
        Args:
        functions_list: A list of the functions that will be applied on the
        precision and recall values calculated based on the LCS between a
        prediction and a caption.
        """

        self.functions_list = functions_list
        self.scores_container = OrderedDict()
        self.scores_dict = OrderedDict()
        self.tokenizer = tokenizer

    def __call__(self, outputs):
        string_predictions = [self.tokenizer.get_string(str_pred.data.numpy())
                              for str_pred in outputs.predictions]
        return self.score_batch(string_predictions, outputs.string_captions)

    def collect_scores(self, batch_scores_dict, scores_dict):
        for metric, metric_value in scores_dict.items():
            if metric not in batch_scores_dict:
                batch_scores_dict[metric] = [metric_value]
            else:
                batch_scores_dict[metric].append(metric_value)
        return batch_scores_dict

    @classmethod
    def compute_lcs(cls, prediction, caption):
        num_rows = len(prediction)
        num_cols = len(caption)

        table = [[0] * (num_cols + 1) for _ in range(num_rows + 1)]
        for i in range(1, num_rows + 1):
            for j in range(1, num_cols + 1):
                if prediction[i - 1] == caption[j - 1]:
                    table[i][j] = table[i - 1][j - 1] + 1
                else:
                    table[i][j] = max(table[i][j - 1], table[i - 1][j])
        return table, table[num_rows][num_cols]

    def mean_scores(self, batch_scores_dict):
        for metric, metric_value in batch_scores_dict.items():
            batch_scores_dict[metric] = np.mean(metric_value)
        return batch_scores_dict

    def score_batch(self, predictions, captions):
        assert len(predictions) == len(captions)

        batch_scores_dict = OrderedDict()
        for count, (prediction, caption) in enumerate(zip(predictions,
                                                          captions)):
            scores_dict = self.score_sample(prediction.split(), caption.split())
            batch_scores_dict = self.collect_scores(batch_scores_dict,
                                                    scores_dict)

        batch_scores_dict = self.mean_scores(batch_scores_dict)
        return batch_scores_dict

    def score_sample(self, prediction, caption):
        scores_dict = OrderedDict()
        _, lcs_score = self.compute_lcs(prediction, caption)
        scores_dict["precision"] = safe_div(lcs_score, len(prediction))
        scores_dict["recall"] = safe_div(lcs_score, len(caption))

        for score_function in self.functions_list:
            scores_dict.update(score_function(scores_dict["precision"],
                                              scores_dict["recall"]))

        return scores_dict

### Tool to deal with mpeg videos

In [4]:
def show_video(video_filenames):
    """
    Tool to display videos inside the notebook.
    """
    
    if type(video_filenames) is not list:
        video_filenames = [video_filenames]
    
    html_code = ''
    for filename in video_filenames:
        video = io.open(filename, 'r+b').read()
        encoded = base64.b64encode(video)
        html_code += '''
        <video alt="test" width="640" height="480" controls>
        <source src="data:video/mp4;base64,{0}" type="video/mp4" />
        </video>
        '''.format(encoded.decode('ascii'))
        
    return HTML(data= html_code)


def open_mpeg_video(fname, framerate, size):
    """
    Open an mpeg video, and return it as a numpy array.
    """
    
    metadata = ffprobe(fname)
    duration = float(metadata['video']['@duration'])
    # Compute corresponding nb of frames
    nframes = int(duration * framerate)
    oargs = {
        "-r": "%d" % framerate,
        "-vframes": "%d" % nframes,
        "-s": "%dx%d" % (size[0], size[1])
    }
    # Open file
    reader = FFmpegReader(fname, inputdict={}, outputdict=oargs)
    video = []
    # Get frames until there is no more
    for frame in reader.nextFrame():
        video.append(frame)
    # Return as a numpy array
    return np.array(video)

## A) Model Instantiation

In [5]:
from ptcap.model.captioners import EncoderDecoder
from ptcap.model.encoders import C3dLSTMEncoder
from ptcap.model.two_stream_encoders import TwoStreamEncoder
from ptcap.model.external_encoders import FCEncoder, JesterEncoder, BIJesterEncoder
from ptcap.model.decoders import LSTMDecoder, CoupledLSTMDecoder
  
#net = FullyConvolutionalNet(num_classes=178)jester1024_cutoff_300_ssssssss/

#net = EncoderDecoder(
#        encoder=BIJesterEncoder,
#        decoder=CoupledLSTMDecoder,
#        encoder_kwargs={"freeze": False},#, "pretrained_path": "/home/farzaneh/PycharmProjects/pretrained_nets/fully_conv_net_on_smtsmt_20170627/model.checkpoint"},
#        decoder_kwargs={"embedding_size": 256, "hidden_size": 1024, "num_lstm_layers": 2, 
#        "vocab_size": 2986, "num_step" :17}, 
#        gpus=[0]).cuda()
net = EncoderDecoder(
        encoder=TwoStreamEncoder,
        decoder=CoupledLSTMDecoder,
        encoder_kwargs={"encoder_output_size": 1024,"c2d_output_size": 256,
                   "c3d_output_size": 256, "rnn_output_size":1024},#, "pretrained_path": "/home/farzaneh/PycharmProjects/pretrained_nets/fully_conv_net_on_smtsmt_20170627/model.checkpoint"},
        decoder_kwargs={"embedding_size": 256, "hidden_size": 1024, "num_lstm_layers": 2, 
        "vocab_size": 2986, "num_step" :17, "fc_size":1024}, 
        gpus=[0]).cuda()
net = DataParallelWrapper(net, device_ids=[0]).cuda(0)

gpus: [0]


## B) Load weights

In [6]:
path = '/home/farzaneh/PycharmProjects/pytorch-captioning/results/clapnet_balanced_tokens/'
# path = '/home/farzaneh/PycharmProjects/pytorch-captioning/results/clapnet_captioning_only_f0.1'
path = '/home/farzaneh/PycharmProjects/pytorch-captioning/results/cs5/Jan_two_stream_c23d_labels_cutoff5'


checkpoint = torch.load(path + '/model.best')


net.load_state_dict(checkpoint["model"])


## C) Load Tokenizer

In [7]:
for key in checkpoint["model"]:
    if "c3d_feature_extractor" in key:
        print(checkpoint["model"][key])


In [8]:
from ptcap.data.tokenizer import Tokenizer

USER_MAXLEN=17
tokenizer = Tokenizer(user_maxlen=USER_MAXLEN)
tokenizer.load_dictionaries(path)

## D) Testing 

In [9]:
TMP_VIDEO_FILENAME = 'tmp.mp4'

def unpreprocess(video):
    video = video.data.numpy()[0]
    video = 64. * video.transpose(1, 2, 3, 0)
    return np.array(video, 'uint8')

def demo(net, preprocessor, filename, top_n=5):
    # Open mpeg file and get a numpy array
    video_uint8 = open_mpeg_video(filename, 12, [128, 128])
    # Preprocessing
    video = preprocessor(video_uint8)
    # Convert to torch variable
    video = Variable(torch.from_numpy(video[None]), volatile=True).cuda()
    empty_caption = Variable(torch.zeros([1, 1]), volatile=True).long().cuda()
    
    # Compute predictions
    pred, class_pred = net.forward((video, empty_caption), use_teacher_forcing=False)
    # Convert to numpy 
    pred = np.exp(pred.cpu().data.numpy())[0]
        
    pred_argmax = np.argmax(pred, axis=1)
    decoded_pred = tokenizer.decode_caption(pred_argmax)
    beautiful_caption = " ".join(str(e+" ") for e in decoded_pred if "<END>" not in e)
    #print('__CAPTION__: {}'.format(beautiful_caption))
    
    
    # Class index
    class_index = torch.max(class_pred, dim=1)[1].cpu().data[0]
    cls = int2label[class_index]
    #print('ACTION: {:60s}\n'.format(cls))
    
    
    matched_action = get_template(decoded_pred, templates, tokenizer)
    # print(actions)
    objects = get_object_tokens(decoded_pred, matched_action[0][0])

    
    objects_list = extract_objects(objects)
    # Print class name with proba
    # Save input video in tmp file
    vwrite(TMP_VIDEO_FILENAME, unpreprocess(video.cpu()))
    return beautiful_caption, cls, objects_list, matched_action[0][0]


def path_generator(annotation_path, root_path):
    with gzip.open(annotation_path, "rt") as f:
        annotations = json.load(f)
    files = [elem['file'] for elem in annotations]
    labels = [elem['label'] for elem in annotations]
    placeholders = [elem['placeholders'] for elem in annotations] 
    actions = [elem['template'].replace("[","").replace("]", "").replace(",","").upper() for elem in annotations]
    
    return ((os.path.join(root_path, f), label, a, p) for f,label,p, a in zip(files, labels, placeholders, actions))

In [10]:
# Path generator
path_gen = path_generator('/data/20bn-somethingsomething/json/test_20170929.json.gz', 
                          '/data/20bn-somethingsomething/videos')
# Put the netwoark in evaluation mode
_ = net.eval()

#### Preprocessor

In [11]:
from rtorchn.data.preprocessing import default_evaluation_preprocesser

preprocessor = default_evaluation_preprocesser([48, 96, 96], 64.)

In [12]:
for i in range(5000):
    path_to_video, label, _,_ = next(path_gen)

## Longest Common Subsequence

In [13]:
def get_template(candidates, templates, tokenizer):
   
    lcs = LCS([fscore], tokenizer)
    
    max_templates = []
    #print("There are {} templates".format(len(templates)))

    candidates = [" ".join(candidates)]
    for candidate in candidates:
        
        max_lcs_template = ""
        max_lcs_value = -1
        for template in templates:
            lcs_value = compute_LCS(lcs, candidate, template, tokenizer)
            if lcs_value > max_lcs_value:
                max_lcs_template = template
                max_lcs_value = lcs_value
        max_templates.append((max_lcs_template, max_lcs_value))
#         print("Candidate: {}".format(candidate))
        #print("MATCHED ACTION : {}".format(max_lcs_template))

    return max_templates


def compute_LCS(lcs, candidate, template, tokenizer):
    encoded_caption = Variable(
        torch.LongTensor([tokenizer.encode_caption(candidate)]))
    encoded_prediction = Variable(
        torch.LongTensor([tokenizer.encode_caption(template)]))
    score_attr = namedtuple("ScoresAttr", "string_captions captions predictions")
    in_tuple = score_attr([candidate], encoded_caption, encoded_prediction)
    lcs_output = lcs(in_tuple)
    return lcs_output['fscore']

def extract_objects(object_tokens_list):
    
    objects_list = []
    if len(object_tokens_list) == 0:
        return objects_list
    
    next_token_ind =  object_tokens_list[0][0]
    current_object = ""
    for  (ind, token) in object_tokens_list:
        if  next_token_ind == ind:
            current_object += token+" "
        else:
            objects_list.append(current_object+" ")
            current_object = token
            next_token_ind = ind
        next_token_ind += 1
        
        
    if len(current_object)>0:
        objects_list.append(current_object)
               
    #print("PREDICTED OBJECTS: {}".format(objects_list))
    return objects_list



def get_object_tokens(caption, template):
    return [(i,token) for (i,token) in enumerate(caption) if token not in template and token not in stop_words]

In [14]:
articles= ["the", "a", "an", "A", "An", "The"]

annotations = JsonParser.open_annotation("/data/20bn-somethingsomething/json/train_20171031.json.gz")
templates = np.unique(annotations["template"]) # A list of templates
objects = annotations["placeholders"]
obj_tokens = [token for token in objects if token not in stop_words]
all_obj=[item for sublist in objects for item in sublist]
filtered_obj =  [" ".join(obj) for obj in all_obj for  token in obj if token not in articles]
templates = [" ".join(tokenizer.tokenize(t)) for t in templates]


# get_objects(sentences1[0].split(), sentence1_templates[0].split())



In [None]:
articles= ["the", "a", "an", "A", "An", "The"]

fil3 = [token.upper() for obj in all_obj for token in obj.split(" ")]
fil2 = [[token for token in obj.split(" ") if token not in articles ] for obj in all_obj]
fil = list( map(lambda p:" ".join([token for token in p.split(" ") if token not in articles ]), all_obj))



In [None]:
all_objects = {}
correct_objects = {}
all_actions = {}
correct_actions = {}
correct_actions_classif = {}

for i in range(1000):
    path_to_video, target_caption, target_action, target_objects = next(path_gen)
    for p in target_objects:
        p_tokens = p.split(" ")
        for pto in p_tokens:
            all_objects[pto.strip().upper()] = all_objects.get(pto.strip().upper(), 0) + 1
        
    all_actions[target_action] = all_actions.get(target_action, 0) + 1

   
    print('sample {}'.format(i)) 
    pred_caption, pred_action, pred_objects, matched_action = demo(net, preprocessor, path_to_video)
    for (i,o) in enumerate(pred_objects):
        o_tokens = o.strip().split(" ")
        for oto in o_tokens:
            if i<len(target_objects) and oto in target_objects[i].upper():
                correct_objects[oto] = correct_objects.get(oto, 0) + 1
                print("woohoo")
               
          
    print('TARGET CAPTION: {}'.format(target_caption))
    print('PRED   CAPTION: {}\n'.format(pred_caption))
    print('TARGET  ACTION: {}'.format(target_action))
    print('CLASSIF ACTION: {}'.format(pred_action))
    print('CAPTION ACTION: {}\n'.format(matched_action))
            
    print('TARGET OBJECTS: {}'.format(target_objects))
    print('PRED   OBJECTS: {}\n'.format(pred_objects))


    if matched_action == target_action:
        print("yesss")
        correct_actions[matched_action] = correct_actions.get(matched_action, 0) + 1
        
    if pred_action.replace("[","").replace("]","").lower() == target_action.replace("[","").replace("]","").lower():
        correct_actions_classif[pred_action] = correct_actions_classif.get(pred_action, 0) + 1
        
    print('{}\n'.format('-'*65))
    
print(all_objects)
print(correct_objects)

print(all_actions)
print(correct_actions)
    

sample 0
TARGET CAPTION: moving ball away from comp
PRED   CAPTION: TILTING  A  CUTTING  BOARD  WITH  A  CAN  ON  IT  SLIGHTLY  SO  IT 

TARGET  ACTION: MOVING SOMETHING AWAY FROM SOMETHING
CLASSIF ACTION: Tilting [something] with [something] on it slightly so it doesn’t fall down
CAPTION ACTION: TILTING SOMETHING WITH SOMETHING ON IT SLIGHTLY SO IT DOESNT FALL DOWN

TARGET OBJECTS: ['ball', 'comp']
PRED   OBJECTS: ['CUTTING BOARD  ', 'CAN']

-----------------------------------------------------------------

sample 1
TARGET CAPTION: holding cards
PRED   CAPTION: HOLDING  A  REMOTE 

TARGET  ACTION: HOLDING SOMETHING
CLASSIF ACTION: Holding [something] next to [something]
CAPTION ACTION: HOLDING SOMETHING

TARGET OBJECTS: ['cards']
PRED   OBJECTS: ['REMOTE ']

yesss
-----------------------------------------------------------------

sample 2
TARGET CAPTION: holding remote next to doll
PRED   CAPTION: SHOWING  A  PAIR  OF  SCISSORS  NEXT  TO  A  PAIR  OF  SCISSORS 

TARGET  ACTION: HOLDIN

TARGET CAPTION: pretending to spread “air” onto Kitchen table
PRED   CAPTION: PULLING  PILLOW  FROM  RIGHT  TO  LEFT 

TARGET  ACTION: PRETENDING TO SPREAD “AIR” ONTO SOMETHING
CLASSIF ACTION: Pulling [something] from right to left
CAPTION ACTION: PULLING SOMETHING FROM RIGHT TO LEFT

TARGET OBJECTS: ['Kitchen table']
PRED   OBJECTS: ['PILLOW ']

-----------------------------------------------------------------

sample 21
woohoo
TARGET CAPTION: putting a wallet  next to a glass
PRED   CAPTION: PUTTING  A  CUP  NEXT  TO  A  GLASS 

TARGET  ACTION: PUTTING SOMETHING NEXT TO SOMETHING
CLASSIF ACTION: Putting [something] on a surface
CAPTION ACTION: PUTTING SOMETHING NEXT TO SOMETHING

TARGET OBJECTS: ['a wallet ', 'a glass']
PRED   OBJECTS: ['CUP  ', 'GLASS']

yesss
-----------------------------------------------------------------

sample 22
TARGET CAPTION: pretending to put a computer chip into a glass
PRED   CAPTION: PRETENDING  TO  PUT  A  COIN  INTO  A  MUG 

TARGET  ACTION: PRETENDIN

TARGET CAPTION: putting a pair of spectacles onto a dashboard
PRED   CAPTION: PUTTING  A  REMOTE  ONTO  A  SLANTED  SURFACE  BUT  IT  DOESNT  GLIDE  DOWN 

TARGET  ACTION: PUTTING SOMETHING ONTO SOMETHING
CLASSIF ACTION: Putting [something] onto [something]
CAPTION ACTION: PUTTING SOMETHING ONTO A SLANTED SURFACE BUT IT DOESNT GLIDE DOWN

TARGET OBJECTS: ['a pair of spectacles', 'a dashboard']
PRED   OBJECTS: ['REMOTE ']

-----------------------------------------------------------------

sample 41
TARGET CAPTION: taking a jar out of a shelf
PRED   CAPTION: TAKING  BOTTLE  FROM  RACK 

TARGET  ACTION: TAKING SOMETHING OUT OF SOMETHING
CLASSIF ACTION: Taking [something] from [somewhere]
CAPTION ACTION: TAKING SOMETHING FROM SOMEWHERE

TARGET OBJECTS: ['a jar', 'a shelf']
PRED   OBJECTS: ['BOTTLE  ', 'RACK']

-----------------------------------------------------------------

sample 42
TARGET CAPTION: dropping Phone onto Pants
PRED   CAPTION: DROPPING  PILLOW  ONTO  BED 

TARGET  ACTION: D

TARGET CAPTION: turning Wood box upside down
PRED   CAPTION: TURNING  A  BOOK  UPSIDE  DOWN 

TARGET  ACTION: TURNING SOMETHING UPSIDE DOWN
CLASSIF ACTION: Turning [something] upside down
CAPTION ACTION: TURNING SOMETHING UPSIDE DOWN

TARGET OBJECTS: ['Wood box']
PRED   OBJECTS: ['BOOK ']

yesss
-----------------------------------------------------------------

sample 61
woohoo
TARGET CAPTION: pouring wine into a wine glass
PRED   CAPTION: POURING  WATER  INTO  A  GLASS 

TARGET  ACTION: POURING SOMETHING INTO SOMETHING
CLASSIF ACTION: Pouring [something] into [something]
CAPTION ACTION: POURING SOMETHING INTO SOMETHING

TARGET OBJECTS: ['wine', 'a wine glass']
PRED   OBJECTS: ['WATER  ', 'GLASS']

yesss
-----------------------------------------------------------------

sample 62
TARGET CAPTION: bending a stick until it breaks
PRED   CAPTION: BENDING  A  RULER  SO  THAT  IT  DEFORMS 

TARGET  ACTION: BENDING SOMETHING UNTIL IT BREAKS
CLASSIF ACTION: Moving [something] and [something] c

TARGET CAPTION: pushing cup so that it falls off the table
PRED   CAPTION: PUSHING  A  BOX  SO  THAT  IT  FALLS  OFF  THE  TABLE 

TARGET  ACTION: PUSHING SOMETHING SO THAT IT FALLS OFF THE TABLE
CLASSIF ACTION: Pushing [something] so that it falls off the table
CAPTION ACTION: PUSHING SOMETHING SO THAT IT FALLS OFF THE TABLE

TARGET OBJECTS: ['cup']
PRED   OBJECTS: ['BOX ']

yesss
-----------------------------------------------------------------

sample 81
TARGET CAPTION: trying but failing to attach banner  to board because it doesn’t stick
PRED   CAPTION: PUTTING  A  BOOK  A  PAPER  INTO  A  POCKET 

TARGET  ACTION: TRYING BUT FAILING TO ATTACH SOMETHING TO SOMETHING BECAUSE IT DOESN’T STICK
CLASSIF ACTION: Pretending to turn [something] upside down
CAPTION ACTION: PUTTING SOMETHING INTO SOMETHING

TARGET OBJECTS: ['banner ', 'board']
PRED   OBJECTS: ['BOOK  ', 'PAPER ', 'POCKET']

-----------------------------------------------------------------

sample 82
TARGET CAPTION: holding a

TARGET CAPTION: showing that coffee cup is empty
PRED   CAPTION: SHOWING  THAT  A  MUG  IS  EMPTY 

TARGET  ACTION: SHOWING THAT SOMETHING IS EMPTY
CLASSIF ACTION: Showing that [something] is empty
CAPTION ACTION: SHOWING THAT SOMETHING IS EMPTY

TARGET OBJECTS: ['coffee cup']
PRED   OBJECTS: ['MUG ']

yesss
-----------------------------------------------------------------

sample 100
TARGET CAPTION: putting dice that can’t roll onto a slanted surface, so it slides down
PRED   CAPTION: LETTING  A  PEN  ROLL  DOWN  A  SLANTED  SURFACE 

TARGET  ACTION: PUTTING SOMETHING THAT CAN’T ROLL ONTO A SLANTED SURFACE SO IT SLIDES DOWN
CLASSIF ACTION: Letting [something] roll down a slanted surface
CAPTION ACTION: LETTING SOMETHING ROLL DOWN A SLANTED SURFACE

TARGET OBJECTS: ['dice']
PRED   OBJECTS: ['PEN ']

-----------------------------------------------------------------

sample 101
TARGET CAPTION: putting sunglass, stapler and bangles on the table
PRED   CAPTION: PUTTING  A  FORK  A  SPOON  

woohoo
TARGET CAPTION: pushing tissue from right to left
PRED   CAPTION: PUSHING  TISSUE  FROM  RIGHT  TO  LEFT 

TARGET  ACTION: PUSHING SOMETHING FROM RIGHT TO LEFT
CLASSIF ACTION: Pushing [something] from right to left
CAPTION ACTION: PUSHING SOMETHING FROM RIGHT TO LEFT

TARGET OBJECTS: ['tissue']
PRED   OBJECTS: ['TISSUE ']

yesss
-----------------------------------------------------------------

sample 118
TARGET CAPTION: tearing tissue just a little bit
PRED   CAPTION: TEARING  PAPER  JUST  A  LITTLE  BIT 

TARGET  ACTION: TEARING SOMETHING JUST A LITTLE BIT
CLASSIF ACTION: Tearing [something] just a little bit
CAPTION ACTION: TEARING SOMETHING JUST A LITTLE BIT

TARGET OBJECTS: ['tissue']
PRED   OBJECTS: ['PAPER ']

yesss
-----------------------------------------------------------------

sample 119
TARGET CAPTION: tearing tissue just a little bit
PRED   CAPTION: TEARING  PAPER  JUST  A  LITTLE  BIT 

TARGET  ACTION: TEARING SOMETHING JUST A LITTLE BIT
CLASSIF ACTION: Tearing [s

TARGET CAPTION: pushing Steel bowl from right to left
PRED   CAPTION: PUSHING  A  BANGLE  FROM  RIGHT  TO  LEFT 

TARGET  ACTION: PUSHING SOMETHING FROM RIGHT TO LEFT
CLASSIF ACTION: Pushing [something] from right to left
CAPTION ACTION: PUSHING SOMETHING FROM RIGHT TO LEFT

TARGET OBJECTS: ['Steel bowl']
PRED   OBJECTS: ['BANGLE ']

yesss
-----------------------------------------------------------------

sample 137
woohoo
TARGET CAPTION: poking a hole into paper
PRED   CAPTION: TEARING  PAPER  INTO  TWO  PIECES 

TARGET  ACTION: POKING A HOLE INTO SOME SUBSTANCE
CLASSIF ACTION: Lifting [something] up completely without letting it drop down
CAPTION ACTION: TEARING SOMETHING INTO TWO PIECES

TARGET OBJECTS: ['paper']
PRED   OBJECTS: ['PAPER ']

-----------------------------------------------------------------

sample 138
TARGET CAPTION: holding keyboard in front of monitor
PRED   CAPTION: HOLDING  A  BOTTLE  OVER  A  BOOK 

TARGET  ACTION: HOLDING SOMETHING IN FRONT OF SOMETHING
CLASSIF

TARGET CAPTION: putting glue bottle, watch and keys on the table
PRED   CAPTION: PUTTING  A  SPOON  A  FORK  AND  A  SPOON  ON  THE  TABLE 

TARGET  ACTION: PUTTING SOMETHING SOMETHING AND SOMETHING ON THE TABLE
CLASSIF ACTION: Putting [something] and [something] on the table
CAPTION ACTION: PUTTING SOMETHING AND SOMETHING ON THE TABLE

TARGET OBJECTS: ['glue bottle', 'watch', 'keys']
PRED   OBJECTS: ['SPOON  ', 'FORK ', 'SPOON']

-----------------------------------------------------------------

sample 156
TARGET CAPTION: poking Speaker so lightly that it doesn’t or almost doesn’t move
PRED   CAPTION: PRETENDING  TO  POKE  STACK  OF  COFFEE  CUPS 

TARGET  ACTION: POKING SOMETHING SO LIGHTLY THAT IT DOESN’T OR ALMOST DOESN’T MOVE
CLASSIF ACTION: Poking [something] so lightly that it doesn’t or almost doesn’t move
CAPTION ACTION: PRETENDING TO POKE SOMETHING

TARGET OBJECTS: ['Speaker']
PRED   OBJECTS: ['STACK OF COFFEE CUPS ']

---------------------------------------------------------

TARGET CAPTION: spilling tea onto table
PRED   CAPTION: HOLDING  PENCIL  CASE  BEHIND  VASE 

TARGET  ACTION: SPILLING SOMETHING ONTO SOMETHING
CLASSIF ACTION: Pouring [something] out of [something]
CAPTION ACTION: HOLDING SOMETHING BEHIND SOMETHING

TARGET OBJECTS: ['tea', 'table']
PRED   OBJECTS: ['PENCIL CASE  ', 'VASE']

-----------------------------------------------------------------

sample 174
TARGET CAPTION: feather falling like a feather or paper
PRED   CAPTION: DROPPING  A  COIN  ONTO  THE  FLOOR 

TARGET  ACTION: SOMETHING FALLING LIKE A FEATHER OR PAPER
CLASSIF ACTION: [Something] falling like a rock
CAPTION ACTION: DROPPING SOMETHING ONTO SOMETHING

TARGET OBJECTS: ['feather']
PRED   OBJECTS: ['COIN  ', 'FLOOR']

-----------------------------------------------------------------

sample 175
TARGET CAPTION: unfolding a piece of paper
PRED   CAPTION: PRETENDING  TO  BE  TEARING  A  PLASTIC  SCALE 

TARGET  ACTION: UNFOLDING SOMETHING
CLASSIF ACTION: Twisting [something]
CAPT

TARGET CAPTION: wiping floor off of towel
PRED   CAPTION: WIPING  WATER  OFF  OF  TABLE 

TARGET  ACTION: WIPING SOMETHING OFF OF SOMETHING
CLASSIF ACTION: Pretending to throw [something]
CAPTION ACTION: WIPING SOMETHING OFF OF SOMETHING

TARGET OBJECTS: ['floor', 'towel']
PRED   OBJECTS: ['WATER  ', 'TABLE']

yesss
-----------------------------------------------------------------

sample 193
woohoo
woohoo
TARGET CAPTION: pouring water into glass until it overflows
PRED   CAPTION: POURING  WATER  INTO  A  GLASS 

TARGET  ACTION: POURING SOMETHING INTO SOMETHING UNTIL IT OVERFLOWS
CLASSIF ACTION: Pouring [something] out of [something]
CAPTION ACTION: POURING SOMETHING INTO SOMETHING

TARGET OBJECTS: ['water', 'glass']
PRED   OBJECTS: ['WATER  ', 'GLASS']

-----------------------------------------------------------------

sample 194
TARGET CAPTION: glass falling like a rock
PRED   CAPTION: A  BALL  FALLING  LIKE  A  ROCK 

TARGET  ACTION: SOMETHING FALLING LIKE A ROCK
CLASSIF ACTION: [So

TARGET CAPTION: taking scissor out of box
PRED   CAPTION: TAKING  A  KNIFE  OUT  OF  THE  DRAWER 

TARGET  ACTION: TAKING SOMETHING OUT OF SOMETHING
CLASSIF ACTION: Taking [something] from [somewhere]
CAPTION ACTION: TAKING SOMETHING OUT OF SOMETHING

TARGET OBJECTS: ['scissor', 'box']
PRED   OBJECTS: ['KNIFE  ', 'DRAWER']

yesss
-----------------------------------------------------------------

sample 212
TARGET CAPTION: lifting book with bat on it
PRED   CAPTION: LIFTING  NOTEBOOK  WITH  SCISSORS  ON  IT 

TARGET  ACTION: LIFTING SOMETHING WITH SOMETHING ON IT
CLASSIF ACTION: Lifting [something] with [something] on it
CAPTION ACTION: LIFTING SOMETHING WITH SOMETHING ON IT

TARGET OBJECTS: ['book', 'bat']
PRED   OBJECTS: ['NOTEBOOK  ', 'SCISSORS']

yesss
-----------------------------------------------------------------

sample 213
TARGET CAPTION: spinning deodrant that quickly stops spinning
PRED   CAPTION: SPINNING  A  BOTTLE  THAT  QUICKLY  STOPS  SPINNING 

TARGET  ACTION: SPINNING

woohoo
TARGET CAPTION: pouring water onto wood
PRED   CAPTION: POURING  WATER  INTO  A  GLASS 

TARGET  ACTION: POURING SOMETHING ONTO SOMETHING
CLASSIF ACTION: Pouring [something] into [something]
CAPTION ACTION: POURING SOMETHING INTO SOMETHING

TARGET OBJECTS: ['water', 'wood']
PRED   OBJECTS: ['WATER  ', 'GLASS']

-----------------------------------------------------------------

sample 231
TARGET CAPTION: moving scissors up
PRED   CAPTION: LETTING  A  BALL  ROLL  DOWN  A  SLANTED  SURFACE 

TARGET  ACTION: MOVING SOMETHING UP
CLASSIF ACTION: Letting [something] roll along a flat surface
CAPTION ACTION: LETTING SOMETHING ROLL DOWN A SLANTED SURFACE

TARGET OBJECTS: ['scissors']
PRED   OBJECTS: ['BALL ']

-----------------------------------------------------------------

sample 232
TARGET CAPTION: putting toy on a surface
PRED   CAPTION: PUTTING  A  BATTERY  UPRIGHT  ON  THE  TABLE 

TARGET  ACTION: PUTTING SOMETHING ON A SURFACE
CLASSIF ACTION: Removing [something], revealing [some

TARGET CAPTION: stuffing Pen into Pen stand
PRED   CAPTION: CLOSING  TIN 

TARGET  ACTION: STUFFING SOMETHING INTO SOMETHING
CLASSIF ACTION: Pretending to take [something] out of [something]
CAPTION ACTION: CLOSING SOMETHING

TARGET OBJECTS: ['Pen', 'Pen stand']
PRED   OBJECTS: ['TIN ']

-----------------------------------------------------------------

sample 250
TARGET CAPTION: pretending to take Chocolate  from Box
PRED   CAPTION: PUTTING  A  BOOK 

TARGET  ACTION: PRETENDING TO TAKE SOMETHING FROM SOMEWHERE
CLASSIF ACTION: Putting [something] onto [something]
CAPTION ACTION: PUTTING SOMETHING ON A SURFACE

TARGET OBJECTS: ['Chocolate ', 'Box']
PRED   OBJECTS: ['BOOK ']

-----------------------------------------------------------------

sample 251
TARGET CAPTION: holding Remote behind Mobile
PRED   CAPTION: PRETENDING  TO  PUT  A  BOX  INTO  A  CABINET 

TARGET  ACTION: HOLDING SOMETHING BEHIND SOMETHING
CLASSIF ACTION: Putting [something] behind [something]
CAPTION ACTION: PRETENDI

TARGET CAPTION: opening a vessel
PRED   CAPTION: SHOWING  THAT  A  MUG  IS  EMPTY 

TARGET  ACTION: OPENING SOMETHING
CLASSIF ACTION: Showing that [something] is empty
CAPTION ACTION: SHOWING THAT SOMETHING IS EMPTY

TARGET OBJECTS: ['a vessel']
PRED   OBJECTS: ['MUG ']

-----------------------------------------------------------------

sample 269
TARGET CAPTION: pushing glasses from right to left
PRED   CAPTION: PULLING  NAIL  POLISH  FROM  RIGHT  TO  LEFT 

TARGET  ACTION: PUSHING SOMETHING FROM RIGHT TO LEFT
CLASSIF ACTION: Pulling [something] from right to left
CAPTION ACTION: PULLING SOMETHING FROM RIGHT TO LEFT

TARGET OBJECTS: ['glasses']
PRED   OBJECTS: ['NAIL POLISH ']

-----------------------------------------------------------------

sample 270
TARGET CAPTION: holding A stick
PRED   CAPTION: DROPPING  PLASTIC  BOTTLE  ONTO  MATTRESS 

TARGET  ACTION: HOLDING SOMETHING
CLASSIF ACTION: Pretending to open [something] without actually opening it
CAPTION ACTION: DROPPING SOMETHIN

TARGET CAPTION: spinning pen so it continues spinning
PRED   CAPTION: LIFTING  UP  ONE  END  OF  SPOON  WITHOUT  LETTING  IT  DROP  DOWN 

TARGET  ACTION: SPINNING SOMETHING SO IT CONTINUES SPINNING
CLASSIF ACTION: Pushing [something] so it spins
CAPTION ACTION: LIFTING UP ONE END OF SOMETHING WITHOUT LETTING IT DROP DOWN

TARGET OBJECTS: ['pen']
PRED   OBJECTS: ['SPOON ']

-----------------------------------------------------------------

sample 289
TARGET CAPTION: pretending to squeeze giant baby bottle 
PRED   CAPTION: SHOWING  A  SHADOW  OF  A  HAND  MAKING  SURE  THE  SHADOW  IS  NOT 

TARGET  ACTION: PRETENDING TO SQUEEZE SOMETHING
CLASSIF ACTION: Showing that [something] is empty
CAPTION ACTION: SHOWING A SHADOW OF SOMETHING MAKING SURE THE SHADOW IS NOT MOVING

TARGET OBJECTS: ['giant baby bottle ']
PRED   OBJECTS: ['HAND ']

-----------------------------------------------------------------

sample 290
TARGET CAPTION: lifting mousepad with keys on it
PRED   CAPTION: MOVING  PEN

TARGET CAPTION: stacking 3 candies
PRED   CAPTION: PUTTING  A  BAG  NEXT  TO  A  CHAIR 

TARGET  ACTION: STACKING NUMBER OF SOMETHING
CLASSIF ACTION: Putting [something similar to other things that are already on the table]
CAPTION ACTION: PUTTING SOMETHING NEXT TO SOMETHING

TARGET OBJECTS: ['3', 'candies']
PRED   OBJECTS: ['BAG  ', 'CHAIR']

-----------------------------------------------------------------

sample 309
TARGET CAPTION: bending small envelope so that it deforms
PRED   CAPTION: TAKING  A  BOOK 

TARGET  ACTION: BENDING SOMETHING SO THAT IT DEFORMS
CLASSIF ACTION: Taking [one of many similar things on the table]
CAPTION ACTION: TAKING SOMETHING FROM SOMEWHERE

TARGET OBJECTS: ['small envelope']
PRED   OBJECTS: ['BOOK ']

-----------------------------------------------------------------

sample 310
TARGET CAPTION: moving bottom of Christmas tree cross
PRED   CAPTION: PUTTING  A  <UNK>  NEXT  TO  A  PLANT 

TARGET  ACTION: MOVING PART OF SOMETHING
CLASSIF ACTION: Putting [s

TARGET CAPTION: pretending to turn A plant pot upside down
PRED   CAPTION: PRETENDING  TO  TURN  A  CAN  UPSIDE  DOWN 

TARGET  ACTION: PRETENDING TO TURN SOMETHING UPSIDE DOWN
CLASSIF ACTION: Pretending to turn [something] upside down
CAPTION ACTION: PRETENDING TO TURN SOMETHING UPSIDE DOWN

TARGET OBJECTS: ['A plant pot']
PRED   OBJECTS: ['CAN ']

yesss
-----------------------------------------------------------------

sample 328
woohoo
TARGET CAPTION: tearing paper into two pieces
PRED   CAPTION: TEARING  PAPER  INTO  TWO  PIECES 

TARGET  ACTION: TEARING SOMETHING INTO TWO PIECES
CLASSIF ACTION: Tearing [something] into two pieces
CAPTION ACTION: TEARING SOMETHING INTO TWO PIECES

TARGET OBJECTS: ['paper']
PRED   OBJECTS: ['PAPER ']

yesss
-----------------------------------------------------------------

sample 329
TARGET CAPTION: throwing a button
PRED   CAPTION: THROWING  A  BALL  AGAINST  THE  WALL 

TARGET  ACTION: THROWING SOMETHING
CLASSIF ACTION: Throwing [something] in the

TARGET CAPTION: pretending to put a computer game into a package
PRED   CAPTION: PUTTING  A  BAG  ON  A  SURFACE 

TARGET  ACTION: PRETENDING TO PUT SOMETHING INTO SOMETHING
CLASSIF ACTION: Covering [something] with [something]
CAPTION ACTION: PUTTING SOMETHING ON A SURFACE

TARGET OBJECTS: ['a computer game', 'a package']
PRED   OBJECTS: ['BAG ']

-----------------------------------------------------------------

sample 348
TARGET CAPTION: throwing a portfolio
PRED   CAPTION: THROWING  PEN 

TARGET  ACTION: THROWING SOMETHING
CLASSIF ACTION: Throwing [something]
CAPTION ACTION: THROWING SOMETHING

TARGET OBJECTS: ['a portfolio']
PRED   OBJECTS: ['PEN ']

yesss
-----------------------------------------------------------------

sample 349
TARGET CAPTION: dropping a card behind a motorcycle helmet
PRED   CAPTION: DROPPING  A  MATCHSTICK  BEHIND  A  PADLOCK 

TARGET  ACTION: DROPPING SOMETHING BEHIND SOMETHING
CLASSIF ACTION: Dropping [something] into [something]
CAPTION ACTION: DROPPING 

TARGET CAPTION: closing scissor
PRED   CAPTION: MOVING  PEN  AWAY  FROM  PEN 

TARGET  ACTION: CLOSING SOMETHING
CLASSIF ACTION: Pretending to pick [something] up
CAPTION ACTION: MOVING SOMETHING AWAY FROM SOMETHING

TARGET OBJECTS: ['scissor']
PRED   OBJECTS: ['PEN  ', 'PEN']

-----------------------------------------------------------------

sample 367
TARGET CAPTION: showing spectacles behind box
PRED   CAPTION: SHOWING  A  SHADOW  OF  A  <UNK>  MAKING  SURE  THE  SHADOW  IS  NOT 

TARGET  ACTION: SHOWING SOMETHING BEHIND SOMETHING
CLASSIF ACTION: Showing [something] to the camera
CAPTION ACTION: SHOWING A SHADOW OF SOMETHING MAKING SURE THE SHADOW IS NOT MOVING

TARGET OBJECTS: ['spectacles', 'box']
PRED   OBJECTS: ['<UNK> ']

-----------------------------------------------------------------

sample 368
TARGET CAPTION: putting spectacles into box
PRED   CAPTION: PUTTING  A  CAMERA  ONTO  A  BLACK  BOX 

TARGET  ACTION: PUTTING SOMETHING INTO SOMETHING
CLASSIF ACTION: Putting [somet

TARGET CAPTION: moving Plastic spoon closer to Orange
PRED   CAPTION: PUSHING  A  COIN  WITH  A  BUTTON 

TARGET  ACTION: MOVING SOMETHING CLOSER TO SOMETHING
CLASSIF ACTION: Pushing [something] with [something]
CAPTION ACTION: PUSHING SOMETHING WITH SOMETHING

TARGET OBJECTS: ['Plastic spoon', 'Orange']
PRED   OBJECTS: ['COIN  ', 'BUTTON']

-----------------------------------------------------------------

sample 385
woohoo
woohoo
TARGET CAPTION: covering Pen with Paper
PRED   CAPTION: COVERING  PEN  WITH  PAPER 

TARGET  ACTION: COVERING SOMETHING WITH SOMETHING
CLASSIF ACTION: Covering [something] with [something]
CAPTION ACTION: COVERING SOMETHING WITH SOMETHING

TARGET OBJECTS: ['Pen', 'Paper']
PRED   OBJECTS: ['PEN  ', 'PAPER']

yesss
-----------------------------------------------------------------

sample 386
TARGET CAPTION: covering Cell phone with Envelope
PRED   CAPTION: CLOSING  A  DOOR 

TARGET  ACTION: COVERING SOMETHING WITH SOMETHING
CLASSIF ACTION: Covering [something]

TARGET CAPTION: folding paper
PRED   CAPTION: CLOSING  A  BOOK 

TARGET  ACTION: FOLDING SOMETHING
CLASSIF ACTION: Folding [something]
CAPTION ACTION: CLOSING SOMETHING

TARGET OBJECTS: ['paper']
PRED   OBJECTS: ['BOOK ']

-----------------------------------------------------------------

sample 404
TARGET CAPTION: holding A remote
PRED   CAPTION: TURNING  THE  CAMERA  LEFT  WHILE  FILMING  MOBILE  PHONE  CHARGER 

TARGET  ACTION: HOLDING SOMETHING
CLASSIF ACTION: Holding [something] behind [something]
CAPTION ACTION: TURNING THE CAMERA LEFT WHILE FILMING SOMETHING

TARGET OBJECTS: ['A remote']
PRED   OBJECTS: ['MOBILE PHONE CHARGER ']

-----------------------------------------------------------------

sample 405
TARGET CAPTION: picking glass up
PRED   CAPTION: LIFTING  TOY  <UNK>  UP  COMPLETELY  WITHOUT  LETTING  IT  DROP  DOWN 

TARGET  ACTION: PICKING SOMETHING UP
CLASSIF ACTION: Lifting [something] up completely without letting it drop down
CAPTION ACTION: LIFTING SOMETHING UP COM

TARGET CAPTION: wiping rain drops  off of car
PRED   CAPTION: WIPING  DUST  OFF  OF  WINDOW 

TARGET  ACTION: WIPING SOMETHING OFF OF SOMETHING
CLASSIF ACTION: Wiping [something] off of [something]
CAPTION ACTION: WIPING SOMETHING OFF OF SOMETHING

TARGET OBJECTS: ['rain drops ', 'car']
PRED   OBJECTS: ['DUST  ', 'WINDOW']

yesss
-----------------------------------------------------------------

sample 424
woohoo
woohoo
TARGET CAPTION: moving mouse closer to keyboard
PRED   CAPTION: MOVING  MOUSE  CLOSER  TO  KEYBOARD 

TARGET  ACTION: MOVING SOMETHING CLOSER TO SOMETHING
CLASSIF ACTION: Moving [something] closer to [something]
CAPTION ACTION: MOVING SOMETHING CLOSER TO SOMETHING

TARGET OBJECTS: ['mouse', 'keyboard']
PRED   OBJECTS: ['MOUSE  ', 'KEYBOARD']

yesss
-----------------------------------------------------------------

sample 425
TARGET CAPTION: letting Toy car roll along a flat surface
PRED   CAPTION: LETTING  BOTTLE  ROLL  ALONG  A  FLAT  SURFACE 

TARGET  ACTION: LETTING 

TARGET CAPTION: pretending to put an envelope  onto a bowl
PRED   CAPTION: PRETENDING  TO  PUT  A  SPOON  INTO  A  PLATE 

TARGET  ACTION: PRETENDING TO PUT SOMETHING ONTO SOMETHING
CLASSIF ACTION: Pretending to put [something] into [something]
CAPTION ACTION: PRETENDING TO PUT SOMETHING INTO SOMETHING

TARGET OBJECTS: ['an envelope ', 'a bowl']
PRED   OBJECTS: ['SPOON  ', 'PLATE']

-----------------------------------------------------------------

sample 444
TARGET CAPTION: holding a pen in front of a glass
PRED   CAPTION: SHOWING  A  BOX  BEHIND  A  BOTTLE 

TARGET  ACTION: HOLDING SOMETHING IN FRONT OF SOMETHING
CLASSIF ACTION: Showing [something] next to [something]
CAPTION ACTION: SHOWING SOMETHING BEHIND SOMETHING

TARGET OBJECTS: ['a pen', 'a glass']
PRED   OBJECTS: ['BOX  ', 'BOTTLE']

-----------------------------------------------------------------

sample 445
TARGET CAPTION: holding a pencil over a sink
PRED   CAPTION: HOLDING  A  SPOON  IN  FRONT  OF  THE  CUP 

TARGET  ACT

TARGET CAPTION: laying paper roll on the table on its side, not upright
PRED   CAPTION: PUTTING  A  REMOTE  AND  A  PEN  ON  THE  TABLE 

TARGET  ACTION: LAYING SOMETHING ON THE TABLE ON ITS SIDE NOT UPRIGHT
CLASSIF ACTION: Tearing [something] into two pieces
CAPTION ACTION: PUTTING SOMETHING AND SOMETHING ON THE TABLE

TARGET OBJECTS: ['paper roll']
PRED   OBJECTS: ['REMOTE  ', 'PEN']

-----------------------------------------------------------------

sample 464
TARGET CAPTION: laying pitcher on the table on its side, not upright
PRED   CAPTION: COVERING  A  BOOK  WITH  A  PAPER 

TARGET  ACTION: LAYING SOMETHING ON THE TABLE ON ITS SIDE NOT UPRIGHT
CLASSIF ACTION: Covering [something] with [something]
CAPTION ACTION: COVERING SOMETHING WITH SOMETHING

TARGET OBJECTS: ['pitcher']
PRED   OBJECTS: ['BOOK  ', 'PAPER']

-----------------------------------------------------------------

sample 465
TARGET CAPTION: dropping shirt in front of shirt
PRED   CAPTION: DROPPING  BOOK  ONTO  TABLE 

TARGET CAPTION: trying to bend Crochet hook so nothing happens
PRED   CAPTION: TRYING  TO  BEND  KNIFE  SO  NOTHING  HAPPENS 

TARGET  ACTION: TRYING TO BEND SOMETHING UNBENDABLE SO NOTHING HAPPENS
CLASSIF ACTION: Trying to bend [something unbendable] so nothing happens
CAPTION ACTION: TRYING TO BEND SOMETHING UNBENDABLE SO NOTHING HAPPENS

TARGET OBJECTS: ['Crochet hook']
PRED   OBJECTS: ['KNIFE ']

yesss
-----------------------------------------------------------------

sample 483
TARGET CAPTION: lifting up one end of A marker without letting it drop down
PRED   CAPTION: LIFTING  UP  ONE  END  OF  CUTTER  WITHOUT  LETTING  IT  DROP  DOWN 

TARGET  ACTION: LIFTING UP ONE END OF SOMETHING WITHOUT LETTING IT DROP DOWN
CLASSIF ACTION: Lifting up one end of [something] without letting it drop down
CAPTION ACTION: LIFTING UP ONE END OF SOMETHING WITHOUT LETTING IT DROP DOWN

TARGET OBJECTS: ['A marker']
PRED   OBJECTS: ['CUTTER ']

yesss
----------------------------------------------------

TARGET CAPTION: showing spects on top of book
PRED   CAPTION: SHOWING  A  PHOTO  OF  A  CAR  TO  THE  CAMERA 

TARGET  ACTION: SHOWING SOMETHING ON TOP OF SOMETHING
CLASSIF ACTION: Showing [something] on top of [something]
CAPTION ACTION: SHOWING A PHOTO OF SOMETHING TO THE CAMERA

TARGET OBJECTS: ['spects', 'book']
PRED   OBJECTS: ['CAR ']

-----------------------------------------------------------------

sample 502
TARGET CAPTION: moving A block and An ornament so they collide with each other
PRED   CAPTION: MOVING  A  BOTTLE  AND  A  TOY  SO  THEY  COLLIDE  WITH  EACH  OTHER 

TARGET  ACTION: MOVING SOMETHING AND SOMETHING SO THEY COLLIDE WITH EACH OTHER
CLASSIF ACTION: [Something] colliding with [something] and both are being deflected
CAPTION ACTION: MOVING SOMETHING AND SOMETHING SO THEY COLLIDE WITH EACH OTHER

TARGET OBJECTS: ['A block', 'An ornament']
PRED   OBJECTS: ['BOTTLE  ', 'TOY']

yesss
-----------------------------------------------------------------

sample 503
TARGE

woohoo
TARGET CAPTION: A ball being deflected from A trash can
PRED   CAPTION: LETTING  BALL  ROLL  ALONG  A  FLAT  SURFACE 

TARGET  ACTION: SOMETHING BEING DEFLECTED FROM SOMETHING
CLASSIF ACTION: Letting [something] roll along a flat surface
CAPTION ACTION: LETTING SOMETHING ROLL ALONG A FLAT SURFACE

TARGET OBJECTS: ['A ball', 'A trash can']
PRED   OBJECTS: ['BALL ']

-----------------------------------------------------------------

sample 520
TARGET CAPTION: bending Rubber band  so that it deforms
PRED   CAPTION: BENDING  PLASTIC  KNIFE  UNTIL  IT  BREAKS 

TARGET  ACTION: BENDING SOMETHING SO THAT IT DEFORMS
CLASSIF ACTION: Tearing [something] into two pieces
CAPTION ACTION: BENDING SOMETHING UNTIL IT BREAKS

TARGET OBJECTS: ['Rubber band ']
PRED   OBJECTS: ['PLASTIC KNIFE ']

-----------------------------------------------------------------

sample 521
woohoo
TARGET CAPTION: tearing A piece of paper just a little bit
PRED   CAPTION: TEARING  PAPER  INTO  TWO  PIECES 

TARGET  A

TARGET CAPTION: holding A bottle in front of A cup
PRED   CAPTION: POURING  WATER  OUT  OF  BUCKET 

TARGET  ACTION: HOLDING SOMETHING IN FRONT OF SOMETHING
CLASSIF ACTION: Putting [something] on a surface
CAPTION ACTION: POURING SOMETHING OUT OF SOMETHING

TARGET OBJECTS: ['A bottle', 'A cup']
PRED   OBJECTS: ['WATER  ', 'BUCKET']

-----------------------------------------------------------------

sample 539
TARGET CAPTION: pushing A paint brush so that it almost falls off but doesn't
PRED   CAPTION: PUSHING  A  PENCIL  BOX  WITH  A  BROOM 

TARGET  ACTION: PUSHING SOMETHING SO THAT IT ALMOST FALLS OFF BUT DOESN'T
CLASSIF ACTION: Pushing [something] with [something]
CAPTION ACTION: PUSHING SOMETHING WITH SOMETHING

TARGET OBJECTS: ['A paint brush']
PRED   OBJECTS: ['PENCIL BOX  ', 'BROOM']

-----------------------------------------------------------------

sample 540
TARGET CAPTION: covering a plush with a shirt
PRED   CAPTION: COVERING  A  BOOK  WITH  A  BLANKET 

TARGET  ACTION: COV

TARGET CAPTION: rolling coconut on a flat surface
PRED   CAPTION: LETTING  A  BALL  ROLL  ALONG  A  FLAT  SURFACE 

TARGET  ACTION: ROLLING SOMETHING ON A FLAT SURFACE
CLASSIF ACTION: Letting [something] roll along a flat surface
CAPTION ACTION: LETTING SOMETHING ROLL ALONG A FLAT SURFACE

TARGET OBJECTS: ['coconut']
PRED   OBJECTS: ['BALL ']

-----------------------------------------------------------------

sample 558
TARGET CAPTION: bending drumstick until it breaks
PRED   CAPTION: MOVING  A  TOY  CAR  AND  A  TOY  ELEPHANT  CLOSER  TO  EACH  OTHER 

TARGET  ACTION: BENDING SOMETHING UNTIL IT BREAKS
CLASSIF ACTION: Moving [something] and [something] closer to each other
CAPTION ACTION: MOVING SOMETHING AND SOMETHING CLOSER TO EACH OTHER

TARGET OBJECTS: ['drumstick']
PRED   OBJECTS: ['TOY CAR  ', 'TOYELEPHANT ']

-----------------------------------------------------------------

sample 559
TARGET CAPTION: putting camphor packet onto sugar bottle
PRED   CAPTION: DROPPING  A  BOX  ONT

In [None]:
for ca in correct_actions:
    
    print("{}/{} of {} actions correct".format(correct_actions[ca], all_actions[ca], ca))

In [None]:
for correct_key in correct_objects.keys():
    denom = 0
    if correct_key in all_objects.keys():
        denom += all_objects[correct_key]
        #print("{}:{}".format(correct_key, all_objects[correct_key]))
    #for j in all_objects.keys():
    #    if correct_key in j or correct_key in j:
    #        print("{}:{}".format(j, all_objects[j]))
    #        denom += all_objects[j]
        
                    
    print (">>model got  {}/{} of '{}'s correct".format(correct_objects[correct_key],denom, correct_key ))
    print("-"*100)
    

In [None]:
a = sum(correct_objects.values())
b = sum(all_objects.values())

print("{} out of {} objects are correctly predicted: {:.2}% ".format(a, b, a/b*100))

In [91]:
len(all_objects.keys())

599

In [92]:
c = sum(correct_actions.values())
d = sum(all_actions.values())

print("{} out of {} actions are correctly predicted: {:}% ".format(c, d, c/d*100))

317 out of 1000 actions are correctly predicted: 31.7% 


In [93]:
len(all_actions.keys())

164

In [101]:
e=sum(correct_actions_classif.values())
print("{} out of {} classification actions are correctly classified: {:}% ".format(e, d, e/d*100))

359 out of 1000 classification actions are correctly classified: 35.9% 
