In [1]:
import json
import os
import argparse
import torch
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from models import EncoderRNN, DecoderRNN, S2VTAttModel, S2VTModel
from dataloader import VideoDataset
import misc.utils as utils
from misc.cocoeval import suppress_stdout_stderr, COCOScorer
import numpy as np

from pandas.io.json import json_normalize
import pandas as pd

def convert_data_to_coco_scorer_format(data_frame):
        gts = {}
        for row in zip(data_frame["caption"], data_frame["video_id"]):
            if row[1] in gts:
                gts[row[1]].append(
                    {'image_id': row[1], 'cap_id': len(gts[row[1]]), 'caption': row[0]})
            else:
                gts[row[1]] = []
                gts[row[1]].append(
                    {'image_id': row[1], 'cap_id': len(gts[row[1]]), 'caption': row[0]})
        return gts

tail = [str(i) for i in range(0,3001,50)];
for t in tail:
    
    print(t)
    args = {'recover_opt': 'data/save/opt_info.json', 'saved_model': 'data/save_533PJ/model_50.pth', 'dump_json': 1, 'results_path': 'results/model_50', 'dump_path': 0, 'gpu': '0', 'batch_size': 25, 'sample_max': 1, 'temperature': 1.0, 'beam_size': 1}


    args['saved_model'] = 'data/save_533PJ_2layer/model_'+t+'.pth';
    args['results_path'] = 'results_533PJ_2layer/model_' + t;

    opt = json.load(open(args["recover_opt"]))
    for k, v in args.items():
            opt[k] = v
    os.environ['CUDA_VISIBLE_DEVICES'] = opt["gpu"]
    with torch.no_grad():
        dataset = VideoDataset(opt, "test")

        opt["vocab_size"] = dataset.get_vocab_size()

        opt["seq_length"] = dataset.max_len

        encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"], bidirectional=opt["bidirectional"], input_dropout_p=opt["input_dropout_p"], rnn_dropout_p=opt["rnn_dropout_p"]);
        decoder = DecoderRNN(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"], input_dropout_p=opt["input_dropout_p"], rnn_dropout_p=opt["rnn_dropout_p"], bidirectional=opt["bidirectional"]);
        model = S2VTAttModel(encoder, decoder).cuda()

        model.load_state_dict(torch.load(opt["saved_model"]))

        crit = utils.LanguageModelCriterion()

        model.encoder.rnn.bidirectional = bool(model.encoder.rnn.bidirectional)

        vocab = dataset.get_vocab()

        model.eval()

        loader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)

        scorer = COCOScorer()

        gt_dataframe = json_normalize(
                json.load(open(opt["input_json"]))['sentences'])



        gts = convert_data_to_coco_scorer_format(gt_dataframe)
        results = []
        samples = {}
    for data in loader:
        # forward the model to get loss
        fc_feats = data['fc_feats'].cuda()
        labels = data['labels'].cuda()
        masks = data['masks'].cuda()
        video_ids = data['video_ids']

        # forward the model to also get generated samples for each image
        with torch.no_grad():
            seq_probs, seq_preds = model(
                fc_feats, mode='inference', opt=opt)

            sents = utils.decode_sequence(vocab, seq_preds)

            for k, sent in enumerate(sents):
                video_id = video_ids[k]
                samples[video_id] = [{'image_id': video_id, 'caption': sent}]

    with suppress_stdout_stderr():
        valid_score = scorer.score(gts, samples, samples.keys())
    results.append(valid_score)
    print(valid_score)

    if not os.path.exists(opt["results_path"]):
        os.makedirs(opt["results_path"])

    with open(os.path.join(opt["results_path"], "scores.txt"), 'a') as scores_table:
        scores_table.write(json.dumps(results[0]) + "\n")
    with open(os.path.join(opt["results_path"], opt["model"].split("/")[-1].split('.')[0] + ".json"), 'w') as prediction_results:
        json.dump({"predictions": samples, "scores": valid_score}, prediction_results)
    del model
    del encoder
    del decoder
    del results
    del samples
    del gts
    del gt_dataframe
    del seq_probs
    del seq_preds
    del sents
    del fc_feats
    del labels
    del masks
    del dataset
    del scorer
    del loader
    del crit
    torch.cuda.empty_cache()

3000
vocab size is  16860
number of train videos:  6501
number of val videos:  500
number of test videos:  2999
load feats from ['data/feats/resnet152']
max sequence length in data is 28


  "num_layers={}".format(dropout, num_layers))


init COCO-EVAL scorer




tokenization...
setting up scorers...
computing Bleu score...
{'testlen': 25970, 'reflen': 25896, 'guess': [25970, 22971, 19972, 16973], 'correct': [18938, 9429, 4168, 1629]}
ratio:1.002858
Bleu_1: 0.729
Bleu_2: 0.547
Bleu_3: 0.397
Bleu_4: 0.278
computing METEOR score...
METEOR: 0.249
computing Rouge score...
ROUGE_L: 0.539
computing CIDEr score...
CIDEr: 0.343
{'Bleu_1': 0.7292260300346273, 'Bleu_2': 0.5471091124081876, 'Bleu_3': 0.3967814405238057, 'Bleu_4': 0.27826214630402224, 'METEOR': 0.24875412760972368, 'ROUGE_L': 0.5394962946542005, 'CIDEr': 0.34307183701377975}


In [2]:
import torch
import gc
for obj in gc.get_objects():
    try:
        if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)):
            print(type(obj), obj.size())
    except:
        pass

<class 'torch.Tensor'> torch.Size([24, 28])
<class 'torch.Tensor'> torch.Size([24, 28])
<class 'torch.Tensor'> torch.Size([24, 20, 28])
<class 'torch.Tensor'> torch.Size([24, 40, 2048])


