In [1]:
import os
import torch

from get_loader import (VideoDataset_to_VideoCaptionsLoader, Vocabulary,
                        get_loader)
from trainer import Trainer
import pandas as pd

In [17]:
gpu = '0'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = gpu

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

dataset = "MSVD"
checkpoints_dir = os.path.join("checkpoints", dataset)
output_csv_dir = os.path.join("results", dataset)

batch_size = 128

In [18]:
# Get the dataset and dataloaders

dataset_folder = os.path.join("datasets", dataset)
vocab_pkl = os.path.join(dataset_folder, "metadata", "vocab.pkl")
vocab = Vocabulary.load(vocab_pkl)

val_loader, _ = get_loader(
    root_dir=dataset_folder,
    dataset=dataset,
    split="val",
    batch_size=batch_size,
    vocab_pkl=vocab_pkl,
)
test_loader, _ = get_loader(
    root_dir=dataset_folder,
    dataset=dataset,
    split="val",#"test",
    batch_size=batch_size,
    vocab_pkl=vocab_pkl,
)

val_vidCap_loader = VideoDataset_to_VideoCaptionsLoader(val_loader.dataset, batch_size)
test_vidCap_loader = VideoDataset_to_VideoCaptionsLoader(test_loader.dataset, batch_size)

--------------------------------------------------
Initializing loader:
Dataset: MSVD
Split: val
Video_only ?: False
--------------------------------------------------
Before integrity check: 3720
After integrity check: 2562
After removing unverified: 1055
Loading Vocab: datasets\MSVD\metadata\vocab.pkl 
--------------------------------------------------
Initializing loader:
Dataset: MSVD
Split: val
Video_only ?: False
--------------------------------------------------
Before integrity check: 3720
After integrity check: 2562
After removing unverified: 1055
Loading Vocab: datasets\MSVD\metadata\vocab.pkl 


In [19]:
# Get a trainer for model evaluation
tr = Trainer(checkpoint_name='test.ckpt', log_dir='trash')
tr.device = device

score_results = []

# Load the models and predict
for ckpt in os.listdir(checkpoints_dir):
    if ckpt.endswith('_best.pt'):
        print("\nLoading model from checkpoint:", ckpt)
        model = torch.load(os.path.join(checkpoints_dir, ckpt))

        for loader, phase in zip([val_vidCap_loader, test_vidCap_loader], ['val', 'test']):
            # try:
            loader.video_only = ('video_only' in ckpt)
            scores, true_captions, generated_captions = tr.eval(model, loader, training_phase=phase, epoch=0, get_scores=True)

            model_name = ckpt.split('_best')[0]
            model_results = {
                'generated_captions': generated_captions,
                'true_captions': true_captions,
            }

            if not os.path.isdir(os.path.join(output_csv_dir, phase)):
                os.makedirs(os.path.join(output_csv_dir, phase))

            df = pd.DataFrame(model_results)
            df.to_csv(
                os.path.join(output_csv_dir, phase, f"{model_name}.csv"),
                header=True,
                columns=["generated_captions", "true_captions"],
            )

            print(scores)
            scores.update({'run': f"{model_name}.csv", 'split': phase})
            score_results.append(scores)

            # except:
            #     print(f"Could not compute {phase} results for {ckpt}.")

e of paper is being folded .)
b_BuSVZwq6M_1_9 >> [a man is riding a man and his on a basketball .] (a man makes a great play in a cricket game .)
bb6V0Grtub4_174_185 >> [a man is riding a bicycle on a stage .] (a man is playing on drums .)
bkazguPsusc_74_85 >> [a man is cutting a cat .] (a cat is sliding under a couch .)
bmxIurBrW5s_51_70 >> [a man is playing a piano .] (a woman practicing a volleyball)
bruzcOyIGeg_4_12 >> [a man plays a car .] (a man drives a remote control car .)
btuxO-C2IzE_64_72 >> [a man is playing the guitar .] (a lion jumps up and is hugged and petted by two long - <UNK> men .)
buJ5HDCinrM_150_166 >> [a man is a woman is slicing .] (a woman is putting make up on her face .)
bxDlC7YV5is_0_12 >> [a man is playing the guitar .] (a boy is playing a key - board between the people .)

{'testlen': 568, 'reflen': 552, 'guess': [568, 498, 428, 358], 'correct': [386, 159, 82, 27]}
ratio:1.028986
EVAL :   0%|          | 0/1 [00:00<?, ?it/s]{'Bleu_1': 0.679577464787536, 'Bl

In [20]:
# print(score_results)
df = pd.DataFrame(score_results)
df.head(16)

Unnamed: 0,Bleu_1,Bleu_2,Bleu_3,Bleu_4,ROUGE_L,CIDEr,run,split
0,0.684601,0.461646,0.351056,0.246333,0.651561,0.384021,SA-LSTM_30_epochs_video_audio_global_0.5_5e-5.csv,val
1,0.684601,0.461646,0.351056,0.246333,0.651561,0.384021,SA-LSTM_30_epochs_video_audio_global_0.5_5e-5.csv,test
2,0.704673,0.481519,0.371008,0.268346,0.660898,0.406292,SA-LSTM_30_epochs_video_audio_local_0.5_5e-5.csv,val
3,0.704673,0.481519,0.371008,0.268346,0.660898,0.406292,SA-LSTM_30_epochs_video_audio_local_0.5_5e-5.csv,test
4,0.681905,0.454774,0.338185,0.227978,0.646632,0.407902,SA-LSTM_30_epochs_video_audio_none_0.5_5e-5.csv,val
5,0.681905,0.454774,0.338185,0.227978,0.646632,0.407902,SA-LSTM_30_epochs_video_audio_none_0.5_5e-5.csv,test
6,0.679577,0.465804,0.346411,0.236627,0.646601,0.4848,SA-LSTM_30_epochs_video_global_0.5_5e-5.csv,val
7,0.679577,0.465804,0.346411,0.236627,0.646601,0.4848,SA-LSTM_30_epochs_video_global_0.5_5e-5.csv,test
8,0.653465,0.427636,0.315465,0.21706,0.624365,0.383496,SA-LSTM_30_epochs_video_local_0.5_5e-5.csv,val
9,0.653465,0.427636,0.315465,0.21706,0.624365,0.383496,SA-LSTM_30_epochs_video_local_0.5_5e-5.csv,test


In [25]:
dataset = "MSR-VTT"
results_csv = os.path.join("results", dataset, f"NLP_score_{dataset}.csv")
df = pd.read_csv(results_csv)

df = df[df['split'] == 'test']

def get_reconstructor(x):
    if 'global' in x: 
        return get_modalities(x) + ', Global'
    if 'local' in x: 
        return get_modalities(x) + ', Local'
    return '-'

def get_modalities(x):
    if 'video' in x and 'audio' in x: return 'A+V'
    if 'video' in x: return 'V'
    return '-'

df['Reconstructor'] = df['run'].apply(lambda x: get_reconstructor(x))
df['Modalities'] = df['run'].apply(lambda x: 'V' if 'video_only' in x else 'A+V')
df['Decoder'] = 'SA-LSTM'

df = df.groupby(['Decoder', 'Modalities', 'Reconstructor']).max().reset_index()

df.head(15)

df.round(3).to_latex(
    results_csv.replace('.csv', '.tex'), 
    # columns=['Decoder', 'Modalities', 'Reconstructor', 'Bleu_4', 'ROUGE_L', 'CIDEr'],
    columns=['Decoder', 'Modalities', 'Reconstructor', 'Bleu_4', 'METEOR', 'ROUGE_L', 'CIDEr'],

    #['Decoder', 'Modalities', 'Reconstructor', 'Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4', 'METEOR', 'ROUGE_L', 'CIDEr'],
    index=False,
)