In [1]:
import os
import pickle
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR

import warnings
warnings.filterwarnings('ignore')

# Import Path,Vocabulary, utility, evaluator and datahandler module
from config import Path
from dictionary import Vocabulary
from utils import Utils
from evaluate import Evaluator
from data import DataHandler


import random
import numpy as np
import copy

# Reproducibility: seed once, up front, through the project's Utils helper.
SEED = 1
utils = Utils()
utils.set_seed(SEED)

# Mean Pooling

In [None]:
# Mean Pooling setup: import the configuration class and the model class.
from config import ConfigMP
from models.mean_pooling.model import MeanPooling

# Create the Mean Pooling configuration object.
cfg = ConfigMP()
# specifying the dataset in configuration object from {'msvd','msrvtt'}
cfg.dataset = 'msvd'
# Build the path object from the configuration and the current working directory.
# Note the ordering: cfg.dataset is set before this call, the overrides below after it.
path = Path(cfg,os.getcwd())

# Override hyperparameters on the configuration object.
#cfg.batch_size = 100 #training batch size
cfg.n_layers = 1    # number of layers in decoder rnn
cfg.decoder_type = 'lstm'  # from {'lstm','gru'}
cfg.vocabulary_min_count = 1  # used by the vocabulary cell as the trim threshold

Vocabulary creation or load

In [None]:
# Build the vocabulary from every caption in the train/val/test splits, save it,
# then trim rare words.
#
# If a saved vocabulary file already exists, the construction part of this cell
# (everything up to and including voc.save()) can be replaced by:
#     voc = Vocabulary(cfg)
#     voc.load()
import json

# A single Vocabulary instance is enough; the earlier version built one,
# discarded it, and built a second one.
voc = Vocabulary(cfg)
data_handler = DataHandler(cfg,path,voc)

print(path.feature_file)
# Parse the feature file as JSON. The parsed result is not used; a context
# manager is used so the file handle is closed instead of leaked.
with open(path.feature_file) as feature_fp:
    json.load(feature_fp)

# Merge the caption dictionaries of the three splits and feed every annotation
# to the vocabulary.
text_dict = {}
text_dict.update(data_handler.train_dict)
text_dict.update(data_handler.val_dict)
text_dict.update(data_handler.test_dict)
for annotations in text_dict.values():
    for anno in annotations:
        voc.addSentence(anno)
# Saved before trimming, so the file on disk holds the untrimmed vocabulary.
voc.save()


# Filter rare words: remove all words whose count is below min_count.
min_count = cfg.vocabulary_min_count
voc.trim(min_count=min_count)
print('Vocabulary Size : ',voc.num_words)

Dataloaders, model, and evaluator

In [None]:
# Build the three dataset splits and wrap each of them in a dataloader.
data_handler = DataHandler(cfg, path, voc)
train_dset, val_dset, test_dset = data_handler.getDatasets()
train_loader, val_loader, test_loader = data_handler.getDataloader(
    train_dset, val_dset, test_dset
)

# Mean Pooling captioning model.
model = MeanPooling(voc, cfg, path)

# Test-data evaluators: identical arguments, differing only in decoding type.
evaluator_args = (model, test_loader, path, cfg, data_handler.test_dict)
test_evaluator_greedy = Evaluator(*evaluator_args)
test_evaluator_beam = Evaluator(*evaluator_args, decoding_type='beam')


Training loop

In [None]:
# Training loop for the Mean Pooling model.
# Learning rates and teacher forcing are written to cfg and then pushed into
# the model through update_hyperparameters.
cfg.encoder_lr = 1e-4
cfg.decoder_lr = 1e-3
cfg.teacher_forcing_ratio = 1.0
model.update_hyperparameters(cfg)

val_loss = []  # one validation-loss entry per reporting epoch
for e in range(1,3001):
    loss = model.train_epoch(train_loader,utils)
    # Report on the test set and record the validation loss every 50 epochs.
    if e%50 == 0 :
        print('Epoch -- >',e,'Loss -->',loss)
        print('greedy :',test_evaluator_greedy.evaluate(utils,model,e,loss))
        val_loss.append(model.loss_calculate(val_loader,utils))
        print('beam :',test_evaluator_beam.evaluate(utils,model,e,loss))
        # The former 'semibeam' report was removed: it used
        # test_evaluator_semibeam, which is never created in this notebook,
        # so the loop stopped with a NameError at the first report (epoch 50).

In [None]:
# Fetch one batch for the qualitative decoding checks in the following cells.
# The iterator is created here so the cell works on a fresh kernel; before, it
# relied on a `dataiter` that is only defined in a later section.
# NOTE(review): val_loader is chosen to match the SA-LSTM section — confirm.
dataiter = iter(val_loader)
# Builtin next() instead of the Python-2 style .next() method, which current
# DataLoader iterators do not provide.
features, targets, mask, max_length,_,_,_= next(dataiter)

In [None]:
# Run greedy decoding on the fetched features (moved to cfg.device) and show
# the second returned value as the cell output.
tsr,txt = model.GreedyDecoding(features.to(cfg.device))
txt

In [None]:
# Run beam decoding on the same features with return_single=False and show
# the returned texts together with their scores.
tsr,txt,scores = model.BeamDecoding(features.to(cfg.device),return_single=False)
txt,scores

In [None]:
# Convert the batch targets with the vocabulary for display
# (presumably the reference captions to compare against the decoded text).
utils.target_tensor_to_caption(voc,targets)

# SA-LSTM

In [None]:
#Import configuration and model 

from config import ConfigSALSTM
from models.SA_LSTM.model import SALSTM

# Create the SA-LSTM configuration object.
cfg = ConfigSALSTM(opt_encoder=True)
# specifying the dataset in configuration object from {'msvd','msrvtt'}
# NOTE(review): a later cell loads the checkpoint 'sa_lstm_msvd.pt' while the
# dataset selected here is 'msrvtt' — confirm which of the two is intended.
cfg.dataset = 'msrvtt'

#Changing the hyperparameters in configuration object
cfg.batch_size = 100 #training batch size
cfg.n_layers = 1    # number of layers in decoder rnn
cfg.decoder_type = 'lstm'  # from {'lstm','gru'}
cfg.dropout = 0.5
cfg.opt_param_init = False

#creation of path object
path = Path(cfg, os.getcwd())
#Vocabulary object, 
voc = Vocabulary(cfg)
#If vocabulary is already saved or downloaded the saved file
voc.load() #comment this if using vocabulary for the first time or with no saved file

# NOTE(review): the threshold is hard-coded to 2 here, whereas the other
# sections read cfg.vocabulary_min_count — confirm this is deliberate.
min_count = 2 #remove all words below count min_count
voc.trim(min_count=min_count)
print('Vocabulary Size : ',voc.num_words)

In [None]:
# Dataset splits and their dataloaders for the SA-LSTM experiments.
data_handler = DataHandler(cfg, path, voc)
train_dset, val_dset, test_dset = data_handler.getDatasets()
train_loader, val_loader, test_loader = data_handler.getDataloader(
    train_dset, val_dset, test_dset
)

# SA-LSTM captioning model.
model = SALSTM(voc, cfg, path)

# Test-data evaluators: same arguments, greedy (default) versus beam decoding.
evaluator_args = (model, test_loader, path, cfg, data_handler.test_dict)
test_evaluator_greedy = Evaluator(*evaluator_args)
test_evaluator_beam = Evaluator(*evaluator_args, decoding_type='beam')

In [None]:
# Replace the freshly built model with a fully serialized one loaded from disk.
# map_location remaps the stored tensors onto cfg.device, so a checkpoint that
# was saved on a GPU can also be loaded on a CPU-only machine.
# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code — only load checkpoints from a trusted source. Recent PyTorch releases
# default to weights_only=True, which rejects whole-model pickles like this one.
# NOTE(review): the file name says 'msvd' while cfg.dataset is 'msrvtt' in this
# section — confirm the checkpoint matches the vocabulary and data in use.
model = torch.load(os.path.join('Saved','sa_lstm_msvd.pt'), map_location=cfg.device)

In [None]:
# SA-LSTM training loop.
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Optimisation settings are written to cfg and then pushed into the model.
cfg.encoder_lr = 1e-4
cfg.decoder_lr = 1e-4
cfg.teacher_forcing_ratio = 1.0
model.update_hyperparameters(cfg)

# Optional plateau-based LR scheduling (disabled):
# lr_scheduler = ReduceLROnPlateau(model.dec_optimizer, mode='min', factor=cfg.lr_decay_gamma,
#                                      patience=cfg.lr_decay_patience, verbose=True)
report_every = 50
for e in range(1, 1351):
    loss_train = model.train_epoch(train_loader, utils)
    # Skip reporting except on every report_every-th epoch.
    if e % report_every != 0:
        continue
    print('Epoch -- >', e, 'Loss -->', loss_train)
    greedy_scores = test_evaluator_greedy.evaluate(utils, model, e, loss_train)
    print('greedy :', greedy_scores)
    beam_scores = test_evaluator_beam.evaluate(utils, model, e, loss_train)
    print('beam :', beam_scores)

In [None]:
# Fetch one validation batch for the qualitative decoding checks.
dataiter = iter(val_loader)
# Builtin next() instead of the Python-2 style .next() method, which current
# DataLoader iterators do not provide.
features, targets, mask, max_length,_,motion_feat,object_feat= next(dataiter)


In [None]:
# Run greedy decoding on the fetched features (moved to cfg.device); the third
# returned value is discarded and the second is shown as the cell output.
tsr,txt,_ = model.GreedyDecoding(features.to(cfg.device))
txt

In [None]:
# Convert the batch targets with the vocabulary for display
# (presumably the reference captions to compare against the decoded text).
utils.target_tensor_to_caption(voc,targets)

In [None]:
# Run beam decoding on the same features with default arguments and show the
# second returned value as the cell output.
tsr,txt,scr = model.BeamDecoding(features.to(cfg.device))
txt

# RecNet

In [None]:
#Import configuration and model 

from config import ConfigRecNet
from models.RecNet.model import RecNet

# Create the RecNet configuration object.
cfg = ConfigRecNet()
# specifying the dataset in configuration object from {'msvd','msrvtt'}
cfg.dataset = 'msvd'

#Changing the hyperparameters in configuration object
cfg.batch_size = 100 #training batch size
cfg.n_layers = 1    # number of layers in decoder rnn
cfg.decoder_type = 'lstm'  # from {'lstm','gru'}


#creation of path object
path = Path(cfg,os.getcwd())
#Vocabulary object, 
voc = Vocabulary(cfg)
#If vocabulary is already saved or downloaded the saved file
voc.load() #comment this if using vocabulary for the first time or with no saved file
# Trim threshold comes from the configuration object.
min_count = cfg.vocabulary_min_count #remove all words below count min_count
voc.trim(min_count=min_count)
print('Vocabulary Size : ',voc.num_words)

In [None]:
# Dataset splits and their dataloaders for the RecNet experiments.
data_handler = DataHandler(cfg, path, voc)
train_dset, val_dset, test_dset = data_handler.getDatasets()
train_loader, val_loader, test_loader = data_handler.getDataloader(
    train_dset, val_dset, test_dset
)

# RecNet captioning model.
model = RecNet(voc, cfg, path)

# Test-data evaluators: same arguments, greedy (default) versus beam decoding.
evaluator_args = (model, test_loader, path, cfg, data_handler.test_dict)
test_evaluator_greedy = Evaluator(*evaluator_args)
test_evaluator_beam = Evaluator(*evaluator_args, decoding_type='beam')

In [None]:
# Stage-1 Training, Or load model after stage-1 Training.
# Here the stage-1 result is loaded: encoder and decoder weights come from the
# saved SA-LSTM state-dict files in the saved-models directory.

encoder_state_dict_file = os.path.join(path.saved_models_path,'sa_lstm_encoder_msvd.pt')
decoder_state_dict_file = os.path.join(path.saved_models_path,'sa_lstm_decoder_msvd.pt')
#print(encoder_state_dict_file)
# map_location='cpu' lets the files load on machines without the GPU they were
# saved from; load_state_dict then copies the values into the model's existing
# parameters, so the model stays on whatever device it already lives on.
model.encoder.load_state_dict(torch.load(encoder_state_dict_file, map_location='cpu'))
model.decoder.load_state_dict(torch.load(decoder_state_dict_file, map_location='cpu'))
# Evaluate the loaded weights; 1350 and 1.0 are passed in the epoch and loss
# argument positions used by the training loops.
print('greedy :',test_evaluator_greedy.evaluate(utils,model,1350,1.0))

In [None]:
#Stage-2 Training
# Learning rates and stage-2 settings are written to cfg and then pushed into
# the model through update_hyperparameters.
cfg.encoder_lr = 1e-3
cfg.decoder_lr = 1e-3
cfg.global_lr = 1e-3
cfg.local_lr = 1e-2
cfg.teacher_forcing_ratio = 1.0
cfg.training_stage = 2
cfg.lmda = 0.1
model.update_hyperparameters(cfg)
# Optional plateau-based LR scheduling (disabled):
# lr_scheduler = ReduceLROnPlateau(model.dec_optimizer, mode='min', factor=cfg.lr_decay_gamma,
#                                      patience=cfg.lr_decay_patience, verbose=True)
for e in range(1,2501):
    # train_epoch returns two losses at this stage: likelihood and reconstruction.
    lloss_train, recloss_train = model.train_epoch(train_loader,utils)
    #loss_val = model.train_epoch(val_loader,utils)
    #lr_scheduler.step(loss_train)
    # Report on the test set every 10 epochs.
    if e%10 == 0 :
        # NOTE(review): these modules are switched to eval mode here and this
        # cell never switches them back — confirm that model.train_epoch
        # restores train mode, otherwise later epochs train in eval mode.
        model.encoder.eval()
        model.decoder.eval()
        model.local_reconstructor.eval()
        print('Epoch -- >',e,'Likelihood Loss -->',lloss_train,'Reconstruction Loss -->',recloss_train)
        print('greedy :',test_evaluator_greedy.evaluate(utils,model,e,lloss_train))
        print('beam :',test_evaluator_beam.evaluate(utils,model,e,lloss_train))

In [None]:
# Fetch one training batch and show the size of its feature tensor.
dataiter = iter(train_loader)
# Builtin next() instead of the Python-2 style .next() method, which current
# DataLoader iterators do not provide.
features, targets, mask, max_length,_,motion_feat,object_feat= next(dataiter)
features.size()

# MARN

In [2]:
#Import configuration and model 


from config import ConfigMARN
from models.MARN.model import MARN

# Create the MARN configuration object.
cfg = ConfigMARN()
# specifying the dataset in configuration object from {'msvd','msrvtt'}
cfg.dataset = 'msvd'

# No hyperparameters are overridden here; the ConfigMARN values are used as-is.



#creation of path object
path = Path(cfg,os.getcwd())
#Vocabulary object, 
voc = Vocabulary(cfg)
#If vocabulary is already saved or downloaded the saved file
voc.load() #comment this if using vocabulary for the first time or with no saved file
# Trim threshold comes from the configuration object.
min_count = cfg.vocabulary_min_count #remove all words below count min_count
voc.trim(min_count=min_count)
print('Vocabulary Size : ',voc.num_words)

Saved/msvd_word2index_dic.p
keep_words 3981 / 12596 = 0.3161
Vocabulary Size :  3984


In [3]:
# Dataset splits and their dataloaders for the MARN experiments.
data_handler = DataHandler(cfg, path, voc)
train_dset, val_dset, test_dset = data_handler.getDatasets()
train_loader, val_loader, test_loader = data_handler.getDataloader(
    train_dset, val_dset, test_dset
)

# MARN captioning model.
model = MARN(voc, cfg, path)

# Only the greedy test-data evaluator is created in this section; the beam
# variant is intentionally left out.
test_evaluator_greedy = Evaluator(
    model, test_loader, path, cfg, data_handler.test_dict
)

In [4]:
# MARN training, stage 1: the memory decoder is switched off
# (opt_memory_decoder = False) and no memory is generated.
cfg.encoder_lr = 1e-4
cfg.decoder_lr = 1e-4
cfg.teacher_forcing_ratio = 1.0
model.opt_memory_decoder = False
model.update_hyperparameters(cfg)

# No LR scheduler is active in this stage.
report_every = 25
for e in range(1, 501):
    loss_train, ac_loss = model.train_epoch(train_loader, utils)
    # Skip reporting except on every report_every-th epoch.
    if e % report_every != 0:
        continue
    print('Epoch -- >', e, 'Loss -->', loss_train, '  AC loss --->', ac_loss)
    greedy_scores = test_evaluator_greedy.evaluate(utils, model, e, loss_train)
    print('greedy :', greedy_scores)

Epoch -- > 25 Loss --> 3.9958693362308884   AC loss ---> 0.03090188533067703
{'testlen': 3890, 'reflen': 3886, 'guess': [3890, 3220, 2550, 1880], 'correct': [2759, 1181, 518, 106]}
ratio: 1.0010293360779718
greedy : {'Bleu_1': 0.7092544987144707, 'Bleu_2': 0.5100327486080173, 'Bleu_3': 0.3752567976872323, 'Bleu_4': 0.23363259354817334, 'METEOR': 0.24120785959776894, 'ROUGE_L': 0.610236566114215, 'CIDEr': 0.15639253169251544}
Epoch -- > 50 Loss --> 3.59046994521975   AC loss ---> 0.02184500128030777
{'testlen': 3727, 'reflen': 3719, 'guess': [3727, 3057, 2387, 1717], 'correct': [2890, 1430, 711, 249]}
ratio: 1.0021511158910992
greedy : {'Bleu_1': 0.77542259189676, 'Bleu_2': 0.6022676335250706, 'Bleu_3': 0.47628336154073075, 'Bleu_4': 0.3537987715241685, 'METEOR': 0.277348359131785, 'ROUGE_L': 0.6552164314236341, 'CIDEr': 0.390500539843794}
Epoch -- > 75 Loss --> 3.3389997142232097   AC loss ---> 0.018926209211349486
{'testlen': 3721, 'reflen': 3712, 'guess': [3721, 3051, 2381, 1711], 'c

{'testlen': 4096, 'reflen': 4089, 'guess': [4096, 3426, 2756, 2086], 'correct': [3335, 1998, 1156, 552]}
ratio: 1.0017119100022005
greedy : {'Bleu_1': 0.8142089843748012, 'Bleu_2': 0.6890837488680113, 'Bleu_3': 0.5839928708663213, 'Bleu_4': 0.47913936448993966, 'METEOR': 0.3411564377362805, 'ROUGE_L': 0.7124298204181696, 'CIDEr': 0.87774070599289}


In [None]:
# MARN training, stage 2: generate the memory first, then train with the
# attended memory decoder switched on (opt_memory_decoder = True).
cfg.encoder_lr = 1e-4
cfg.decoder_lr = 1e-4
cfg.teacher_forcing_ratio = 1.0
model.generate_memory(data_handler)
print('Memory Generated')
model.opt_memory_decoder = True
model.update_hyperparameters(cfg)

# No LR scheduler is active in this stage. Epoch numbering continues from
# stage 1 (501 through 1500).
report_every = 25
for e in range(501, 1501):
    loss_train, ac_loss = model.train_epoch(train_loader, utils)
    # Skip reporting except on every report_every-th epoch.
    if e % report_every != 0:
        continue
    print('Epoch -- >', e, 'Loss -->', loss_train, '  AC loss --->', ac_loss)
    greedy_scores = test_evaluator_greedy.evaluate(utils, model, e, loss_train)
    print('greedy :', greedy_scores)

Memory Generated
