In [1]:
import os
import sys

In [2]:
# Make the repository root (the current working directory) importable so the
# project-local packages used in later cells can be resolved.
repo_path = os.path.abspath(".")
# Guard the append so re-running this cell does not pile up duplicate entries.
if repo_path not in sys.path:
    sys.path.append(repo_path)

In [62]:
import copy
import datetime
import logging
import os
import time
from os.path import join

import pandas as pd
import torch
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import wandb
import numpy as np

from dataset import MetaLoader
from models.umt import UMT
from tasks.pretrain import setup_dataloaders
from tasks.retrieval_utils import evaluation_wrapper
from tasks.shared_utils import setup_model
from utils.basic_utils import MetricLogger, SmoothedValue, setup_seed
from utils.config import Config
from utils.config_utils import setup_main
from utils.distributed import get_rank, is_main_process
from utils.logger import log_dict_to_wandb, setup_wandb

In [8]:
from utils.config import Config

In [9]:
# Experiment config to load (zero-shot MSR-VTT retrieval, going by its path).
config_file = "./exp/zero_shot/ret_msrvtt/b16.py"
config = Config.from_file(filepath=config_file)

In [10]:
# Display the loaded config to inspect its fields.
config

{'data_dir': 'your_data_path/anno',
 'data_root': 'your_data_path/anno/videos_images',
 'anno_root_pt': 'your_data_path/anno/anno_pretrain',
 'anno_root_downstream': 'your_data_path/anno/anno_downstream',
 'TextEncoders': {'bert': {'name': 'bert_base',
   'pretrained': 'bert-base-uncased',
   'config': 'configs/config_bert.json',
   'd_model': 768,
   'fusion_layer': 9},
  'bert_large': {'name': 'bert_large',
   'pretrained': 'bert-large-uncased',
   'config': 'configs/config_bert_large.json',
   'd_model': 1024,
   'fusion_layer': 19}},
 'train_file': ['your_data_path/anno/anno_downstream/msrvtt_ret_train9k.json',
  'your_msrvtt_path',
  'video'],
 'test_file': {'test': ['your_data_path/anno/anno_downstream/msrvtt_ret_test1k.json',
   'your_msrvtt_path',
   'video']},
 'test_types': ['test'],
 'num_workers': 6,
 'stop_key': 'test/',
 'is_paragraph_retrieval': False,
 'num_frames': 4,
 'num_frames_test': 4,
 'batch_size': 32,
 'max_txt_l': 32,
 'inputs': {'image_res': 224,
  'video_inp

In [13]:
# Sanity check: confirm the pre-trained checkpoint file exists at this path.
!ls /work/piyush/pretrained_checkpoints/LargeModels/UnmaskedTeachers/b16_25m.pth

/work/piyush/pretrained_checkpoints/LargeModels/UnmaskedTeachers/b16_25m.pth


In [15]:
# Point the config at the pre-trained checkpoint file listed in the cell above.
pretrained_ckpt_file = "/work/piyush/pretrained_checkpoints/LargeModels/UnmaskedTeachers/b16_25m.pth"
config.pretrained_path = pretrained_ckpt_file

In [23]:
# NOTE(review): star import. `TextEncoders`, used in a later cell, is not
# defined anywhere else in this notebook and presumably comes from here;
# prefer an explicit import once that is confirmed.
from configs.model import *

In [26]:
# Use the "bert" entry of TextEncoders as the model's text-encoder config.
bert_encoder_cfg = TextEncoders["bert"]
config.model.text_encoder = bert_encoder_cfg

In [40]:
# Give the vision encoder the same num_frames value as the top-level config.
config.model.vision_encoder.num_frames = config.num_frames

In [48]:
# Turn off the distributed flag in the config.
# NOTE(review): presumably because this notebook runs in a single process — confirm.
config.distributed = False

In [56]:
# Do not resume automatically from an earlier run.
config.auto_resume = False

# Scheduler step counts set to 1.
# NOTE(review): presumably placeholders needed only so that model setup can
# build a scheduler; no training loop is visible in this notebook — confirm.
config.scheduler.num_training_steps = 1
config.scheduler.num_warmup_steps = 1

In [41]:
# Seed with a per-process offset: base seed from the config plus this rank.
process_seed = config.seed + get_rank()
setup_seed(process_seed)

# Target device is taken from the config.
device = torch.device(config.device)

# Turn on cuDNN benchmark mode.
cudnn.benchmark = True

In [45]:
# Directory holding the checkpoints, and the vision-encoder weights inside it.
ckpt_root = "/work/piyush/pretrained_checkpoints/LargeModels/UnmaskedTeachers/"
vision_ckpt_name = "b16_ptk710_f8_res224.pth"
vision_ckpt_path = os.path.join(ckpt_root, vision_ckpt_name)
config.model.vision_encoder.pretrained = vision_ckpt_path

In [57]:
# Resolve the model class from its name in the config, defaulting to UMT.
# NOTE(review): eval() runs whatever expression the config value contains, so
# this is only safe with trusted config files; an explicit name -> class
# mapping would be safer.
model_cls_name = config.model.get('model_cls', 'UMT')
model_cls = eval(model_cls_name)

# Build the model and everything setup_model returns alongside it.
setup_outputs = setup_model(
    config,
    model_cls=model_cls,
    has_decoder=False,
    pretrain=False,
    find_unused_parameters=False,
)

# Unpack the eight values returned by setup_model, in order.
(
    model,
    model_without_ddp,
    optimizer,
    scheduler,
    scaler,
    tokenizer,
    start_epoch,
    global_step,
) = setup_outputs



In [63]:
# Total number of model parameters, expressed in millions.
param_counts = [param.numel() for param in model.parameters()]
np.sum(param_counts) / 1e6

202.411267