In [1]:
import argparse
import re
from collections import OrderedDict
import json
import math
import os
import sys
import time
# import subprocess

try:
    import wandb
except ImportError:
    wandb = None

import numpy as np
import torch
import torch.cuda.amp as amp
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms

from datasets import get_dataset
import models
from tokenizer import SimpleTokenizer
from utils import AverageMeter, ProgressMeter, accuracy
import utils
from torchvision.datasets import ImageFolder
from utils import GaussianBlur, Solarize
from losses import DetailCLIPLoss, get_metric_names
import torch.distributed as dist

ModuleNotFoundError: No module named 'timm'

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
def validate_zeroshot(val_loader, model, tokenizer, ema=False):
    batch_time = AverageMeter('Time', ':6.3f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    print('=> encoding captions')
    cwd = os.path.dirname(os.path.realpath('test_ckp.ipynb'))
    with open(os.path.join(cwd, 'templates.json')) as f:
        templates = json.load(f)['fer2013']

    with open(os.path.join(cwd, 'labels.json')) as f:
        labels = json.load(f)['fer2013']


    with torch.no_grad():
        text_features = []
        for l in labels:
            texts = [t.format(l) for t in templates]
            texts = tokenizer(texts).cuda(device, non_blocking=True)
            class_embeddings = utils.get_model(model).encode_text(texts, ema=ema)
            class_embeddings = class_embeddings / class_embeddings.norm(dim=-1, keepdim=True)
            class_embeddings = class_embeddings.mean(dim=0)
            class_embeddings = class_embeddings / class_embeddings.norm(dim=-1, keepdim=True)
            text_features.append(class_embeddings)
        text_features = torch.stack(text_features, dim=0)

        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.cuda(device, non_blocking=True)
            target = target.cuda(device, non_blocking=True)

            # encode images
            image_features = utils.get_model(model).encode_image(images, ema=ema)
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)

            # cosine similarity as logits
            logits_per_image = image_features @ text_features.t()

            # measure accuracy and record loss
            acc1, acc5 = accuracy(logits_per_image, target, topk=(1, 5))
            acc1, acc5 = utils.scaled_all_reduce([acc1, acc5])
            top1.update(acc1.item(), images.size(0))
            top5.update(acc5.item(), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # if i % args.print_freq == 0:
            #     progress.display(i)

    progress.synchronize()
    print('0-shot * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
          .format(top1=top1, top5=top5))
    return {'acc1': top1.avg, 'acc5': top5.avg}


In [None]:
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
                                     
val_transform = transforms.Compose([
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize
        ])

cwd = os.path.dirname(os.path.realpath('test_ckp.ipynb'))
with open(os.path.join(cwd, 'dataset_catalog.json')) as f:
        root = json.load(f)['fer2013']['path']

#add val folder for imagenet 1k
val_dataset = ImageFolder(os.path.join(root, 'test'), val_transform)

val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=5, shuffle=None,
        num_workers=1, pin_memory=True, sampler=None, drop_last=False)

In [None]:
tokenizer = SimpleTokenizer()

In [None]:
ckpt_path = 'checkpoint_best.pt'
ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)

model_name='DETAILCLIP_VITB16'
model = getattr(models, model_name)()
model.cuda()

# create model
old_args = ckpt['args']
print("=> creating model: {}".format(old_args.model))

state_dict = OrderedDict()
for k, v in ckpt['state_dict'].items():
    state_dict[k.replace('module.', '')] = v

model.load_state_dict(state_dict, strict=True)
print("=> loaded resume checkpoint '{}' (epoch {})".format(model_name, ckpt['epoch']))



	Creating MAE projection head
	MAE projection head created
	Creating IBOT projection head


  WeightNorm.apply(module, name, dim)


	keys have been loaded for ibot head with status: <All keys matched successfully>
	IBOT projection head created
	DetailCLIP model created
=> creating model: ICLIP_VITB16
=> loaded resume checkpoint 'DETAILCLIP_VITB16' (epoch 48)


In [None]:
print(validate_zeroshot(val_loader, model, tokenizer, ema=False))

=> encoding captions


TypeError: VisionTransformer.forward() got an unexpected keyword argument 'mask'

In [None]:
ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)
old_args = ckpt['args']

model = getattr(models, model_name)()
model.cuda()
model.load_state_dict(state_dict, strict=True)
print("=> loaded resume checkpoint '{}' (epoch {})".format(args.resume, ckpt['epoch']))


data_path = '/home/onyxia/work/datasets/stanford_dogs'



TypeError: 'DefaultCfg' object is not subscriptable

In [None]:
# optionally resume from a checkpoint (takes precedence over autoresume)


ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)
state_dict = OrderedDict()
for k, v in ckpt['state_dict'].items():
    state_dict[k.replace('module.', '')] = v

