In [28]:
import os
import numpy as np
import json
from collections import defaultdict
from shutil import copyfile

# Create Validation Set

In [7]:
# Root of the oxford_pets data directory. Set the OXFORD_PETS_DIR environment
# variable to run this notebook on another machine; the fallback is the
# original hard-coded location.
data_dir = os.environ.get(
    "OXFORD_PETS_DIR",
    "/usr0/home/gis/research/vis_lang/data/oxford_pets",
)
os.listdir(data_dir)

['valid',
 'annotations',
 'annotations.tar.gz',
 'images',
 'images.tar.gz',
 'test',
 'train']

In [3]:
def read_annotations(path):
    """Read the image names listed in an annotation split file.

    Every data line is expected to hold four whitespace-separated fields; only
    the first one (the image name) is kept.  Blank lines and lines starting
    with ``#`` are skipped, so a trailing newline or a comment header no longer
    aborts the read.

    Args:
        path: Path to the annotation text file.

    Returns:
        list[str]: Image names in file order.

    Raises:
        ValueError: If a data line does not contain exactly four fields.
    """
    image_names = []
    with open(path, 'r') as handle:
        for line_number, line in enumerate(handle, start=1):
            stripped = line.strip()
            if not stripped or stripped.startswith('#'):
                continue
            # split() without arguments tolerates tabs and repeated spaces.
            fields = stripped.split()
            if len(fields) != 4:
                raise ValueError(
                    f'{path}:{line_number}: expected 4 fields, got {len(fields)}'
                )
            image_names.append(fields[0])
    return image_names



In [6]:
# Read both official split lists with the same helper; the generator keeps the
# per-file path construction in one place.
trainval_annotations, test_annotations = (
    read_annotations(os.path.join(data_dir, 'annotations', split_file))
    for split_file in ('trainval.txt', 'test.txt')
)
len(trainval_annotations), len(test_annotations)

(3680, 3669)

In [9]:
# Names of all entries in the raw image directory.
# os.listdir returns entries in arbitrary order, so image_files[0] is only a
# sample name. The recorded output shows 7393 entries, more than the
# 3680 + 3669 names read from the two annotation lists.
image_files = os.listdir(os.path.join(data_dir, 'images'))
len(image_files), image_files[0]

(7393, 'american_bulldog_75.jpg')

In [15]:
def assign_class2ids(annotations):
    """Group image ids by class label.

    Each annotation (e.g. ``american_bulldog_75``) is cut at its last
    underscore: the text before it is the label, the text after it is the id.

    Args:
        annotations: Iterable of ``<label>_<id>`` strings.

    Returns:
        dict: Maps every label to the list of its ids, in order of appearance.
    """
    grouped = {}
    for name in annotations:
        # With no underscore at all, rpartition yields ('', '', name), i.e. an
        # empty label and the whole string as the id.
        label, _, ref = name.rpartition('_')
        grouped.setdefault(label, []).append(ref)
    return grouped

def create_filenames(label, ids):
    """Build the ``<label>_<id>.jpg`` file name for every id of one class.

    Args:
        label: Class label used as the file-name prefix.
        ids: Iterable of ids belonging to that label.

    Returns:
        list[str]: One file name per id, in the same order as ``ids``.
    """
    return [f'{label}_{ref}.jpg' for ref in ids]

def split_partition(class2ids, proportion=.5, seed=None):
    """Randomly split every class's ids into two partitions of file names.

    For each label the ids are shuffled and the first
    ``int(len(ids) * proportion)`` go to the first partition, the remainder to
    the second.  The shuffle works on a copy, so the lists stored in
    ``class2ids`` are left untouched.

    Args:
        class2ids: Mapping of label -> list of ids.
        proportion: Fraction of each class assigned to the first partition;
            must lie in ``[0, 1]``.
        seed: Optional integer seed.  When given, a dedicated
            ``np.random.RandomState(seed)`` makes the split reproducible; when
            ``None`` the global NumPy generator is used, as before.

    Returns:
        tuple[defaultdict, defaultdict]: ``(split1, split2)``, each mapping a
        label to its list of file names.

    Raises:
        ValueError: If ``proportion`` is outside ``[0, 1]``.
    """
    if not 0. <= proportion <= 1.:
        raise ValueError(f'proportion must be in [0, 1], got {proportion}')

    rng = np.random if seed is None else np.random.RandomState(seed)
    split1, split2 = defaultdict(list), defaultdict(list)
    for label, ids in class2ids.items():
        # shuffle() works in place, so shuffle a copy and keep the caller's list intact.
        shuffled = list(ids)
        rng.shuffle(shuffled)
        split_bound = int(len(shuffled) * proportion)
        split1[label] = create_filenames(label, shuffled[:split_bound])
        split2[label] = create_filenames(label, shuffled[split_bound:])
    return split1, split2
        

In [12]:
# Group the ids of each official split by class label.
trainval_class2ids, test_class2ids = map(
    assign_class2ids, (trainval_annotations, test_annotations)
)
len(trainval_class2ids), len(test_class2ids)

(37, 37)

In [24]:
# split_partition shuffles with NumPy's global generator, so seed it to get the
# same train/validation split on every run.
# NOTE(review): files already copied to train/ and valid/ by an earlier,
# unseeded run are not removed by create_file_structures — clear those
# directories before re-copying to avoid images landing in both splits.
np.random.seed(0)
train_set, val_set = split_partition(trainval_class2ids, proportion=.5)
# proportion=1. puts every test image into the first partition; the (empty)
# second partition is discarded.
test_set, _ = split_partition(test_class2ids, proportion=1.)
len(train_set), len(val_set), len(test_set)

(37, 37, 37)

In [17]:
# Peek at the first few class labels (iterating a mapping yields its keys).
list(train_set)[:5]

['Abyssinian',
 'american_bulldog',
 'american_pit_bull_terrier',
 'basset_hound',
 'beagle']

In [19]:
# Number of training files for one class.
# train_set is a defaultdict, so a misspelled label would report 0 here (and
# insert an empty entry) instead of raising KeyError.
len(train_set['Abyssinian'])

50

In [41]:
def create_file_structures(dataset, save_dir, source_dir):
    """Copy a split's images into one sub-directory per class.

    For every label a directory ``save_dir/<label>`` is created (if missing)
    and each of that label's files is copied there from ``source_dir``.

    Args:
        dataset: Mapping of label -> list of file names.
        save_dir: Root directory of the split being written.
        source_dir: Directory holding the original files.
    """
    for label in dataset:
        class_dir = os.path.join(save_dir, label)
        # Created even when the class has no files.
        os.makedirs(class_dir, exist_ok=True)
        for name in dataset[label]:
            copyfile(os.path.join(source_dir, name), os.path.join(class_dir, name))

In [42]:
# Materialise the training split as data_dir/train/<label>/<file>.
# Files are copied (not moved) from data_dir/images; files left in train/ by an
# earlier run are not removed.
create_file_structures(
    train_set, 
    save_dir=os.path.join(data_dir, 'train'),
    source_dir=os.path.join(data_dir, 'images')
)

In [43]:
# Materialise the validation and test splits next to the training one, each as
# data_dir/<split>/<label>/<file>.
for split_name, split_set in (('valid', val_set), ('test', test_set)):
    create_file_structures(
        split_set,
        save_dir=os.path.join(data_dir, split_name),
        source_dir=os.path.join(data_dir, 'images'),
    )

In [47]:
# Check that the train/, valid/ and test/ directories now exist under data_dir.
os.listdir(data_dir)

['valid',
 'annotations',
 'annotations.tar.gz',
 'images',
 'images.tar.gz',
 'test',
 'train']

In [11]:
# Class sub-directories created for the training split, sorted for display
# (os.listdir itself returns entries in arbitrary order).
sorted(os.listdir(os.path.join(data_dir, 'train')))

['Abyssinian',
 'Bengal',
 'Birman',
 'Bombay',
 'British_Shorthair',
 'Egyptian_Mau',
 'Maine_Coon',
 'Persian',
 'Ragdoll',
 'Russian_Blue',
 'Siamese',
 'Sphynx',
 'american_bulldog',
 'american_pit_bull_terrier',
 'basset_hound',
 'beagle',
 'boxer',
 'chihuahua',
 'english_cocker_spaniel',
 'english_setter',
 'german_shorthaired',
 'great_pyrenees',
 'havanese',
 'japanese_chin',
 'keeshond',
 'leonberger',
 'miniature_pinscher',
 'newfoundland',
 'pomeranian',
 'pug',
 'saint_bernard',
 'samoyed',
 'scottish_terrier',
 'shiba_inu',
 'staffordshire_bull_terrier',
 'wheaten_terrier',
 'yorkshire_terrier']

# Run Initial Zero Shot Experiment

In [1]:
import os
import clip
import torch
from torchvision import transforms, models

import numpy as np
from sklearn.linear_model import LogisticRegression
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm

import argparse
from omegaconf import OmegaConf

import json

from datasets import *
device = "cuda" if torch.cuda.is_available() else "cpu"
from sklearn.metrics.pairwise import euclidean_distances, cosine_similarity
import seaborn as sn

from columnar import columnar
from nltk.corpus import wordnet as wn

In [2]:
def clip_zero_shot(
    loader,
    classes,
    zeroshot_weights,
    clip_model_name="ViT-B/32",
):
    """Evaluate CLIP zero-shot top-1 accuracy over a data loader.

    Image features are L2-normalised, multiplied with ``zeroshot_weights`` and
    the highest-scoring class is compared with the target of every sample.
    Per-class statistics are updated per sample, so they are correct for any
    batch size (previously a whole batch was credited to the class of its
    first sample).

    Args:
        loader: Iterable yielding ``(images, target)`` batches; ``target``
            holds integer class indices into ``classes``.
        classes: Sequence of class names, indexed by class id.
        zeroshot_weights: Text-side classifier matrix that the image features
            are right-multiplied with (presumably ``(embed_dim, n_classes)``,
            as built by ``zeroshot_classifier`` — confirm against caller).
        clip_model_name: Model to load when the global ``clip_model`` is
            missing or ``None``.

    Returns:
        tuple: ``(top1, per_class_accuracy_top1)`` where ``top1`` is the
        overall top-1 accuracy in percent (``0.0`` for an empty loader) and
        ``per_class_accuracy_top1`` maps class index ->
        ``[correct, total, class_name]``.
    """
    global clip_model, clip_preprocess
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Lazy load. globals().get avoids a NameError when no model has been
    # created yet in this kernel.
    if globals().get("clip_model") is None:
        clip_model, clip_preprocess = clip.load(clip_model_name, device)

    # class index -> [correct, total, class_name]
    per_class_accuracy_top1 = {k: [0, 0, classes[k]] for k in range(len(classes))}

    zeroshot_weights = zeroshot_weights.to(device)
    top1, n = 0.0, 0.0
    with torch.no_grad():
        for images, target in tqdm(loader):
            # Follow the detected device instead of unconditionally calling .cuda().
            images = images.to(device)
            target = target.to(device)

            # predict
            image_features = clip_model.encode_image(images)
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)
            logits = 100.0 * image_features @ zeroshot_weights

            # measure accuracy: one boolean per sample
            predicted = logits.topk(1, dim=1)[1].squeeze(1)
            hits = predicted.eq(target)
            top1 += float(hits.sum().item())
            n += images.size(0)

            for class_index, hit in zip(target.tolist(), hits.tolist()):
                per_class_accuracy_top1[class_index][0] += float(hit)
                per_class_accuracy_top1[class_index][1] += 1

    # Guard against an empty loader instead of dividing by zero.
    top1 = (top1 / n) * 100 if n else 0.0

    return top1, per_class_accuracy_top1

In [3]:
# Build the dataset wrapper, load CLIP and create the loaders.
# NOTE(review): OxfordPets comes from the star import of `datasets`; the
# meaning of the positional arguments 4 and 1 is not visible here — TODO
# confirm (the 3669-step progress bar recorded in the evaluation cell suggests
# the test loader uses batch size 1).
dataset_obj = OxfordPets(4, 1, root='/usr0/home/gis/research/vis_lang/data/')
# No device is passed, so clip.load picks its own default device.
clip_model, clip_preprocess = clip.load("ViT-B/32", )
# CLIP's own preprocessing is used as the image transform for both loaders;
# the second value returned by get_train_loaders is discarded.
train_loader, _ = dataset_obj.get_train_loaders(transform_fn=clip_preprocess)
test_loader = dataset_obj.get_test_loader(transform_fn=clip_preprocess)

In [4]:
phrase_file = "/home/gis/research/vis_lang/efficient_finetuning/configs/phrases/pets.txt"

# One prompt template per line, each formatted with the class name via
# str.format. Strip the line terminator and drop blank lines so that no empty
# prompt ends up in the ensemble.
with open(phrase_file) as f:
    templates = [line.strip() for line in f if line.strip()]


def zeroshot_classifier(classnames, templates):
    """Build the zero-shot classifier matrix from text prompts.

    For every class the templates are filled with the class name (underscores
    replaced by spaces), encoded with the global ``clip_model`` text encoder,
    L2-normalised, averaged over templates and normalised again.

    Args:
        classnames: Iterable of class names.
        templates: Non-empty list of format strings taking the class name.

    Returns:
        torch.Tensor: The per-class embeddings stacked along ``dim=1``, on the
        same device as ``clip_model``.

    Raises:
        ValueError: If ``templates`` is empty.
    """
    if not templates:
        raise ValueError('templates must contain at least one prompt template')

    # Use the model's own device rather than assuming CUDA is available.
    model_device = next(clip_model.parameters()).device
    with torch.no_grad():
        zeroshot_weights = []
        for classname in classnames:
            classname = ' '.join(classname.split('_'))
            texts = [template.format(classname) for template in templates]  # format with class
            texts = clip.tokenize(texts).to(model_device)  # tokenize
            class_embeddings = clip_model.encode_text(texts)  # embed with text encoder
            class_embeddings /= class_embeddings.norm(dim=-1, keepdim=True)
            # Average the normalised prompt embeddings, then re-normalise.
            class_embedding = class_embeddings.mean(dim=0)
            class_embedding /= class_embedding.norm()
            zeroshot_weights.append(class_embedding)
        zeroshot_weights = torch.stack(zeroshot_weights, dim=1).to(model_device)
    return zeroshot_weights


baseline_zw = zeroshot_classifier(dataset_obj.classes, templates)

baseline_czs = clip_zero_shot(
    test_loader,
    dataset_obj.classes,
    baseline_zw
)
print(baseline_czs[0])

  0%|          | 0/3669 [00:00<?, ?it/s]

87.05369310438812


In [8]:
# Class directories on disk that have no identically named entry in
# dataset_obj.classes. data_dir is defined in the "Create Validation Set"
# section, so that cell must have been run in this kernel.
# NOTE(review): every name in the recorded difference contains an underscore,
# which suggests dataset_obj.classes spells multi-word breeds differently —
# TODO confirm against the OxfordPets implementation.
set(os.listdir(os.path.join(data_dir, 'test'))) - set(dataset_obj.classes)

{'British_Shorthair',
 'Egyptian_Mau',
 'Maine_Coon',
 'Russian_Blue',
 'american_bulldog',
 'american_pit_bull_terrier',
 'basset_hound',
 'english_cocker_spaniel',
 'english_setter',
 'german_shorthaired',
 'great_pyrenees',
 'japanese_chin',
 'miniature_pinscher',
 'saint_bernard',
 'scottish_terrier',
 'shiba_inu',
 'staffordshire_bull_terrier',
 'wheaten_terrier',
 'yorkshire_terrier'}

In [10]:
# Compare the number of class directories on disk with the number of classes
# the dataset object exposes.
# NOTE(review): the recorded output is (37, 35), i.e. the dataset object
# reports two classes fewer than exist on disk — worth investigating before
# trusting the per-class accuracies.
len(os.listdir(os.path.join(data_dir, 'test'))), len(dataset_obj.classes)

(37, 35)