In [1]:
from classes import CLASSES, GENERIC_PROMPT_COLLECTIONS
import torch.nn as nn
from clip import clip
import os
import torch


# Initialize CLIP models 
class TextEncoder(nn.Module):
    def __init__(self, clip_model):
        super().__init__()
        self.transformer = clip_model.transformer
        self.positional_embedding = clip_model.positional_embedding
        self.ln_final = clip_model.ln_final
        self.text_projection = clip_model.text_projection
        self.dtype = clip_model.dtype
        self.token_embedding = clip_model.token_embedding

    def forward(self, text):
        x = self.token_embedding(text).type(self.dtype)  # [batch_size, n_ctx, d_model]

        x = x + self.positional_embedding.type(self.dtype)
        x = x.permute(1, 0, 2)  # NLD -> LND
        x = self.transformer(x)
        x = x.permute(1, 0, 2)  # LND -> NLD
        x = self.ln_final(x).type(self.dtype)

        # x.shape = [batch_size, n_ctx, transformer.width]
        # take features from the eot embedding (eot_token is the highest number in each sequence)
        x = x[torch.arange(x.shape[0]), text.argmax(dim=-1)] @ self.text_projection

        return x

def load_clip_to_cpu(visual_backbone):
    backbone_name = visual_backbone
    url = clip._MODELS[backbone_name]
    model_path = clip._download(url, os.path.expanduser("~/.cache/clip"))

    try:
        # loading JIT archive
        model = torch.jit.load(model_path, map_location="cpu").eval()
        state_dict = None

    except RuntimeError:
        state_dict = torch.load(model_path, map_location="cpu")

    model = clip.build_model(state_dict or model.state_dict())

    return model

clip_model = load_clip_to_cpu("RN50")

visual_model = torch.nn.DataParallel(clip_model.visual).cuda()

text_model = TextEncoder(clip_model)
text_model = torch.nn.DataParallel(text_model).cuda()

In [2]:
import json
import pathlib
import os
import sys  
sys.path.insert(0, '/nethome/bdevnani3/flash1/long_tail_lang/')
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm.notebook import trange, tqdm
from os import listdir
from os.path import isfile, join
from clip import clip

In [3]:
# Load the model
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load('RN50', device)

# Download the dataset
# cifar100 = CIFAR100(root=os.path.expanduser("~/.cache"), download=True, train=False)

# # Prepare the inputs
# image, class_id = cifar100[3637]
# image_input = preprocess(image).unsqueeze(0).to(device)
# text_inputs = torch.cat([clip.tokenize(f"a photo of a {c}") for c in cifar100.classes]).to(device)

# # Calculate features
# with torch.no_grad():
#     image_features = model.encode_image(image_input)
#     text_features = model.encode_text(text_inputs)

# # Pick the top 5 most similar labels for the image
# image_features /= image_features.norm(dim=-1, keepdim=True)
# text_features /= text_features.norm(dim=-1, keepdim=True)

In [4]:
import numpy as np 
all_labels_text = {}
data = []
labels = []


with torch.no_grad():
    for label in tqdm(range(1000)):
        all_labels_text[label] = []

        templates = np.array(GENERIC_PROMPT_COLLECTIONS["ImageNet"])
        c = np.array(CLASSES)[label]
            
        texts = clip.tokenize([template.format(c) for template in templates]) 
        texts = texts.cuda()
        zeroshot_weights = model.encode_text(texts).float()
        zeroshot_weights = zeroshot_weights / zeroshot_weights.norm(
            dim=-1, keepdim=True
        )
        all_labels_text[label].append(zeroshot_weights)
        data.append(zeroshot_weights)
        for i in range(len(templates)):
            labels.append(label)
        
data = torch.cat(data)

  0%|          | 0/1000 [00:00<?, ?it/s]

In [5]:
from data_loader import dataloaders as dataloader
d = dataloader.load_data(
    data_root="../datasets/ImageNet/",
    dataset="ImageNet_LT",
    phase="test",
    batch_size=128,
#     batch_size=1,
    sampler_dic=None,
    num_workers=12,
    type="random_prompts",
    prompt_set="ImageNet",
)
data_test = d[0]

from classes import CLASSES, CUSTOM_TEMPLATES, GENERIC_PROMPT_COLLECTIONS
from tqdm.notebook import trange, tqdm

test_images, test_texts = [], []
test_labels = []

with torch.no_grad():
    for im, label, _, path in tqdm(data_test):
        x = visual_model(im.half()).float()
        x = x / x.norm(dim=-1, keepdim=True)
        test_images.append(x)
        test_labels.append(label)
        
test_images = torch.cat(test_images, dim=0)
test_labels = torch.cat(test_labels, dim=0)

Loading data from /nethome/bdevnani3/flash1/long_tail_lang/data/ImageNet_LT/ImageNet_LT_test.txt
Use data transformation: Compose(
    Resize(size=224, interpolation=bicubic, max_size=None, antialias=None)
    CenterCrop(size=(224, 224))
    ToTensor()
    Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])
)
***********************DATASET: test random_prompts
test 50000
No sampler.
Shuffle is True.


  0%|          | 0/391 [00:00<?, ?it/s]

In [6]:
lt_images = torch.load("../clip_embedded_images.pt")
lt_images_labels = torch.load("../clip_embedded_image_labels.pt")

In [7]:
freqs = {}
for label in lt_images_labels:
    label = int(label.item())
    if label not in freqs:
        freqs[label] = 0
    freqs[label] +=1
    
label_cats = {"many":[], "med":[], "few":[]}
for label in set(lt_images_labels):
    label = label.item()
    if freqs[label] > 100:
        label_cats["many"].append(label)
    elif freqs[label] > 20:
        label_cats["med"].append(label)
    else:
        label_cats["few"].append(label)
import json
# json.dump(freqs, open("/nethome/bdevnani3/flash1/long_tail_lang/embedding_datasets/class_frequencies.json", "w"))
# json.dump(label_cats, open("/nethome/bdevnani3/flash1/long_tail_lang/embedding_datasets/label_classification.json", "w"))

many_test_images, med_test_images, few_test_images = [], [], []
many_test_labels, med_test_labels, few_test_labels = [], [], []

for i,l in tqdm(zip(test_images, test_labels)):
    
    if l.item() in label_cats["many"]:
        many_test_images.append(i)
        many_test_labels.append(l.item())
    elif l.item() in label_cats["med"]:
        med_test_images.append(i)
        med_test_labels.append(l.item())
    elif l.item() in label_cats["few"]:
        few_test_images.append(i)
        few_test_labels.append(l.item())
        
many_test_images = torch.stack(many_test_images,dim=1)
med_test_images = torch.stack(med_test_images,dim=1)
few_test_images = torch.stack(few_test_images,dim=1)
many_test_labels = torch.tensor(many_test_labels)
med_test_labels = torch.tensor(med_test_labels)
few_test_labels = torch.tensor(few_test_labels)

print(many_test_images.shape, many_test_labels.shape)
print(med_test_images.shape, med_test_labels.shape)
print(few_test_images.shape, few_test_labels.shape)

0it [00:00, ?it/s]

torch.Size([1024, 19250]) torch.Size([19250])
torch.Size([1024, 23450]) torch.Size([23450])
torch.Size([1024, 7300]) torch.Size([7300])


In [23]:
from sklearn.linear_model import LogisticRegression

In [None]:
clf = LogisticRegression(random_state=0, verbose=1, n_jobs=-1).fit(data.cpu(), labels)
print(clf.score(data.cpu(), labels))
print(clf.score(lt_images.cpu(), lt_images_labels.cpu()))
print(clf.score(test_images.cpu(), test_labels.cpu()))

print("Many:", clf.score(many_test_images.T.cpu(), many_test_labels.cpu()))
print("Med:", clf.score(med_test_images.T.cpu(), med_test_labels.cpu()))
print("Few:", clf.score(few_test_images.T.cpu(), few_test_labels.cpu()))
print("All:", clf.score(test_images.cpu(), test_labels.cpu()))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 20 concurrent workers.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =      1025000     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  5.66436D+05    |proj g|=  3.41085D+01


 This problem is unconstrained.


In [None]:
proj = np.load("../imagenet_text2img_upsampled2_balanced_proj200.npy")

In [18]:
data_proj = np.matmul(data.cpu(), proj)
clf_proj = LogisticRegression(random_state=0, verbose=1, n_jobs=-1).fit(data_proj.cpu(), labels)
print(clf_proj.score(data_proj.cpu(), labels))
print(clf_proj.score(lt_images.cpu(), lt_images_labels.cpu()))
print(clf_proj.score(test_images.cpu(), test_labels.cpu()))

print("Many:", clf_proj.score(many_test_images.T.cpu(), many_test_labels.cpu()))
print("Med:", clf_proj.score(med_test_images.T.cpu(), med_test_labels.cpu()))
print("Few:", clf_proj.score(few_test_images.T.cpu(), few_test_labels.cpu()))
print("All:", clf_proj.score(test_images.cpu(), test_labels.cpu()))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 10 concurrent workers.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =      1025000     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  5.66436D+05    |proj g|=  1.98860D+01


 This problem is unconstrained.



At iterate   50    f=  2.86660D+05    |proj g|=  2.55931D-01

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
*****     67     76      1     0     0   4.672D-02   2.867D+05
  F =   286660.26624696294     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  5.4min finished


0.997719512195122
0.5206049410424184
0.50536
Many: 0.5295064935064935
Med: 0.49415778251599146
Few: 0.4776712328767123
All: 0.50536


In [19]:
proj400 = np.load("../imagenet_text2img_upsampled2_balanced_proj400.npy")

In [20]:
data_proj400 = np.matmul(data.cpu(), proj400)
clf_proj400 = LogisticRegression(random_state=0, verbose=1, n_jobs=-1).fit(data_proj400.cpu(), labels)
clf_proj400.score(data_proj.cpu(), labels)
clf_proj400.score(lt_images.cpu(), lt_images_labels)

print("Many:", clf_proj400.score(many_test_images.T.cpu(), many_test_labels.cpu()))
print("Med:", clf_proj400.score(med_test_images.T.cpu(), med_test_labels.cpu()))
print("Few:", clf_proj400.score(few_test_images.T.cpu(), few_test_labels.cpu()))
print("All:", clf_proj400.score(test_images.cpu(), test_labels.cpu()))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 10 concurrent workers.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =      1025000     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  5.66436D+05    |proj g|=  1.93401D+01


 This problem is unconstrained.



At iterate   50    f=  2.87502D+05    |proj g|=  8.18398D-01

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
*****     63     74      1     0     0   5.916D-02   2.875D+05
  F =   287501.73315015755     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  4.8min finished


Many: 0.5305454545454545
Med: 0.4933049040511727
Few: 0.4706849315068493
All: 0.50434


In [21]:
projmix = np.load("../imagenet_text2img_upsampled5_balanced_proj.npy")

data_projmix = np.matmul(data.cpu(), projmix)
clf_projmix = LogisticRegression(random_state=0, verbose=1, n_jobs=-1).fit(data_projmix.cpu(), labels)
# clf_projmix.score(clf_projmix.cpu(), labels)
# clf_projmix.score(lt_images.cpu(), lt_images_labels)

print("Many:", clf_projmix.score(many_test_images.T.cpu(), many_test_labels.cpu()))
print("Med:", clf_projmix.score(med_test_images.T.cpu(), med_test_labels.cpu()))
print("Few:", clf_projmix.score(few_test_images.T.cpu(), few_test_labels.cpu()))
print("All:", clf_projmix.score(test_images.cpu(), test_labels.cpu()))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 10 concurrent workers.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =      1025000     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  5.66436D+05    |proj g|=  1.33026D+01


 This problem is unconstrained.



At iterate   50    f=  2.99440D+05    |proj g|=  1.56454D-01

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
*****     55     67      1     0     0   5.021D-02   2.994D+05
  F =   299440.38180914812     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  4.4min finished


Many: 0.5495064935064935
Med: 0.5198294243070363
Few: 0.5089041095890411
All: 0.52966


In [22]:
projmix = np.load("../imagenet_text2img_upsampled4_balanced_proj.npy")

data_projmix = np.matmul(data.cpu(), projmix)
clf_projmix = LogisticRegression(random_state=0, verbose=1, n_jobs=-1).fit(data_projmix.cpu(), labels)
# clf_projmix.score(clf_projmix.cpu(), labels)
# clf_projmix.score(lt_images.cpu(), lt_images_labels)

print("Many:", clf_projmix.score(many_test_images.T.cpu(), many_test_labels.cpu()))
print("Med:", clf_projmix.score(med_test_images.T.cpu(), med_test_labels.cpu()))
print("Few:", clf_projmix.score(few_test_images.T.cpu(), few_test_labels.cpu()))
print("All:", clf_projmix.score(test_images.cpu(), test_labels.cpu()))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 10 concurrent workers.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =      1025000     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  5.66436D+05    |proj g|=  1.79337D+01


 This problem is unconstrained.



At iterate   50    f=  3.13100D+05    |proj g|=  7.53471D-01

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
*****     62     73      1     0     0   2.077D-01   3.131D+05
  F =   313099.83803925075     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  5.1min finished


Many: 0.5076883116883116
Med: 0.47961620469083155
Few: 0.47054794520547943
All: 0.4891


In [27]:
projmix = np.load("../imagenet_text2img_upsampled4_balanced_proj800.npy")

data_projmix = np.matmul(data.cpu(), projmix)
clf_projmix = LogisticRegression(random_state=0, verbose=1, n_jobs=-1).fit(data_projmix.cpu(), labels)
# clf_projmix.score(clf_projmix.cpu(), labels)
# clf_projmix.score(lt_images.cpu(), lt_images_labels)

print("Many:", clf_projmix.score(many_test_images.T.cpu(), many_test_labels.cpu()))
print("Med:", clf_projmix.score(med_test_images.T.cpu(), med_test_labels.cpu()))
print("Few:", clf_projmix.score(few_test_images.T.cpu(), few_test_labels.cpu()))
print("All:", clf_projmix.score(test_images.cpu(), test_labels.cpu()))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 10 concurrent workers.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =      1025000     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  5.66436D+05    |proj g|=  1.75366D+01


 This problem is unconstrained.



At iterate   50    f=  3.13635D+05    |proj g|=  1.02537D+00

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
*****     63     69      1     0     0   1.526D-02   3.136D+05
  F =   313634.96176935395     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  4.5min finished


Many: 0.507012987012987
Med: 0.48136460554371
Few: 0.4724657534246575
All: 0.48994


In [34]:
cupl_prompt_embeds= torch.load("../embedding_datasets/clip/Balanced_culp_text_train_ImageNet/text_embeddings.pt")
cupl_prompt_labels= torch.load("../embedding_datasets/clip/Balanced_culp_text_train_ImageNet/labels.pt")


50

In [36]:
projmix = np.load("../proj_matrices/cupl_l2_proj.npy")


clf_projmix = LogisticRegression(random_state=0, verbose=1, n_jobs=-1).fit(cupl_prompt_embeds.cpu(), cupl_prompt_labels)
# clf_projmix.score(clf_projmix.cpu(), labels)
# clf_projmix.score(lt_images.cpu(), lt_images_labels)

print("Many:", clf_projmix.score(many_test_images.T.cpu(), many_test_labels.cpu()))
print("Med:", clf_projmix.score(med_test_images.T.cpu(), med_test_labels.cpu()))
print("Few:", clf_projmix.score(few_test_images.T.cpu(), few_test_labels.cpu()))
print("All:", clf_projmix.score(test_images.cpu(), test_labels.cpu()))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 20 concurrent workers.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =      1025000     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  3.45388D+05    |proj g|=  2.01144D+01


 This problem is unconstrained.



At iterate   50    f=  1.19202D+05    |proj g|=  1.94563D-02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
*****     50     58      1     0     0   1.946D-02   1.192D+05
  F =   119201.88999533509     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 54.9min finished


Many: 0.5105974025974026
Med: 0.4977398720682303
Few: 0.4643835616438356
All: 0.49782


In [None]:
projmix = np.load("../proj_matrices/cupl_l2_proj_mix.npy")
data_projmix = np.matmul(cupl_prompt_embeds.cpu(), projmix)

clf_projmix = LogisticRegression(random_state=0, verbose=1, n_jobs=-1).fit(data_projmix, cupl_prompt_labels)
# clf_projmix.score(clf_projmix.cpu(), labels)
# clf_projmix.score(lt_images.cpu(), lt_images_labels)

print("Many:", clf_projmix.score(many_test_images.T.cpu(), many_test_labels.cpu()))
print("Med:", clf_projmix.score(med_test_images.T.cpu(), med_test_labels.cpu()))
print("Few:", clf_projmix.score(few_test_images.T.cpu(), few_test_labels.cpu()))
print("All:", clf_projmix.score(test_images.cpu(), test_labels.cpu()))

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 20 concurrent workers.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =      1025000     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  3.45388D+05    |proj g|=  1.59717D+01


 This problem is unconstrained.
