# Model and data loading

In [1]:
import torch
from transformers import AutoModel

# GPU index this notebook prefers to run on.
PREFERRED_GPU = 1
model_code = 'openai/clip-vit-large-patch14-336'

# Use the preferred GPU when it exists; otherwise fall back to the first GPU,
# or to the CPU when CUDA is unavailable, instead of failing on `.to(...)`.
# On a machine that has the preferred GPU, DEVICE keeps its original value (1).
_n_gpus = torch.cuda.device_count()
if _n_gpus > PREFERRED_GPU:
    DEVICE = PREFERRED_GPU
elif _n_gpus > 0:
    DEVICE = 0
else:
    DEVICE = 'cpu'

# Download/load the pretrained checkpoint and move its weights to DEVICE.
model = AutoModel.from_pretrained(model_code).to(DEVICE)

`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.


In [2]:
from data_utils import data_loader

data_code = 'fbhm'

# Both splits are built with identical settings; only the split name differs.
# `bs` is presumably the batch size (the progress bars below show 133 / 8 batches).
loader_kwargs = {'bs': 64, 'max_length': 77}
train, dev = (
    data_loader(data_code, split, model_code, **loader_kwargs)
    for split in ('train', 'dev')
)


`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.
`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The value `text_config["id2label"]` will be overriden.


# Extraction (Train)
Use the chosen model to obtain cross-modal matrices: for every sample, the outer product of its image embedding and its text embedding.

In [3]:
import torch
from tqdm.auto import tqdm

def get_matrices(data_loader, model, device=None):
    """Build one cross-modal matrix per sample from the model's embeddings.

    For each batch, the image embedding of every sample is multiplied (as a
    column vector) with its text embedding (as a row vector), i.e. a per-sample
    outer product. Gradients are disabled for the whole pass.

    Args:
        data_loader: Iterable yielding ``(data, label)`` pairs. ``data`` must
            provide ``.to(device)`` and be unpackable with ``**`` into
            ``model``; ``label`` must be a tensor that ``torch.cat`` accepts.
        model: Callable whose output exposes ``image_embeds`` and
            ``text_embeds``.
        device: Device the batches are moved to before the forward pass.
            Defaults to the device of the model's first parameter, so the
            function does not rely on a notebook-level global.

    Returns:
        A tuple ``(matrices, labels)``: all per-batch matrices concatenated
        along the first axis (moved to CPU), and all labels concatenated along
        the first axis.
    """
    if device is None:
        # Run the batches wherever the model's weights already live.
        device = next(model.parameters()).device

    matrices, labels = [], []
    with torch.no_grad():
        for data, label in tqdm(data_loader):
            output = model(**data.to(device))
            img, txt = output.image_embeds, output.text_embeds
            # Assuming 2-D (batch, dim) embeddings:
            # (B, D_img, 1) @ (B, 1, D_txt) -> (B, D_img, D_txt).
            matrix = img.unsqueeze(-1) @ txt.unsqueeze(1)
            # Move each batch off the accelerator right away so only one
            # batch of matrices is held there at a time.
            matrices.append(matrix.cpu())
            labels.append(label)
    return torch.cat(matrices), torch.cat(labels)

In [4]:
# Run the model over the whole training split and collect the per-sample
# cross-modal matrices together with their labels.
# NOTE(review): the shape printed further down is (8500, 768, 768); if the
# embeddings are float32 that is roughly 20 GB of host memory — confirm dtype.
matrices, labels = get_matrices(train, model)

  0%|          | 0/133 [00:00<?, ?it/s]

# Dimensionality reduction (Train)

In [5]:
import numpy as np

from sklearn.decomposition import KernelPCA

# Collapse each sample's matrix into a single feature vector (one row per sample).
flat_m = matrices.flatten(start_dim=1).numpy()

# Fit a cosine-kernel PCA on the training vectors and keep 256 components;
# the fitted `reduction` object is reused for the validation split.
reduction = KernelPCA(n_components=256, kernel='cosine')
reduced_m = reduction.fit_transform(flat_m)

# Extraction and reduction (Validation)

In [10]:
# Same extraction as for the training split, applied to the dev split.
valid_matrices, valid_labels = get_matrices(dev, model)

# Flatten to one row per sample, then project with the KernelPCA that was
# fitted on the training data (transform only, no re-fitting).
valid_flat_m = valid_matrices.reshape(len(valid_matrices), -1).numpy()
valid_reduced_m = reduction.transform(valid_flat_m)

  0%|          | 0/8 [00:04<?, ?it/s]

# Apply learning algorithms

In [17]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.feature_selection import VarianceThreshold

import pandas as pd

# Fixed seed for the estimators that use randomness, so that re-running the
# notebook reproduces the same score table.
SEED = 0

names = [
    "Nearest Neighbors",
    "Linear SVM",
    "RBF SVM",
    "Decision Tree",
    "Random Forest",
    "AdaBoost",
    "Naive Bayes",
    "QDA",
]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    # NOTE(review): the recorded score for this configuration is 0.5; gamma=2
    # may be too large for standardized 256-D inputs — consider gamma='scale'.
    SVC(gamma=2, C=1),
    DecisionTreeClassifier(random_state=SEED),
    RandomForestClassifier(n_estimators=100, random_state=SEED),
    AdaBoostClassifier(random_state=SEED),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]

# `names` and `classifiers` are parallel lists; zip() would silently drop the
# tail if one of them were edited without the other.
assert len(names) == len(classifiers), "names and classifiers must align"

results = dict()
for classifier, name in zip(classifiers, names):
    print(f'Classifier {name}')
    # Standardize the reduced features, drop any constant (zero-variance)
    # columns, then fit the classifier on the training split.
    clf = make_pipeline(StandardScaler(), VarianceThreshold(), classifier)
    clf.fit(reduced_m, labels)
    # Score of the fitted pipeline on the validation split.
    score = clf.score(valid_reduced_m, valid_labels)
    results[name] = {'score': score}

# One column per classifier, one 'score' row; displayed as the cell output.
pd.DataFrame(results)


Classifier Nearest Neighbors
Classifier Linear SVM
Classifier RBF SVM
Classifier Decision Tree
Classifier Random Forest
Classifier AdaBoost
Classifier Naive Bayes
Classifier QDA


Unnamed: 0,Nearest Neighbors,Linear SVM,RBF SVM,Decision Tree,Random Forest,AdaBoost,Naive Bayes,QDA
score,0.556,0.616,0.5,0.57,0.532,0.578,0.592,0.644


In [16]:
# Sanity check on the dimensions of the training tensor.
# NOTE(review): presumably (n_samples, image_embed_dim, text_embed_dim), given
# how get_matrices builds each matrix — confirm.
matrices.shape

torch.Size([8500, 768, 768])

In [8]:
!pip install catboost

Defaulting to user installation because normal site-packages is not writeable
Collecting catboost
  Downloading catboost-1.1.1-cp310-none-manylinux1_x86_64.whl (76.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.6/76.6 MB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting graphviz
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.0/47.0 KB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: graphviz, catboost
Successfully installed catboost-1.1.1 graphviz-0.20.1
