In [1]:
!pip install torch scikit-learn tqdm 

You should consider upgrading via the '/usr/bin/python3.8 -m pip install --upgrade pip' command.


## Load the pretrained model and preprocessor

In [11]:
import torch

# The encoder and the preprocessor are two entrypoints of the same torch.hub
# repository, so both are fetched with one parametrized call (encoder first).
model, preprocessor = (
    torch.hub.load('YannDubs/Invariant-Self-Supervised-Learning:main', entrypoint)
    for entrypoint in ('dissl_resnet50_dNone_e400_m6', 'preprocessor')
)

Using cache found in /root/.cache/torch/hub/YannDubs_Invariant-Self-Supervised-Learning_main
Downloading: "https://github.com/YannDubs/Invariant-Self-Supervised-Learning/releases/download/v1.0.0-alpha/dissl_resnet50_dNone_e400_m6.torch" to /root/.cache/torch/hub/checkpoints/dissl_resnet50_dNone_e400_m6.torch


  0%|          | 0.00/90.0M [00:00<?, ?B/s]

Using cache found in /root/.cache/torch/hub/YannDubs_Invariant-Self-Supervised-Learning_main


In [12]:
# Show every entrypoint the hub repository exposes
# (the other pretrained models as well as the preprocessor).
torch.hub.list(github="YannDubs/Invariant-Self-Supervised-Learning:main")

Using cache found in /root/.cache/torch/hub/YannDubs_Invariant-Self-Supervised-Learning_main


['dissl_resnet50_d8192_e100_m2',
 'dissl_resnet50_d8192_e400_m6',
 'dissl_resnet50_d8192_e800_m8',
 'dissl_resnet50_dNone_e100_m2',
 'dissl_resnet50_dNone_e400_m2',
 'dissl_resnet50_dNone_e400_m6',
 'preprocessor']

## Featurize some data
This requires a GPU

In [13]:
from torchvision.datasets import STL10

data_dir = "data/"
# The two splits share the root directory and the transform; only the split
# name differs. Each one is downloaded into `data_dir` if it is not present.
data_train, data_test = (
    STL10(data_dir, split=split_name, transform=preprocessor, download=True)
    for split_name in ("train", "test")
)

In [14]:
import tqdm
from torch.utils.data import DataLoader
import numpy as np
import os

def featurize_data(dataset, pretrained, batch_size=512, num_workers=None):
    """Run `pretrained` over every example of `dataset` and collect the outputs.

    Args:
        dataset: Dataset yielding `(x, y)` pairs that the default `DataLoader`
            collation can turn into tensors.
        pretrained: `torch.nn.Module` used as the encoder. It is modified in
            place: moved to the compute device, cast to the compute dtype and
            switched to eval mode.
        batch_size: Number of examples per forward pass.
        num_workers: Number of `DataLoader` worker processes. `None` uses
            `os.cpu_count()` (or 0 when the CPU count cannot be determined).

    Returns:
        Tuple `(Z, Y)` of NumPy arrays: the concatenated model outputs and the
        concatenated labels, both in dataset order.

    Raises:
        ValueError: If `dataset` is empty (nothing to concatenate).
    """
    if num_workers is None:
        # os.cpu_count() may return None, which DataLoader does not accept.
        num_workers = os.cpu_count() or 0

    # Half precision on GPU as before; fall back to float32 on CPU, where
    # half-precision kernels are not reliably available.
    if torch.cuda.is_available():
        device, dtype = torch.device("cuda"), torch.float16
    else:
        device, dtype = torch.device("cpu"), torch.float32

    # eval() makes BatchNorm/Dropout layers deterministic, so the features do
    # not depend on how the dataset happens to be batched.
    pretrained = pretrained.to(device=device, dtype=dtype).eval()

    loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)
    Z, Y = [], []
    with torch.no_grad():
        for x, y in tqdm.tqdm(loader):
            Z.append(pretrained(x.to(device=device, dtype=dtype)).cpu().numpy())
            Y.append(y.cpu().numpy())
    return np.concatenate(Z), np.concatenate(Y)

In [15]:
# Encode the train split first, then the test split, with the same model.
(Z_train, Y_train), (Z_test, Y_test) = (
    featurize_data(split, model) for split in (data_train, data_test)
)

100%|██████████| 10/10 [00:12<00:00,  1.26s/it]
100%|██████████| 16/16 [00:20<00:00,  1.26s/it]


## Evaluate

In [16]:
# Downstream evaluation: linear SVM on the extracted features.
# The reference run recorded below reached 96.00%.
from sklearn.svm import LinearSVC

# NOTE(review): the regularization strength C is picked by accuracy on the
# test split, so the reported number is the best test score over the grid,
# not a held-out estimate. Kept as-is to match the recorded protocol.
best_acc = 0
for C in tqdm.tqdm(np.logspace(-3,0,base=10,num=7)):
    # Fixed seed so that repeated runs of this cell report the same accuracy.
    clf = LinearSVC(C=C, random_state=0)
    clf.fit(Z_train, Y_train)
    acc = clf.score(Z_test, Y_test)
    best_acc = max(best_acc, acc)
print(f"Downstream STL10 accuracy: {best_acc*100:.2f}%")

100%|██████████| 7/7 [00:26<00:00,  3.74s/it]

Downstream STL10 accuracy: 96.00%



