# 0. Data preparation

In [None]:
import os
import subprocess
import torchvision.transforms as transforms
from PIL import Image
from torchvision.datasets import CIFAR10

In [None]:
# Root folder of the CIFAR-10 data, with one sub-folder per split.
dataset_path = './datasets/cifar10'
train_path, val_path = (
    os.path.join(dataset_path, split_name) for split_name in ('train', 'val')
)

In [None]:
# Create both split folders; folders that already exist are left as they are.
for split_dir in (train_path, val_path):
    os.makedirs(split_dir, exist_ok=True)

In [None]:
# Transform pipeline handed to the datasets below; it consists of a single ToTensor step.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
    ]
)

In [None]:
# Build the train and validation splits (train first), each downloaded into its own root folder.
trainset, valset = (
    CIFAR10(root=split_root, train=is_train, download=True, transform=transform)
    for split_root, is_train in ((train_path, True), (val_path, False))
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./datasets/cifar10/train/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 44309987.00it/s]


Extracting ./datasets/cifar10/train/cifar-10-python.tar.gz to ./datasets/cifar10/train
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./datasets/cifar10/val/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 31137805.42it/s]


Extracting ./datasets/cifar10/val/cifar-10-python.tar.gz to ./datasets/cifar10/val


In [None]:
def save_images(dataset, root_path):
    """Write every sample of ``dataset`` to ``root_path/<label>/<idx>.png``.

    Args:
        dataset: Iterable of ``(image, label)`` pairs. ``image`` may already be a
            PIL image, or anything ``transforms.ToPILImage`` accepts.
        root_path: Directory that receives one sub-directory per label; ``idx`` is
            the position of the sample in ``dataset``.
    """
    to_pil = transforms.ToPILImage()
    # Remember which class folders exist so makedirs runs once per class,
    # not once per image.
    created_dirs = set()
    for idx, (image, label) in enumerate(dataset):
        class_path = os.path.join(root_path, str(label))
        if class_path not in created_dirs:
            os.makedirs(class_path, exist_ok=True)
            created_dirs.add(class_path)
        # ToPILImage rejects PIL inputs, so pass those through unchanged.
        pil_image = image if isinstance(image, Image.Image) else to_pil(image)
        pil_image.save(os.path.join(class_path, f'{idx}.png'))

In [None]:
# Export both splits as PNG files, training split first.
for split_dataset, split_root in ((trainset, train_path), (valset, val_path)):
    save_images(split_dataset, split_root)

# 1. Models

In [None]:
# Fetch the solo-learn fork and install it from the local checkout.
!git clone https://github.com/gosh-a/solo-learn.git
%cd solo-learn
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install .
# NOTE(review): the working directory stays inside the clone after this cell, while later
# cells call 'solo-learn/main_pretrain.py' relative to the cwd - confirm the expected cwd.

Cloning into 'solo-learn'...
remote: Enumerating objects: 5161, done.[K
remote: Counting objects: 100% (294/294), done.[K
remote: Compressing objects: 100% (177/177), done.[K
remote: Total 5161 (delta 176), reused 191 (delta 115), pack-reused 4867[K
Receiving objects: 100% (5161/5161), 5.18 MiB | 10.11 MiB/s, done.
Resolving deltas: 100% (3637/3637), done.
/content/solo-learn
Processing /content/solo-learn
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting einops (from solo-learn==1.0.6)
  Downloading einops-0.8.0-py3-none-any.whl (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.2/43.2 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting lightning==2.1.2 (from solo-learn==1.0.6)
  Downloading lightning-2.1.2-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchmetrics<0.12.0,>=0.6.0 (from solo-learn==1.0.6)
  Downloading 

In [None]:
# Report the GPU visible to this runtime (driver/CUDA version, memory and utilisation).
!nvidia-smi

Sat Jun 22 22:03:58 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   44C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

### SimCLR

#### pretrain the model

In [None]:
# Argument list for the pretraining script with the SimCLR config;
# the last entry is the override that sets wandb.offline to True.
command = ['python3', 'solo-learn/main_pretrain.py']
command += ['--config-path', 'scripts/pretrain/cifar/']
command += ['--config-name', 'simclr.yaml']
command.append('++wandb.offline=True')

In [None]:
# Run `command` and echo its output line by line.
# stderr is merged into stdout: draining stdout to EOF before touching stderr can
# deadlock once the unread stderr pipe fills its OS buffer.
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

# The context manager closes the pipe and waits for the process to exit.
with process:
    for line in process.stdout:
        print(line, end='')

# Surface a failed run instead of silently moving on to the next cell.
if process.returncode != 0:
    raise subprocess.CalledProcessError(process.returncode, command)

Files already downloaded and verified

Training: |          | 0/? [00:00<?, ?it/s]
Training:   0%|          | 0/195 [00:00<?, ?it/s]
Epoch 0:   0%|          | 0/195 [00:00<?, ?it/s] 
Epoch 0:  10%|█         | 20/195 [00:14<02:08,  1.36it/s]
Epoch 0:  10%|█         | 20/195 [00:14<02:08,  1.36it/s, v_num=eczv]
Epoch 0:  21%|██        | 40/195 [00:27<01:45,  1.47it/s, v_num=eczv]
Epoch 0:  21%|██        | 40/195 [00:27<01:45,  1.47it/s, v_num=eczv]
Epoch 0:  31%|███       | 60/195 [00:37<01:24,  1.61it/s, v_num=eczv]
Epoch 0:  31%|███       | 60/195 [00:37<01:24,  1.61it/s, v_num=eczv]
Epoch 0:  41%|████      | 80/195 [00:48<01:09,  1.66it/s, v_num=eczv]
Epoch 0:  41%|████      | 80/195 [00:48<01:09,  1.66it/s, v_num=eczv]
Epoch 0:  51%|█████▏    | 100/195 [01:00<00:57,  1.64it/s, v_num=eczv]
Epoch 0:  51%|█████▏    | 100/195 [01:00<00:57,  1.64it/s, v_num=eczv]
Epoch 0:  62%|██████▏   | 120/195 [01:11<00:44,  1.69it/s, v_num=eczv]
Epoch 0:  62%|██████▏   | 120/195 [01:11<00:44,  1.69it/

#### evaluate the model

In [None]:
# Find the checkpoint files under trained_models/simclr and replace '=' in their
# names with '_'.
# NOTE(review): presumably needed because the path is later passed inside a
# `key=value` override (++pretrained_feature_extractor=...) - confirm.
# Checkpoints are selected by their '.ckpt' extension rather than by position in
# os.walk's file list, whose order is arbitrary and which may hold fewer than two entries.
checkpoint_root = os.path.join(os.getcwd(), 'trained_models', 'simclr')
checkpoint_paths = []
for dirpath, _, filenames in os.walk(checkpoint_root):
    for filename in filenames:
        if not filename.endswith('.ckpt'):
            continue
        renamed_path = os.path.join(dirpath, filename.replace('=', '_'))
        os.rename(os.path.join(dirpath, filename), renamed_path)
        checkpoint_paths.append(renamed_path)

if not checkpoint_paths:
    raise FileNotFoundError(f'no .ckpt file found under {checkpoint_root}')

# With several checkpoints (e.g. from repeated runs), use the most recently modified one.
new_path = max(checkpoint_paths, key=os.path.getmtime)

In [None]:
# Drop the first two '/'-separated fields of the checkpoint path (for an absolute
# path: the empty field before the leading '/' and the first directory).
# NOTE(review): assumes a POSIX-style path whose first directory should be stripped - confirm.
path_fields = new_path.split('/')
command_path_name = '/'.join(path_fields[2:])

In [None]:
# Argument list for the linear script with the SimCLR config; the checkpoint path
# computed above is passed as an override and wandb.offline is set to True.
command = ['python3', 'solo-learn/main_linear.py']
command += ['--config-path', 'scripts/linear/cifar/']
command += ['--config-name', 'simclr.yaml']
command += [f'++pretrained_feature_extractor={command_path_name}', '++wandb.offline=True']

In [None]:
# Run `command` and echo its output line by line.
# stderr is merged into stdout: draining stdout to EOF before touching stderr can
# deadlock once the unread stderr pipe fills its OS buffer.
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

# The context manager closes the pipe and waits for the process to exit.
with process:
    for line in process.stdout:
        print(line, end='')

# Surface a failed run instead of silently moving on to the next cell.
if process.returncode != 0:
    raise subprocess.CalledProcessError(process.returncode, command)

Files already downloaded and verified
Files already downloaded and verified

Sanity Checking: |          | 0/? [00:00<?, ?it/s]
Sanity Checking:   0%|          | 0/2 [00:00<?, ?it/s]
Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]
Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00,  3.23it/s]
                                                                           

Training: |          | 0/? [00:00<?, ?it/s]
Training:   0%|          | 0/195 [00:00<?, ?it/s]
Epoch 0:   0%|          | 0/195 [00:00<?, ?it/s] 
Epoch 0:  10%|█         | 20/195 [00:04<00:37,  4.69it/s]
Epoch 0:  10%|█         | 20/195 [00:04<00:37,  4.68it/s, v_num=npq2]
Epoch 0:  21%|██        | 40/195 [00:08<00:32,  4.79it/s, v_num=npq2]
Epoch 0:  21%|██        | 40/195 [00:08<00:32,  4.78it/s, v_num=npq2]
Epoch 0:  31%|███       | 60/195 [00:12<00:28,  4.78it/s, v_num=npq2]
Epoch 0:  31%|███       | 60/195 [00:12<00:28,  4.77it/s, v_num=npq2]
Epoch 0:  41%|████      | 80/195 [00:14<00:2

### MoCoV3

#### pretrain the model

In [None]:
# Argument list for the pretraining script with the MoCo v3 config;
# the last entry is the override that sets wandb.offline to True.
command = ['python3', 'solo-learn/main_pretrain.py']
command += ['--config-path', 'scripts/pretrain/cifar/']
command += ['--config-name', 'mocov3.yaml']
command.append('++wandb.offline=True')

In [None]:
# Run `command` and echo its output line by line.
# stderr is merged into stdout: draining stdout to EOF before touching stderr can
# deadlock once the unread stderr pipe fills its OS buffer.
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

# The context manager closes the pipe and waits for the process to exit.
with process:
    for line in process.stdout:
        print(line, end='')

# Surface a failed run instead of silently moving on to the next cell.
if process.returncode != 0:
    raise subprocess.CalledProcessError(process.returncode, command)

Files already downloaded and verified

Training: |          | 0/? [00:00<?, ?it/s]
Training:   0%|          | 0/195 [00:00<?, ?it/s]
Epoch 0:   0%|          | 0/195 [00:00<?, ?it/s] 
Epoch 0:  10%|█         | 20/195 [00:14<02:03,  1.41it/s]
Epoch 0:  10%|█         | 20/195 [00:14<02:03,  1.41it/s, v_num=8pjp]
Epoch 0:  21%|██        | 40/195 [00:24<01:33,  1.66it/s, v_num=8pjp]
Epoch 0:  21%|██        | 40/195 [00:24<01:33,  1.66it/s, v_num=8pjp]
Epoch 0:  31%|███       | 60/195 [00:36<01:21,  1.66it/s, v_num=8pjp]
Epoch 0:  31%|███       | 60/195 [00:36<01:21,  1.66it/s, v_num=8pjp]
Epoch 0:  41%|████      | 80/195 [00:48<01:09,  1.66it/s, v_num=8pjp]
Epoch 0:  41%|████      | 80/195 [00:48<01:09,  1.66it/s, v_num=8pjp]
Epoch 0:  51%|█████▏    | 100/195 [00:58<00:55,  1.71it/s, v_num=8pjp]
Epoch 0:  51%|█████▏    | 100/195 [00:58<00:55,  1.71it/s, v_num=8pjp]
Epoch 0:  62%|██████▏   | 120/195 [01:09<00:43,  1.72it/s, v_num=8pjp]
Epoch 0:  62%|██████▏   | 120/195 [01:09<00:43,  1.72it/

#### evaluate the model

In [None]:
# Find the checkpoint files under trained_models/mocov3 and replace '=' in their
# names with '_'.
# NOTE(review): presumably needed because the path is later passed inside a
# `key=value` override (++pretrained_feature_extractor=...) - confirm.
# Checkpoints are selected by their '.ckpt' extension rather than by position in
# os.walk's file list, whose order is arbitrary and which may hold fewer than two entries.
checkpoint_root = os.path.join(os.getcwd(), 'trained_models', 'mocov3')
checkpoint_paths = []
for dirpath, _, filenames in os.walk(checkpoint_root):
    for filename in filenames:
        if not filename.endswith('.ckpt'):
            continue
        renamed_path = os.path.join(dirpath, filename.replace('=', '_'))
        os.rename(os.path.join(dirpath, filename), renamed_path)
        checkpoint_paths.append(renamed_path)

if not checkpoint_paths:
    raise FileNotFoundError(f'no .ckpt file found under {checkpoint_root}')

# With several checkpoints (e.g. from repeated runs), use the most recently modified one.
new_path = max(checkpoint_paths, key=os.path.getmtime)

In [None]:
# Drop the first two '/'-separated fields of the checkpoint path (for an absolute
# path: the empty field before the leading '/' and the first directory).
# NOTE(review): assumes a POSIX-style path whose first directory should be stripped - confirm.
path_fields = new_path.split('/')
command_path_name = '/'.join(path_fields[2:])

In [None]:
# Argument list for the linear script with the MoCo v3 config; the checkpoint path
# computed above is passed as an override and wandb.offline is set to True.
command = ['python3', 'solo-learn/main_linear.py']
command += ['--config-path', 'scripts/linear/cifar/']
command += ['--config-name', 'mocov3.yaml']
command += [f'++pretrained_feature_extractor={command_path_name}', '++wandb.offline=True']

In [None]:
# Run `command` and echo its output line by line.
# stderr is merged into stdout: draining stdout to EOF before touching stderr can
# deadlock once the unread stderr pipe fills its OS buffer.
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

# The context manager closes the pipe and waits for the process to exit.
with process:
    for line in process.stdout:
        print(line, end='')

# Surface a failed run instead of silently moving on to the next cell.
if process.returncode != 0:
    raise subprocess.CalledProcessError(process.returncode, command)

Files already downloaded and verified
Files already downloaded and verified

Sanity Checking: |          | 0/? [00:00<?, ?it/s]
Sanity Checking:   0%|          | 0/2 [00:00<?, ?it/s]
Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]
Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00,  3.43it/s]
                                                                           

Training: |          | 0/? [00:00<?, ?it/s]
Training:   0%|          | 0/195 [00:00<?, ?it/s]
Epoch 0:   0%|          | 0/195 [00:00<?, ?it/s] 
Epoch 0:  10%|█         | 20/195 [00:02<00:21,  8.10it/s]
Epoch 0:  10%|█         | 20/195 [00:02<00:21,  8.06it/s, v_num=nhtr]
Epoch 0:  21%|██        | 40/195 [00:04<00:17,  8.94it/s, v_num=nhtr]
Epoch 0:  21%|██        | 40/195 [00:04<00:17,  8.93it/s, v_num=nhtr]
Epoch 0:  31%|███       | 60/195 [00:07<00:16,  8.18it/s, v_num=nhtr]
Epoch 0:  31%|███       | 60/195 [00:07<00:16,  8.17it/s, v_num=nhtr]
Epoch 0:  41%|████      | 80/195 [00:10<00:1

### DINO

#### pretrain the model

In [None]:
# Argument list for the pretraining script with the DINO config;
# the last entry is the override that sets wandb.offline to True.
command = ['python3', 'solo-learn/main_pretrain.py']
command += ['--config-path', 'scripts/pretrain/cifar/']
command += ['--config-name', 'dino.yaml']
command.append('++wandb.offline=True')

In [None]:
# Run `command` and echo its output line by line.
# stderr is merged into stdout: draining stdout to EOF before touching stderr can
# deadlock once the unread stderr pipe fills its OS buffer.
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

# The context manager closes the pipe and waits for the process to exit.
with process:
    for line in process.stdout:
        print(line, end='')

# Surface a failed run instead of silently moving on to the next cell.
if process.returncode != 0:
    raise subprocess.CalledProcessError(process.returncode, command)

Files already downloaded and verified

Training: |          | 0/? [00:00<?, ?it/s]
Training:   0%|          | 0/195 [00:00<?, ?it/s]
Epoch 0:   0%|          | 0/195 [00:00<?, ?it/s] 
Epoch 0:  10%|█         | 20/195 [00:14<02:08,  1.36it/s]
Epoch 0:  10%|█         | 20/195 [00:14<02:08,  1.36it/s, v_num=56da]
Epoch 0:  21%|██        | 40/195 [00:24<01:36,  1.61it/s, v_num=56da]
Epoch 0:  21%|██        | 40/195 [00:24<01:36,  1.61it/s, v_num=56da]
Epoch 0:  31%|███       | 60/195 [00:35<01:20,  1.67it/s, v_num=56da]
Epoch 0:  31%|███       | 60/195 [00:35<01:20,  1.67it/s, v_num=56da]
Epoch 0:  41%|████      | 80/195 [00:48<01:09,  1.65it/s, v_num=56da]
Epoch 0:  41%|████      | 80/195 [00:48<01:09,  1.65it/s, v_num=56da]
Epoch 0:  51%|█████▏    | 100/195 [00:59<00:56,  1.67it/s, v_num=56da]
Epoch 0:  51%|█████▏    | 100/195 [00:59<00:56,  1.67it/s, v_num=56da]
Epoch 0:  62%|██████▏   | 120/195 [01:10<00:43,  1.71it/s, v_num=56da]
Epoch 0:  62%|██████▏   | 120/195 [01:10<00:43,  1.71it/

#### evaluate the model

In [None]:
# Find the checkpoint files under trained_models/dino and replace '=' in their
# names with '_'.
# NOTE(review): presumably needed because the path is later passed inside a
# `key=value` override (++pretrained_feature_extractor=...) - confirm.
# Checkpoints are selected by their '.ckpt' extension rather than by position in
# os.walk's file list, whose order is arbitrary and which may hold fewer than two entries.
checkpoint_root = os.path.join(os.getcwd(), 'trained_models', 'dino')
checkpoint_paths = []
for dirpath, _, filenames in os.walk(checkpoint_root):
    for filename in filenames:
        if not filename.endswith('.ckpt'):
            continue
        renamed_path = os.path.join(dirpath, filename.replace('=', '_'))
        os.rename(os.path.join(dirpath, filename), renamed_path)
        checkpoint_paths.append(renamed_path)

if not checkpoint_paths:
    raise FileNotFoundError(f'no .ckpt file found under {checkpoint_root}')

# With several checkpoints (e.g. from repeated runs), use the most recently modified one.
new_path = max(checkpoint_paths, key=os.path.getmtime)

In [None]:
# Drop the first two '/'-separated fields of the checkpoint path (for an absolute
# path: the empty field before the leading '/' and the first directory).
# NOTE(review): assumes a POSIX-style path whose first directory should be stripped - confirm.
path_fields = new_path.split('/')
command_path_name = '/'.join(path_fields[2:])

In [None]:
# Argument list for the linear script with the DINO config; the checkpoint path
# computed above is passed as an override and wandb.offline is set to True.
command = ['python3', 'solo-learn/main_linear.py']
command += ['--config-path', 'scripts/linear/cifar/']
command += ['--config-name', 'dino.yaml']
command += [f'++pretrained_feature_extractor={command_path_name}', '++wandb.offline=True']

In [None]:
# Run `command` and echo its output line by line.
# stderr is merged into stdout: draining stdout to EOF before touching stderr can
# deadlock once the unread stderr pipe fills its OS buffer.
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)

# The context manager closes the pipe and waits for the process to exit.
with process:
    for line in process.stdout:
        print(line, end='')

# Surface a failed run instead of silently moving on to the next cell.
if process.returncode != 0:
    raise subprocess.CalledProcessError(process.returncode, command)

Files already downloaded and verified
Files already downloaded and verified

Sanity Checking: |          | 0/? [00:00<?, ?it/s]
Sanity Checking:   0%|          | 0/2 [00:00<?, ?it/s]
Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]
Sanity Checking DataLoader 0: 100%|██████████| 2/2 [00:00<00:00,  3.17it/s]
                                                                           

Training: |          | 0/? [00:00<?, ?it/s]
Training:   0%|          | 0/195 [00:00<?, ?it/s]
Epoch 0:   0%|          | 0/195 [00:00<?, ?it/s] 
Epoch 0:  10%|█         | 20/195 [00:02<00:20,  8.44it/s]
Epoch 0:  10%|█         | 20/195 [00:02<00:20,  8.42it/s, v_num=2ekp]
Epoch 0:  21%|██        | 40/195 [00:04<00:18,  8.25it/s, v_num=2ekp]
Epoch 0:  21%|██        | 40/195 [00:04<00:18,  8.24it/s, v_num=2ekp]
Epoch 0:  31%|███       | 60/195 [00:08<00:18,  7.20it/s, v_num=2ekp]
Epoch 0:  31%|███       | 60/195 [00:08<00:18,  7.19it/s, v_num=2ekp]
Epoch 0:  41%|████      | 80/195 [00:12<00:1