## Initialize open_clip and create embeddings for the labels

In [1]:
import open_clip
import torch

# Load the pretrained OpenCLIP ViT-B/32 model together with an image
# preprocessing transform. The second return value is unused in this notebook.
model, _, preprocess = open_clip.create_model_and_transforms(
    "ViT-B-32", 
    pretrained="laion2b_s34b_b79k"
)
# The model is only used for inference in this notebook; put it in eval mode so
# any train-time-only layers behave deterministically.
model.eval()

tokenizer = open_clip.get_tokenizer("ViT-B-32")

# One text prompt per class. The position of each prompt is later compared
# against the dataset's integer label (argmax index == label), so the order
# here must match the STL10 class indices -- TODO confirm against the dataset.
labels = [
    "an airplane",
    "a bird",
    "a car",
    "a cat",
    "a deer",
    "a dog",
    "a horse",
    "a monkey",
    "a ship",
    "a truck"
]

text = tokenizer(labels)
# The text embeddings are fixed inputs for every later cell; computing them
# under no_grad avoids keeping an autograd graph attached to them.
with torch.no_grad():
    text_embeddings = model.encode_text(text)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Number of text embeddings (one row per entry in `labels`).
text_embeddings.shape[0]

10

In [3]:
import torch.nn.functional as F

def embeddings_to_class_probs(vision_embeddings, text_embeddings, logit_scale=100.):
    """Turn image and text embeddings into per-class probabilities.

    Both inputs are L2-normalized along their last dimension, so the matrix
    product below is a cosine similarity between every image embedding and
    every text embedding. The similarities are scaled and passed through a
    softmax over the class (last) dimension.

    Args:
        vision_embeddings: Tensor of image embeddings; the last dimension is
            the embedding dimension.
        text_embeddings: 2-D tensor with one row per class and the same
            embedding dimension as ``vision_embeddings``.
        logit_scale: Multiplier applied to the cosine similarities before the
            softmax. The default of 100 preserves the previously hard-coded
            value; larger values give sharper distributions.

    Returns:
        Tensor of class probabilities whose last dimension has one entry per
        row of ``text_embeddings`` and sums to 1.
    """
    vision_unit = vision_embeddings / vision_embeddings.norm(dim=-1, keepdim=True)
    text_unit = text_embeddings / text_embeddings.norm(dim=-1, keepdim=True)
    # Rows: images, columns: classes.
    logits = vision_unit @ text_unit.T
    return F.softmax(logit_scale * logits, dim=-1)

### Get zero-shot predictions on the STL10 test split with open clip

In [4]:
import tqdm
import torch
from torchvision.datasets import STL10

# Evaluate on the STL10 test split (downloaded to ./stl10 if not present).
dataset = STL10(
    root="./stl10",
    download=True,
    split="test"
)

num_correct = 0

# Pure inference: disable autograd so no computation graph is built for each
# of the images pushed through the image encoder.
with torch.no_grad():
    for image, label in tqdm.tqdm(dataset):
        # preprocess() yields a single image tensor; add a batch dimension of 1.
        input_tensor = preprocess(image).unsqueeze(0)
        vision_embeddings = model.encode_image(input_tensor)
        output_class_probs = embeddings_to_class_probs(vision_embeddings, text_embeddings)
        # Predicted class = index of the most probable text prompt.
        output_label = torch.argmax(output_class_probs, dim=-1)
        num_correct += int(torch.count_nonzero(output_label == label))

# Percentage of test images whose predicted class matches the label.
accuracy = 100. * num_correct / len(dataset)

100%|██████████| 8000/8000 [04:51<00:00, 27.45it/s]


In [5]:
# Top-1 accuracy (in percent) of the text-prompt classifier on the STL10 test split.
accuracy

96.675

### Training resnet18 from scratch on the STL10 dataset with no distillation


In [None]:
from stl10_utils import (
    precompute_clip_stl10_train_image_embeddings,
    precompute_clip_stl10_test_image_embeddings,
    precompute_clip_stl10_text_embeddings,
    train_resnet18_from_scratch,
    train_resnet18_linear_probe_train_only
)

# Helpers from the project-local stl10_utils module. Presumably each
# precompute_* call caches CLIP embeddings for the named STL10 split / label
# text so later training runs can reuse them -- confirm in stl10_utils.
precompute_clip_stl10_train_image_embeddings()
precompute_clip_stl10_test_image_embeddings()
precompute_clip_stl10_text_embeddings()
# Baseline run: the recorded output below reports test loss and accuracy for
# 10 epochs.
train_resnet18_from_scratch()


  from .autonotebook import tqdm as notebook_tqdm
5000it [00:00, 8787.94it/s]
8000it [00:00, 9257.65it/s]


Using device: mps for train_model_from_scratch




Epoch 1 - Test Loss: 1.7259, Test Accuracy: 34.29%
Epoch 2 - Test Loss: 1.6783, Test Accuracy: 39.52%
Epoch 3 - Test Loss: 1.4484, Test Accuracy: 46.79%
Epoch 4 - Test Loss: 1.3064, Test Accuracy: 51.29%
Epoch 5 - Test Loss: 1.2840, Test Accuracy: 52.86%
Epoch 6 - Test Loss: 1.2543, Test Accuracy: 53.89%
Epoch 7 - Test Loss: 1.3261, Test Accuracy: 55.14%
Epoch 8 - Test Loss: 1.2820, Test Accuracy: 57.17%
Epoch 9 - Test Loss: 1.3840, Test Accuracy: 56.45%
Epoch 10 - Test Loss: 1.3339, Test Accuracy: 57.70%
Finished Training


### Train a linear-probe version of the open clip transformer model first, then use it as the teacher for the resnet18 model. Only the 5000 labeled training images are used.

In [None]:
from stl10_utils import (
    precompute_clip_stl10_train_image_embeddings,
    precompute_clip_stl10_test_image_embeddings,
    precompute_clip_stl10_text_embeddings,
    train_resnet18_from_scratch,
    train_resnet18_linear_probe_train_only
)
# Per the recorded output below, this single call runs two phases: it first
# trains the probe model (EPOCH 0-14, "train_probe_model") and then trains the
# student classification model (EPOCH 0-49), saving checkpoints under
# data/experiments/.
train_resnet18_linear_probe_train_only()

  from .autonotebook import tqdm as notebook_tqdm


Using device: mps for train_probe_model


100%|██████████| 79/79 [00:41<00:00,  1.90it/s]
100%|██████████| 125/125 [00:41<00:00,  3.02it/s] 

| EPOCH 0 | TRAIN LOSS 1.6945151030262815 | TEST ACC 96.525 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_0.pth



100%|██████████| 79/79 [00:41<00:00,  1.93it/s] 
100%|██████████| 125/125 [00:41<00:00,  3.01it/s] 

| EPOCH 1 | TRAIN LOSS 0.8586429312259336 | TEST ACC 97.875 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_1.pth



100%|██████████| 79/79 [00:41<00:00,  1.93it/s]
100%|██████████| 125/125 [00:41<00:00,  3.03it/s] 

| EPOCH 2 | TRAIN LOSS 0.4846418032163306 | TEST ACC 98.15 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_2.pth



100%|██████████| 79/79 [00:40<00:00,  1.93it/s] 
100%|██████████| 125/125 [00:41<00:00,  3.02it/s] 

| EPOCH 3 | TRAIN LOSS 0.3205600018742718 | TEST ACC 98.287 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_3.pth



100%|██████████| 79/79 [00:41<00:00,  1.93it/s] 
100%|██████████| 125/125 [00:41<00:00,  3.02it/s] 

| EPOCH 4 | TRAIN LOSS 0.23346096861966048 | TEST ACC 98.275 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_4.pth



100%|██████████| 79/79 [00:41<00:00,  1.93it/s] 
100%|██████████| 125/125 [00:41<00:00,  3.02it/s] 

| EPOCH 5 | TRAIN LOSS 0.18421262238599076 | TEST ACC 98.312 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_5.pth



100%|██████████| 79/79 [00:41<00:00,  1.92it/s]
100%|██████████| 125/125 [00:41<00:00,  3.02it/s] 

| EPOCH 6 | TRAIN LOSS 0.15107247714377656 | TEST ACC 98.325 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_6.pth



100%|██████████| 79/79 [00:41<00:00,  1.92it/s] 
100%|██████████| 125/125 [00:41<00:00,  3.02it/s] 

| EPOCH 7 | TRAIN LOSS 0.1297762291152266 | TEST ACC 98.35 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_7.pth



100%|██████████| 79/79 [00:41<00:00,  1.92it/s]
100%|██████████| 125/125 [00:41<00:00,  3.02it/s] 

| EPOCH 8 | TRAIN LOSS 0.11251492383359354 | TEST ACC 98.362 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_8.pth



100%|██████████| 79/79 [00:41<00:00,  1.93it/s]
100%|██████████| 125/125 [00:41<00:00,  3.02it/s] 

| EPOCH 9 | TRAIN LOSS 0.10368885986412628 | TEST ACC 98.412 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_9.pth



100%|██████████| 79/79 [00:41<00:00,  1.93it/s]
100%|██████████| 125/125 [00:41<00:00,  3.02it/s] 

| EPOCH 10 | TRAIN LOSS 0.0907736331507375 | TEST ACC 98.388 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_10.pth



100%|██████████| 79/79 [00:40<00:00,  1.93it/s] 
100%|██████████| 125/125 [00:41<00:00,  3.02it/s] 

| EPOCH 11 | TRAIN LOSS 0.08262064373945888 | TEST ACC 98.463 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_11.pth



100%|██████████| 79/79 [00:41<00:00,  1.92it/s] 
100%|██████████| 125/125 [00:41<00:00,  3.02it/s] 

| EPOCH 12 | TRAIN LOSS 0.07603679161188723 | TEST ACC 98.475 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_12.pth



100%|██████████| 79/79 [00:41<00:00,  1.92it/s]
100%|██████████| 125/125 [00:41<00:00,  3.02it/s] 

| EPOCH 13 | TRAIN LOSS 0.07574994027425971 | TEST ACC 98.463 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_13.pth



100%|██████████| 79/79 [00:41<00:00,  1.93it/s]
100%|██████████| 125/125 [00:41<00:00,  3.02it/s] 


| EPOCH 14 | TRAIN LOSS 0.06695853437803968 | TEST ACC 98.487 |
Saving checkpoint to data/experiments/train_probe_model_linear/checkpoint_14.pth
Using device: mps for train_student_classification_model


100%|██████████| 79/79 [00:48<00:00,  1.61it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 0 | TRAIN LOSS 0.17317846756947192 | TEST ACC 36.038 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe_train_only/checkpoint_0.pth



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 1 | TRAIN LOSS 0.14051566159800638 | TEST ACC 44.55 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe_train_only/checkpoint_1.pth



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 2 | TRAIN LOSS 0.1182014294251611 | TEST ACC 46.65 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe_train_only/checkpoint_2.pth



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.88it/s]

| EPOCH 3 | TRAIN LOSS 0.09572759308392488 | TEST ACC 51.538 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe_train_only/checkpoint_3.pth



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 4 | TRAIN LOSS 0.07417350564199159 | TEST ACC 49.675 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 5 | TRAIN LOSS 0.05573632780321037 | TEST ACC 56.462 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe_train_only/checkpoint_5.pth



100%|██████████| 79/79 [00:47<00:00,  1.67it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 6 | TRAIN LOSS 0.033709211958737315 | TEST ACC 55.8 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 7 | TRAIN LOSS 0.02217064075219103 | TEST ACC 58.962 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe_train_only/checkpoint_7.pth



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 8 | TRAIN LOSS 0.012735146681389099 | TEST ACC 58.95 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 9 | TRAIN LOSS 0.007625078976955972 | TEST ACC 58.85 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 10 | TRAIN LOSS 0.0065189318943627275 | TEST ACC 58.45 |



100%|██████████| 79/79 [00:47<00:00,  1.67it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 11 | TRAIN LOSS 0.005437405393871514 | TEST ACC 61.0 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe_train_only/checkpoint_11.pth



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 12 | TRAIN LOSS 0.005024810975912629 | TEST ACC 59.987 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 13 | TRAIN LOSS 0.005202068570011024 | TEST ACC 59.938 |



100%|██████████| 79/79 [00:47<00:00,  1.67it/s]
100%|██████████| 125/125 [00:43<00:00,  2.88it/s]

| EPOCH 14 | TRAIN LOSS 0.005184163940669615 | TEST ACC 60.025 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 15 | TRAIN LOSS 0.004824988661876208 | TEST ACC 59.225 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 16 | TRAIN LOSS 0.004273280463051758 | TEST ACC 60.288 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 17 | TRAIN LOSS 0.005281506966846653 | TEST ACC 59.763 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 18 | TRAIN LOSS 0.004780650913974718 | TEST ACC 59.163 |



100%|██████████| 79/79 [00:47<00:00,  1.67it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 19 | TRAIN LOSS 0.004898924681157628 | TEST ACC 59.55 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 20 | TRAIN LOSS 0.004698792778992955 | TEST ACC 59.462 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 21 | TRAIN LOSS 0.004499808422443044 | TEST ACC 59.087 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 22 | TRAIN LOSS 0.004157212150247791 | TEST ACC 58.475 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 23 | TRAIN LOSS 0.0050617341627072114 | TEST ACC 59.6 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 24 | TRAIN LOSS 0.006159648646989578 | TEST ACC 58.95 |



100%|██████████| 79/79 [00:47<00:00,  1.67it/s]
100%|██████████| 125/125 [00:43<00:00,  2.88it/s]

| EPOCH 25 | TRAIN LOSS 0.0046725352206326365 | TEST ACC 59.062 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 26 | TRAIN LOSS 0.00645063483580664 | TEST ACC 59.188 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 27 | TRAIN LOSS 0.004075771597025421 | TEST ACC 60.337 |



100%|██████████| 79/79 [00:47<00:00,  1.67it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 28 | TRAIN LOSS 0.003833013704588896 | TEST ACC 60.087 |



100%|██████████| 79/79 [00:47<00:00,  1.67it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 29 | TRAIN LOSS 0.003347965200502378 | TEST ACC 60.538 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 30 | TRAIN LOSS 0.0032185214524499224 | TEST ACC 61.288 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe_train_only/checkpoint_30.pth



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 31 | TRAIN LOSS 0.003143160959080899 | TEST ACC 60.575 |



100%|██████████| 79/79 [00:47<00:00,  1.67it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 32 | TRAIN LOSS 0.003011773943111206 | TEST ACC 60.825 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 33 | TRAIN LOSS 0.0034961330783966032 | TEST ACC 61.225 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 34 | TRAIN LOSS 0.002953878574403404 | TEST ACC 60.663 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 35 | TRAIN LOSS 0.0028141941256302444 | TEST ACC 60.4 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 36 | TRAIN LOSS 0.0027187037903556154 | TEST ACC 61.3 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe_train_only/checkpoint_36.pth



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 37 | TRAIN LOSS 0.002916039443645579 | TEST ACC 60.913 |



100%|██████████| 79/79 [00:47<00:00,  1.67it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 38 | TRAIN LOSS 0.002963168984220092 | TEST ACC 60.938 |



100%|██████████| 79/79 [00:47<00:00,  1.67it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 39 | TRAIN LOSS 0.003542058981801796 | TEST ACC 60.075 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 40 | TRAIN LOSS 0.0031600376986014316 | TEST ACC 60.513 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 41 | TRAIN LOSS 0.002905622885055557 | TEST ACC 60.7 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 42 | TRAIN LOSS 0.002950665296373677 | TEST ACC 59.712 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 43 | TRAIN LOSS 0.002645805763976672 | TEST ACC 61.125 |



100%|██████████| 79/79 [00:47<00:00,  1.67it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 44 | TRAIN LOSS 0.0025625785509171555 | TEST ACC 60.425 |



100%|██████████| 79/79 [00:47<00:00,  1.67it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]


| EPOCH 45 | TRAIN LOSS 0.0022257919057826455 | TEST ACC 61.65 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe_train_only/checkpoint_45.pth


100%|██████████| 79/79 [00:47<00:00,  1.67it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 46 | TRAIN LOSS 0.0020061021190301716 | TEST ACC 60.775 |



100%|██████████| 79/79 [00:47<00:00,  1.65it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 47 | TRAIN LOSS 0.0019103355817732554 | TEST ACC 61.725 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe_train_only/checkpoint_47.pth



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 48 | TRAIN LOSS 0.0019364761884170998 | TEST ACC 61.625 |



100%|██████████| 79/79 [00:47<00:00,  1.66it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 49 | TRAIN LOSS 0.0019169185602377298 | TEST ACC 61.875 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe_train_only/checkpoint_49.pth





This results in a meager 62% accuracy; we can do better.

### Train the resnet18 as the student with open_clip's linear probe version of the transformer as the teacher. Use the 5000 labeled images plus the 100000 unlabeled images for the training dataset.

In [None]:
from stl10_utils import (
    precompute_clip_stl10_train_image_embeddings,
    precompute_clip_stl10_unlabeled_image_embeddings,
    precompute_clip_stl10_test_image_embeddings,
    precompute_clip_stl10_text_embeddings,
    train_resnet18_linear_probe
)

# Same precompute steps as before, plus the unlabeled split. The recorded
# progress output shows the 100000-item pass taking about an hour, while the
# 5000- and 8000-item passes finish in about a second.
# NOTE(review): the quick passes presumably hit an existing cache -- confirm in stl10_utils.
precompute_clip_stl10_train_image_embeddings()
precompute_clip_stl10_unlabeled_image_embeddings()
precompute_clip_stl10_test_image_embeddings()
precompute_clip_stl10_text_embeddings()


5000it [00:00, 6755.83it/s]
100000it [1:03:37, 26.20it/s]
8000it [00:01, 7360.21it/s]


Using device: mps for train_student_classification_model


In [2]:
from stl10_utils import (
    train_resnet18_linear_probe
)

# Trains the student classification model; the recorded output below shows
# 1641 training batches per epoch and a checkpoint saved under
# data/experiments/train_resnet18_linear_probe/ at most epochs.
# NOTE(review): the recorded run stops partway through an epoch (progress bar at 5%).
train_resnet18_linear_probe()

  from .autonotebook import tqdm as notebook_tqdm


Using device: mps for train_student_classification_model


100%|██████████| 1641/1641 [25:39<00:00,  1.07it/s]   
100%|██████████| 125/125 [04:09<00:00,  2.00s/it]

| EPOCH 0 | TRAIN LOSS 0.08073753968226263 | TEST ACC 62.725 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_0.pth



100%|██████████| 1641/1641 [06:01<00:00,  4.54it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 1 | TRAIN LOSS 0.05812758934899193 | TEST ACC 75.787 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_1.pth



100%|██████████| 1641/1641 [07:39<00:00,  3.57it/s]  
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]

| EPOCH 2 | TRAIN LOSS 0.04811464165184717 | TEST ACC 81.338 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_2.pth



100%|██████████| 1641/1641 [17:26<00:00,  1.57it/s]   
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 3 | TRAIN LOSS 0.03889543857223409 | TEST ACC 87.675 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_3.pth



100%|██████████| 1641/1641 [04:06<00:00,  6.66it/s]
100%|██████████| 125/125 [00:44<00:00,  2.81it/s]

| EPOCH 4 | TRAIN LOSS 0.030427580838937863 | TEST ACC 90.775 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_4.pth



100%|██████████| 1641/1641 [03:55<00:00,  6.98it/s]
100%|██████████| 125/125 [00:44<00:00,  2.83it/s]

| EPOCH 5 | TRAIN LOSS 0.023024145277679004 | TEST ACC 94.237 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_5.pth



100%|██████████| 1641/1641 [04:36<00:00,  5.93it/s] 
100%|██████████| 125/125 [00:44<00:00,  2.81it/s]

| EPOCH 6 | TRAIN LOSS 0.01760030334155909 | TEST ACC 95.125 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_6.pth



100%|██████████| 1641/1641 [04:17<00:00,  6.36it/s]  
100%|██████████| 125/125 [00:44<00:00,  2.82it/s]

| EPOCH 7 | TRAIN LOSS 0.014369634148936621 | TEST ACC 96.138 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_7.pth



100%|██████████| 1641/1641 [03:14<00:00,  8.43it/s]
100%|██████████| 125/125 [00:43<00:00,  2.89it/s]

| EPOCH 8 | TRAIN LOSS 0.012054615343042913 | TEST ACC 95.9 |



100%|██████████| 1641/1641 [03:14<00:00,  8.45it/s]
100%|██████████| 125/125 [00:43<00:00,  2.89it/s]

| EPOCH 9 | TRAIN LOSS 0.010548295209718906 | TEST ACC 96.25 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_9.pth



100%|██████████| 1641/1641 [03:13<00:00,  8.46it/s]
100%|██████████| 125/125 [00:43<00:00,  2.89it/s]

| EPOCH 10 | TRAIN LOSS 0.009557550123198632 | TEST ACC 96.263 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_10.pth



100%|██████████| 1641/1641 [03:13<00:00,  8.46it/s]
100%|██████████| 125/125 [00:43<00:00,  2.89it/s]

| EPOCH 11 | TRAIN LOSS 0.008693912711237387 | TEST ACC 96.325 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_11.pth



100%|██████████| 1641/1641 [03:14<00:00,  8.44it/s]
100%|██████████| 125/125 [00:43<00:00,  2.86it/s]

| EPOCH 12 | TRAIN LOSS 0.00801360661261542 | TEST ACC 96.062 |



100%|██████████| 1641/1641 [03:13<00:00,  8.48it/s]
100%|██████████| 125/125 [00:43<00:00,  2.89it/s]

| EPOCH 13 | TRAIN LOSS 0.007371369825640897 | TEST ACC 96.362 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_13.pth



100%|██████████| 1641/1641 [03:13<00:00,  8.46it/s]
100%|██████████| 125/125 [00:43<00:00,  2.89it/s]


| EPOCH 14 | TRAIN LOSS 0.006923411502335436 | TEST ACC 96.388 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_14.pth


100%|██████████| 1641/1641 [03:14<00:00,  8.45it/s]
100%|██████████| 125/125 [00:43<00:00,  2.89it/s]

| EPOCH 15 | TRAIN LOSS 0.006489613841728766 | TEST ACC 96.325 |



100%|██████████| 1641/1641 [03:15<00:00,  8.40it/s]
100%|██████████| 125/125 [00:43<00:00,  2.89it/s]

| EPOCH 16 | TRAIN LOSS 0.006063264433554465 | TEST ACC 96.375 |



100%|██████████| 1641/1641 [03:14<00:00,  8.45it/s]
100%|██████████| 125/125 [00:43<00:00,  2.87it/s]


| EPOCH 17 | TRAIN LOSS 0.00566466551558963 | TEST ACC 96.562 |
Saving checkpoint to data/experiments/train_resnet18_linear_probe/checkpoint_17.pth


100%|██████████| 1641/1641 [03:13<00:00,  8.47it/s]
100%|██████████| 125/125 [00:43<00:00,  2.89it/s]

| EPOCH 18 | TRAIN LOSS 0.005285361894229876 | TEST ACC 96.025 |



100%|██████████| 1641/1641 [03:12<00:00,  8.54it/s]
100%|██████████| 125/125 [00:43<00:00,  2.90it/s]

| EPOCH 19 | TRAIN LOSS 0.004969530341758561 | TEST ACC 96.412 |



  5%|▌         | 83/1641 [00:09<02:57,  8.76it/s]


: 

#### This quickly returned much better results and is nearly at the same accuracy level as the base open clip transformer model.