In [1]:
import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch.nn as nn
import numpy as np
import os
import clip
    
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset

In [5]:
    model, preprocess = clip.load('ViT-B/32', device)

In [83]:
path_dataset_10 = "/projectnb/ec523kb/projects/chopped/data/food101_10percent/train"
transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor()
    ])
train_dataset_10 = datasets.ImageFolder(path_dataset_10, transform=transform)
dataloader_10 = DataLoader(train_dataset_10, batch_size=10, shuffle=True)

In [84]:
path_dataset_test = "/projectnb/ec523kb/projects/chopped/data/food101_test/"
test_dataset = datasets.ImageFolder(path_dataset_test, transform=transform)
dataloader_test = DataLoader(test_dataset, batch_size=10)

In [25]:
img, label = next(iter(dataloader_10))


img = transforms.Resize((224, 224))(img)
img = img.unsqueeze(0)
img = img.to(device)
img = img.half()
print(img.shape)

out = model.visual(img)
print(out.shape)
# fig, ax = plt.subplots(1, 3)
# ax[0].imshow(img.permute(1, 2, 0))
# print(label)

torch.Size([10, 3, 512, 512])
torch.Size([10, 3, 224, 224])
torch.Size([10, 512])


In [41]:
class ClipClassify(nn.Module):
    def __init__(self):
        super().__init__()
        self.vit = model.visual
        self.classification_head = nn.Linear(512, 101, dtype=torch.float32)

    def forward(self, x):
        x = self.vit(x)
        x = x.float()
        x = self.classification_head(x)
        x = F.softmax(x, dim=1)
        return x

In [63]:
def preprocess_images(inputs):
    inputs = transforms.Resize((224, 224))(inputs)
    inputs = inputs.to(device)
    inputs = inputs.half()
    return inputs

## Fine Tune With Various LLRD

In [89]:
parameters = []
net = ClipClassify().to(device)
dataloader_10 = DataLoader(train_dataset_10, batch_size=10, shuffle=True)
# Freeze Vit
for name, param in net.vit.named_parameters():
    param.requires_grad = False

epochs = 10
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
print_n = 100

for epoch in range(epochs):
    running_loss = 0.
    for i, data in enumerate(dataloader_10):
        inputs, labels = data
        inputs = transforms.Resize((224, 224))(inputs)
        inputs = inputs.to(device)
        inputs = inputs.half()

        labels = labels.to(device)
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % print_n == print_n - 1:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / print_n:.3f}')
            running_loss = 0.0

[1,   100] loss: 4.615
[1,   200] loss: 4.614
[1,   300] loss: 4.613
[1,   400] loss: 4.611
[1,   500] loss: 4.609
[1,   600] loss: 4.608
[1,   700] loss: 4.606
[2,   100] loss: 4.601
[2,   200] loss: 4.594
[2,   300] loss: 4.589
[2,   400] loss: 4.583
[2,   500] loss: 4.572
[2,   600] loss: 4.564
[2,   700] loss: 4.562
[3,   100] loss: 4.536
[3,   200] loss: 4.523
[3,   300] loss: 4.520
[3,   400] loss: 4.509
[3,   500] loss: 4.509
[3,   600] loss: 4.484
[3,   700] loss: 4.487
[4,   100] loss: 4.466
[4,   200] loss: 4.443
[4,   300] loss: 4.452
[4,   400] loss: 4.451
[4,   500] loss: 4.440
[4,   600] loss: 4.434
[4,   700] loss: 4.400
[5,   100] loss: 4.387
[5,   200] loss: 4.395
[5,   300] loss: 4.398
[5,   400] loss: 4.377
[5,   500] loss: 4.379
[5,   600] loss: 4.370
[5,   700] loss: 4.364
[6,   100] loss: 4.358
[6,   200] loss: 4.336
[6,   300] loss: 4.331
[6,   400] loss: 4.333
[6,   500] loss: 4.314
[6,   600] loss: 4.337
[6,   700] loss: 4.329
[7,   100] loss: 4.310
[7,   200] 

In [None]:
correct = 0
total = 0
dataloader_test = DataLoader(test_dataset, batch_size=10, shuffle=True)

n = 100
# since we're not training, we don't need to calculate the gradients for our outputs
print("data loaded")
with torch.no_grad():
    for i, data in enumerate(dataloader_test):
        images, labels = data
        # calculate outputs by running images through the network
        labels = labels.to(device)
        images = preprocess_images(images)
        
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        print(i)

        if i == n:
            break

print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

In [86]:
dataloader_test = DataLoader(test_dataset, batch_size=10, shuffle=True)
dataloader_10 = DataLoader(train_dataset_10, batch_size=10, shuffle=True)
for data in dataloader_10:
    img, label = data
    print(label)

tensor([36, 37, 21, 39, 22, 18, 60, 33, 13, 79])
tensor([43, 29, 32, 72, 60, 30, 46, 38, 92, 19])
tensor([23, 14,  2, 93, 17, 30, 94,  2, 28, 20])
tensor([21, 93, 31, 82, 85, 91,  1, 75, 22, 33])
tensor([ 41,   2,  50,  28,  54, 100,   0,  23,  99,  28])
tensor([ 5, 15, 33,  7, 91, 86,  2, 81, 28, 13])
tensor([ 4, 94, 90, 75, 31, 82, 34, 27, 51, 30])
tensor([47, 53, 40,  7, 52, 35, 41, 86, 83, 83])
tensor([39, 45, 43, 14, 74, 25, 96, 96, 31, 17])
tensor([42, 75, 80, 47, 64, 45, 90, 11, 93, 92])
tensor([90, 27, 50,  6, 22, 10,  7, 99, 41, 45])
tensor([81, 58, 73, 85, 85,  3, 36, 11, 13,  9])
tensor([ 2, 80, 38, 91, 43, 89, 46, 72, 17, 36])
tensor([ 0, 15, 66, 53, 28, 17, 59, 52, 81, 26])
tensor([78, 63, 44, 96, 73,  3, 32, 19, 89, 31])
tensor([18, 61,  0, 25, 38, 52, 79, 26, 54, 94])
tensor([30, 62, 82,  2, 15, 66, 67, 45, 74, 31])
tensor([ 9, 50, 15,  4,  4, 31, 36, 91, 18, 43])
tensor([18, 72, 61, 66, 60, 51, 94, 40, 60, 54])
tensor([ 33,  87,  92,  73,  99,  18,  84,  64, 100,  40])
