In [1]:
print("Hello")

Hello


In [2]:
from transformers import ViTImageProcessor, ViTModel, AutoImageProcessor
from PIL import Image
import requests

url = 'https://lumiere-a.akamaihd.net/v1/images/darth-vader-main_4560aff7.jpeg?region=71%2C0%2C1139%2C854'
image = Image.open(requests.get(url, stream=True).raw)

processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')
image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
inputs = processor(images=image, return_tensors="pt")

outputs = model(**inputs)
last_hidden_states = outputs.last_hidden_state


  from .autonotebook import tqdm as notebook_tqdm


In [10]:
import torch
from datasets import load_dataset
from tqdm import tqdm
dataset = load_dataset("mnist")
data = []
for row in tqdm(dataset['train'], total = len(dataset['train'])):
    image = row['image']
    label = row['label']
    image = image.resize((32, 32))
    image = image.convert("RGB")
    image = processor(images=image, return_tensors="pt")
    image = image['pixel_values']
    new_row = [image, label]
    data.append(new_row)
import random
random.shuffle(data)
train_split = data[:10000]
test_split = data[10000:11000]


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [04:28<00:00, 185.89it/s]


In [11]:
# split into train and test, take 10000 train samples and 1000 test samples
# randomly shuffle the data list
import random
random.shuffle(data)
train_split = data[:10000]
test_split = data[10000:11000]

In [12]:
labels = []
for row in data:
    labels.append(row[1])



In [13]:
print(set(labels))

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}


In [42]:
from torch import nn, optim
class ptuned_VIT(nn.Module):
    def __init__(self, num_classes):
        super(ptuned_VIT, self).__init__()
        self.model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
        self.embeddings = nn.Embedding(3, 768)
        self.classification_layer = nn.Linear(768, num_classes)
        self.softmax = nn.Softmax(dim=1)
        self.trainable = [self.embeddings, self.classification_layer]
        for param in self.model.parameters():
            param.requires_grad = False
    def forward(self, x):
        tens = [0,1,2]
        tens = torch.tensor(tens)
        tens = tens.to(device)
        x1 = self.embeddings(tens)
        x2 = self.model.embeddings(x)
        # change x1 shape from x, 768 to 1, x, 768
        x1 = x1.unsqueeze(0)
        # concat x1 and x2 along the first dimension
        # x1 is 1, 3, 768, make is x2.shape[0], 3, 768
        x1 = x1.expand(x2.shape[0], -1, -1)
        x = torch.cat((x1, x2), 1)
        x = self.model.encoder(x)
        x = x['last_hidden_state']
        x = self.model.pooler(x)
        x = self.classification_layer(x)
        x = self.softmax(x)
        return x
        
classes = len(set(labels))
p_tokens = 3
import torch
from torch import nn, optim
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

# use data loader
train_loader = torch.utils.data.DataLoader(train_split, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_split, batch_size=32, shuffle=True)




In [50]:
print(len(train_loader))

313


In [56]:
# use corss entropy loss
# import accuracy and f1 from sklearn
model = ptuned_VIT(classes)
model = model.to(device)
from sklearn.metrics import accuracy_score, f1_score
loss_func = nn.CrossEntropyLoss()
# use adam optimizer
optimizer = optim.Adam(model.parameters(),lr=0.001)
acc_list = []
n_epochs = 10
fin_acc = []
for epoch in range(n_epochs):
    batch_no = 0
    for sample in train_loader:
        image = sample[0]
        label = sample[1]
        image = image.to(device)
        label = label.to(device)
        # iamge shape is a, 1, b, c, d make it a, b, c, d
        image = image.squeeze(1)
        output = model(image)
        loss = loss_func(output, label)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        # calculate accuracy
        pred = torch.argmax(output, dim=1)
        acc = accuracy_score(label.cpu(), pred.cpu())
        acc_list.append(acc)
        running_acc = sum(acc_list)/len(acc_list)
        fin_acc.append(acc)
        print(f"Epoch: {epoch}, Batch: {batch_no}/313, Loss: {loss.item()}, Running Accuracy: {running_acc}, Current Accuracy {acc}")
        batch_no += 1
    # write testing loop using test_loader
    acc_list = []
    for sample in test_loader:
        image = sample[0]
        label = sample[1]
        image = image.to(device)
        label = label.to(device)
        image = image.squeeze(1)
        output = model(image)
        pred = torch.argmax(output, dim=1)
        acc = accuracy_score(label.cpu(), pred.cpu())
        acc_list.append(acc)
        running_acc = sum(acc_list)/len(acc_list)
        print(f"Epoch: {epoch}, Test Accuracy: {running_acc}")
    torch.save(model, "pt_vit_cifar_model.pt")
    torch.save(fin_acc, "pt_vit_cifar_acc.pt")



Epoch: 0, Batch: 0/313, Loss: 2.2956485748291016, Running Accuracy: 0.125, Current Accuracy 0.125
Epoch: 0, Batch: 1/313, Loss: 2.306826114654541, Running Accuracy: 0.09375, Current Accuracy 0.0625
Epoch: 0, Batch: 2/313, Loss: 2.2720913887023926, Running Accuracy: 0.13541666666666666, Current Accuracy 0.21875
Epoch: 0, Batch: 3/313, Loss: 2.2937982082366943, Running Accuracy: 0.1328125, Current Accuracy 0.125
Epoch: 0, Batch: 4/313, Loss: 2.2543506622314453, Running Accuracy: 0.15625, Current Accuracy 0.25
Epoch: 0, Batch: 5/313, Loss: 2.258364200592041, Running Accuracy: 0.16145833333333334, Current Accuracy 0.1875
Epoch: 0, Batch: 6/313, Loss: 2.2514963150024414, Running Accuracy: 0.17857142857142858, Current Accuracy 0.28125
Epoch: 0, Batch: 7/313, Loss: 2.2462456226348877, Running Accuracy: 0.19921875, Current Accuracy 0.34375
Epoch: 0, Batch: 8/313, Loss: 2.1920371055603027, Running Accuracy: 0.22916666666666666, Current Accuracy 0.46875
Epoch: 0, Batch: 9/313, Loss: 2.1922924518

KeyboardInterrupt: 

In [67]:
torch.save(model, "pt_vit_cifar_model.pt")
torch.save(fin_acc, "pt_vit_cifar_acc.pt")

In [69]:
new_mod = torch.load("pt_vit_cifar_model.pt")
new_list = torch.load("pt_vit_cifar_acc.pt")
print(new_list)

[0.125, 0.0625, 0.21875, 0.125, 0.25, 0.1875, 0.28125, 0.34375, 0.46875, 0.46875, 0.53125, 0.59375, 0.375, 0.46875, 0.59375, 0.34375, 0.5, 0.59375, 0.5625, 0.59375, 0.5625, 0.6875, 0.6875, 0.65625, 0.65625, 0.65625, 0.78125, 0.78125, 0.78125, 0.78125, 0.90625, 0.71875, 0.78125, 0.84375, 0.8125, 0.90625, 0.96875, 0.90625, 0.96875, 0.96875, 0.84375, 0.8125, 0.875, 0.875, 0.84375, 0.78125, 0.8125, 0.9375, 0.875, 0.9375, 0.9375, 0.84375, 0.875, 0.78125, 0.9375, 0.90625, 0.96875, 0.9375, 0.90625, 0.9375, 0.84375, 0.875, 0.875, 0.96875, 0.9375, 0.96875, 0.84375, 0.875, 0.96875, 0.90625, 0.90625, 0.90625, 0.96875, 0.90625, 0.90625, 0.96875, 0.96875, 0.8125, 0.90625, 0.9375, 0.9375, 0.9375, 0.9375, 1.0, 0.90625, 0.875, 0.90625, 0.96875, 1.0, 0.9375, 0.875, 0.90625, 0.96875, 0.875, 0.8125, 0.9375, 0.96875, 0.9375, 0.9375, 0.9375, 0.9375, 0.875, 0.9375, 0.90625, 0.9375, 0.96875, 0.9375, 0.96875, 0.96875, 0.90625, 0.90625, 0.96875, 0.875, 0.9375, 0.96875, 0.9375, 0.9375, 1.0, 1.0, 1.0, 0.84375, 1

: 