In [None]:
import zipfile
from pathlib import Path
# Open the dataset archive read-only and unpack all of its members into the
# current working directory (extractall is given no target path).
with zipfile.ZipFile(file='twice/dawin_data.zip', mode='r') as archive:
  print('Unzipping data')
  archive.extractall(path=None)

In [None]:
!pip install vit_pytorch
!wget https://storage.googleapis.com/vit_models/imagenet21k/ViT-B_16.npz -P ./pretrained_models


In [None]:
import pathlib
# Dataset root (relative to the working directory), held as a Path object so
# that sub-folders can be joined with the `/` operator.
image_path = pathlib.Path('dawin_data')

# The training and testing splits are the "train" and "test" sub-folders.
train_dir, test_dir = (image_path / split for split in ("train", "test"))

# Bare expression: shown as the cell's output.
train_dir, test_dir

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from vit_pytorch import ViT
import numpy as np

# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalize each of the three channels with mean 0.5 / std 0.5.
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
])

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Build one ImageFolder dataset per split (train first, then test), both
# sharing the same preprocessing pipeline.
train_data, test_data = (
    torchvision.datasets.ImageFolder(root=split_dir, transform=data_transform)
    for split_dir in (train_dir, test_dir)
)

# Build the ViT classifier for 224x224 inputs split into 32x32 patches.
model = ViT(
    image_size = 224,
    patch_size = 32,
    num_classes = 6, # Change this to the number of classes in your dataset
    dim = 768,
    depth = 12,
    heads = 12,
    mlp_dim = 3072,
    dropout = 0.1,
    emb_dropout = 0.1
)

# Load the saved model state from the .npz file.
# np.load returns NumPy arrays, but load_state_dict only accepts tensors, so
# each array is converted first.  The `with` block closes the archive as soon
# as the arrays have been read.
with np.load('model.npz') as saved_arrays:
    state_dict = {
        name: torch.from_numpy(saved_arrays[name])
        for name in saved_arrays.files
    }
# NOTE(review): the keys in 'model.npz' must match the names in
# model.state_dict() (strict loading).  The checkpoint downloaded earlier is
# saved as ./pretrained_models/ViT-B_16.npz, not 'model.npz'; it presumably
# uses a different parameter naming scheme and a 16-pixel patch size, so it
# likely cannot be loaded here without a conversion step -- confirm which
# file is intended.
model.load_state_dict(state_dict)

# Move the model to the selected device (GPU when available, else CPU).
model = model.to(device)

# Loss function and optimizer.
# The notebook only imports `torch` (there is no `from torch import nn`), so
# the loss is referenced through `torch.nn` to avoid a NameError.
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Data loaders: training batches are reshuffled every epoch, test batches keep
# the dataset order.  Both use batches of 32 and 4 worker processes.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=False, num_workers=4)

# Train the model for 10 epochs, printing the mean per-sample loss of each.
for epoch in range(10):
    # Make sure the dropout layers are active while training.
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # The criterion returns the batch mean; weight it by the batch size so
        # that dividing by the dataset size below yields a true per-sample
        # average (the last batch may be smaller than the others).
        running_loss += loss.item() * inputs.size(0)
    print(f"Epoch {epoch+1}, loss: {running_loss/len(train_data)}")

# Evaluate the model on the test set.
# Switch to evaluation mode first: the model is built with dropout, which
# would otherwise stay active and randomly perturb the predictions.
model.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for inference
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")

