In [1]:
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms
import torchvision.transforms as transforms
from PIL import Image
from os import listdir
import numpy as np
import torch
from torch.utils.data.sampler import SubsetRandomSampler
import string
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

In [2]:
# Release cached GPU memory held by PyTorch's allocator before the model is created.
torch.cuda.empty_cache()

In [None]:
pip install pytorch_pretrained_vit

In [3]:
# Root directory of the data set; later cells read train.csv, test.csv, train/ and test/ from it.
path_to_dataset = "./vk-made-sports-image-classification"

In [4]:
# Load the training annotations (image file name and its label) and preview the first rows.
train = pd.read_csv(f"{path_to_dataset}/train.csv")
train.head()

Unnamed: 0,image_id,label
0,46514481-2d8b-4d49-8991-012e1bfd34f6.jpeg,swimming
1,ec66e513-adac-4a30-b6a9-3d647ee6e46b.jpeg,greco-Roman_wrestling
2,4d60732e-d680-4bfd-9067-70ff8137f537.jpeg,running
3,93327011-8e3d-4f0d-849d-a26ddaf6488b.jpeg,football
4,b6853478-48c1-48b2-b104-74903730c831.jpeg,sailing


In [5]:
# Class index <-> label lookups; indices follow the order in which labels first appear in train.csv.
idx_to_label = dict(enumerate(train.label.unique()))
label_to_idx = {label: idx for idx, label in idx_to_label.items()}

In [6]:
# Number of distinct labels; the same value sizes the classifier head further down.
train.label.nunique()

30

In [7]:
class SportDataset(Dataset):
    """Training images with one-hot encoded labels.

    Reads ``<data_path>/train.csv`` (columns ``image_id`` and ``label``) and
    loads each image on demand from ``<data_path>/train/``.

    Args:
        data_path: Directory that contains ``train.csv`` and the ``train/`` folder.
        transform: Callable applied to every RGB PIL image; it must return a
            tensor. By default the image is converted to a tensor, resized to
            384x384 and scaled to float32.
    """

    # NOTE(review): the default transform applies no mean/std normalisation —
    # confirm whether the pretrained backbone used with this data expects it.
    def __init__(self, data_path, transform=transforms.Compose([transforms.PILToTensor(),
                                                               transforms.Resize((384,384)),
                                                               transforms.ConvertImageDtype(torch.float32)])):
        self.transform = transform
        self.data_path = data_path
        self.data = pd.read_csv(data_path + "/train.csv")
        # Class indices follow the order in which labels first appear in the CSV.
        classes = self.data.label.unique()
        self.idx_to_label = dict(enumerate(classes))
        self.label_to_idx = {label: idx for idx, label in self.idx_to_label.items()}
        # Computed once here instead of on every __getitem__ call; derived from
        # the same array as the index maps so the one-hot size always matches them.
        self.num_classes = len(classes)

    def __getitem__(self, indx):
        """Return ``(image, one_hot_label)`` for row ``indx``.

        Both tensors are float32 and stay on the CPU: moving them to the GPU
        is the training loop's job, which keeps the dataset usable on
        CPU-only machines and with multi-worker data loading.
        """
        row = self.data.iloc[indx]
        # The context manager closes the file handle; convert() returns an
        # independent, fully loaded image that outlives it.
        with Image.open(self.data_path + "/train/" + row["image_id"]) as raw:
            img = self.transform(raw.convert('RGB'))
        target = torch.zeros(self.num_classes)
        target[self.label_to_idx[row["label"]]] = 1
        return img.float(), target

    def __len__(self):
        """Number of annotated images listed in train.csv."""
        return self.data.shape[0]

In [8]:
# Training dataset built from train.csv with the default preprocessing.
dataset = SportDataset(path_to_dataset)

In [9]:
# Mini-batches of 32 samples, reshuffled every epoch.
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
# nimages = 0
# mean = 0.
# std = 0.
# for batch, _ in train_loader:
#     # Rearrange batch to be the shape of [B, C, W * H]
#     batch = batch.view(batch.size(0), batch.size(1), -1)
#     # Update total number of images
#     nimages += batch.size(0)
#     # Compute mean and std here
#     mean += batch.mean(2).sum(0) 
#     std += batch.std(2).sum(0)

# # Final step
# mean /= nimages
# std /= nimages

# print(mean)
# print(std)

In [14]:
# Load the pretrained 'L_32_imagenet1k' ViT.
# Alternative backbone kept for reference: models.resnet152(pretrained=True)
from pytorch_pretrained_vit import ViT

model = ViT('L_32_imagenet1k', pretrained=True)

# Freeze every pretrained weight so that no gradients are computed for them.
for weight in model.parameters():
    weight.requires_grad_(False)

Loaded pretrained weights.


In [16]:
# Swap the pretrained classification head for a fresh linear layer with one
# output per label. The input width is read from the existing head rather than
# hard-coded, so the cell keeps working if the backbone is changed.
num_ftrs = model.fc.in_features
model.fc = nn.Sequential(nn.Linear(num_ftrs, train.label.nunique()))

model = model.to("cuda:0")
criterion = nn.CrossEntropyLoss()
# Give the optimizer only the parameters that actually require gradients;
# the newly created head is trainable by default.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=0.01, momentum=0.9)

In [17]:
# Show the replaced classification head.
model.fc

Sequential(
  (0): Linear(in_features=1024, out_features=30, bias=True)
)

In [None]:
# Train for 20 epochs and report the mean batch loss after each one.
# Put the model in training mode explicitly, so re-running this cell after the
# inference cell (which calls model.eval()) behaves the same as a fresh run.
model.train()
for epoch in range(20):
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to("cuda:0")
        labels = labels.to("cuda:0")
        # The dataset yields one-hot rows; convert them to class indices.
        # For one-hot targets the loss value is the same, and class indices are
        # accepted by every PyTorch release.
        targets = labels.argmax(dim=1)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Mean over batches is comparable across batch sizes; guard against an empty loader.
    mean_loss = running_loss / max(len(train_loader), 1)
    print(f"epoch {epoch + 1}: mean loss {mean_loss:.4f}")
print('Finished Training')

In [None]:
# Load the list of test images and preview the first rows.
test = pd.read_csv(f"{path_to_dataset}/test.csv")
test.head()

In [None]:
# Predict a label for every test image, one image at a time.
# Preprocessing: tensor conversion, resize to 384x384, scaling to float32.
transform = transforms.Compose([
    transforms.PILToTensor(),
    transforms.Resize((384, 384)),
    transforms.ConvertImageDtype(torch.float32),
])
model.eval()
answers = []
# No gradients are needed for prediction; disabling autograd saves memory and time.
with torch.no_grad():
    for el in test.image_id:
        # The context manager closes the file handle; convert() returns a loaded copy.
        with Image.open(path_to_dataset + "/test/" + el) as raw:
            img = transform(raw.convert('RGB'))
        img = img.float().to("cuda:0")
        # unsqueeze(0) adds the batch dimension; argmax picks the highest-scoring class.
        answers.append(idx_to_label[model(img.unsqueeze(0)).argmax().item()])
test["label"] = answers

In [None]:
# Spot check: predicted class index for `img`, which still holds the last image from the inference loop.
model(img.unsqueeze(0)).argmax().item()

In [None]:
# Preview the test table with the predicted `label` column attached.
test.head()

In [None]:
# Save the predictions to CSV without writing the DataFrame index.
test.to_csv("Kugushev_vit32L.csv", index=False)

In [None]:
# Re-export an existing predictions file without its 'Unnamed: 0' column.
# NOTE(review): "Kugushev.csv" is not written by any visible cell — confirm where it comes from.
e = pd.read_csv("Kugushev.csv", index_col=False).drop(columns='Unnamed: 0')
e.to_csv("Kugushev_seq512I.csv", index=False)