In [1]:
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms.functional import crop
from torchvision.io import read_image
import pandas as pd
from tqdm import tqdm
import os
import json
from PIL import Image
from metric_learning.datasets import split_train_test

seed = 123
train_classes_frac = 0.7
num_epochs = 20

# Dataset preparation

First we convert a bunch of json's to a pandas dataframe

In [2]:
from metric_learning.datasets import convert_dataset
#convert_dataset(ann_dir="data/validation/annos/", output_path="deepfashion_val.csv")
#convert_dataset(ann_dir="data/train/annos/", output_path="deepfashion_train.csv")

Now we create a dataset class

In [3]:
class DeepFashionDataset(Dataset):
    def __init__(self, df, transform=None, target_transform=None):
        self.df = df
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[[idx]]
        image = Image.open(row['image_path'].values[0])
        bbox = row['x_1'].values[0], row['x_2'].values[0], row['y_1'].values[0], row['y_2'].values[0]
        cropped_image = image.crop((bbox[0], bbox[2], bbox[1], bbox[3]))
        label = row['label'].values[0]
        if self.transform:
            cropped_image = self.transform(cropped_image)
        if self.target_transform:
            label = self.target_transform(label)
        return cropped_image, label

In [4]:
# read dataframes
#df_train = pd.read_csv("deepfashion_train.csv")
#df_val = pd.read_csv("deepfashion_val.csv")


# for quick tests we use one dataframe and split it into test and val
df = pd.read_csv("deepfashion_val.csv")
df_train, df_val = split_train_test(df, train_classes_frac, seed)
num_classes_train = len(df_train.label.unique())
# reassign class numbers so it doesn't go over num_classes_train
df_train['label'] = df_train.groupby(['label']).ngroup()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_train['label'] = df_train.groupby(['label']).ngroup()


# Import stuff

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Subset

### MNIST code originally from https://github.com/pytorch/examples/blob/master/mnist/main.py ###
from torchvision import datasets, transforms

from pytorch_metric_learning import distances, losses, miners, reducers, testers
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator

# Test model declaration

In [6]:
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, 2, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 2, 1)
        self.conv3 = nn.Conv2d(64, 128, 3, 2, 1)
        self.conv4 = nn.Conv2d(128, 256, 3, 2, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.fc1 = nn.Linear(65536, 512)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.conv3(x)
        x = F.relu(x)
        x = self.conv4(x)
        x = F.relu(x)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        return x

training

In [7]:
import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

from pytorch_metric_learning import losses, testers
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator


def train(model, loss_func, device, train_loader, optimizer, loss_optimizer, epoch):
    model.train()
    for batch_idx, (data, labels) in enumerate(train_loader):
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        loss_optimizer.zero_grad()
        embeddings = model(data)
        loss = loss_func(embeddings, labels)
        loss.backward()
        optimizer.step()
        loss_optimizer.step()
        if batch_idx % 100 == 0:
            print("Epoch {} Iteration {}: Loss = {}".format(epoch, batch_idx, loss))


def get_all_embeddings(dataset, model):
    tester = testers.BaseTester(dataloader_num_workers=0, batch_size=batch_size)
    return tester.get_all_embeddings(dataset, model)


### compute accuracy using AccuracyCalculator from pytorch-metric-learning ###
def test(query_dataset, retrieval_dataset, model, accuracy_calculator):
    query_embeddings, query_labels = get_all_embeddings(query_dataset, model)
    retrieval_embeddings, retrieval_labels = get_all_embeddings(retrieval_dataset, model)
    query_labels = query_labels.squeeze(1)
    retrieval_labels = retrieval_labels.squeeze(1)
    print("Computing accuracy")
    accuracies = accuracy_calculator.get_accuracy(
        query_embeddings, query_labels, retrieval_embeddings, retrieval_labels, False
    )
    print("Test set accuracy (Precision@1) = {}".format(accuracies["precision_at_1"]))


device = torch.device("cuda")

train_transform = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.RandomHorizontalFlip(0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

val_transform = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

batch_size = 64


train_dataset = DeepFashionDataset(df_train, transform=train_transform)
val_dataset = DeepFashionDataset(df_val, transform=val_transform)
# we need to split val dataset for query and retrieval sets
# let's just take evens and odds for the first test
#!!!!!!!!!!!!!!!!! SAME CLASS SHOULD BE IN BOTH SETS
evens = list(range(0, len(val_dataset), 2))
odds = list(range(1, len(val_dataset), 2))
query_dataset = Subset(val_dataset, evens)
retrieval_dataset = Subset(val_dataset, odds)


train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)

model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01)


loss_func = losses.SubCenterArcFaceLoss(num_classes=num_classes_train, embedding_size=512).to(device)
loss_optimizer = torch.optim.Adam(loss_func.parameters(), lr=1e-4)
accuracy_calculator = AccuracyCalculator(include=("precision_at_1",), k=1)

In [8]:
test(query_dataset, retrieval_dataset, model, accuracy_calculator)

  0%|          | 0/173 [00:00<?, ?it/s]

In [13]:
for epoch in range(1, num_epochs + 1):
    train(model, loss_func, device, train_loader, optimizer, loss_optimizer, epoch)
    test(query_dataset, retrieval_dataset, model, accuracy_calculator)

Epoch 1 Iteration 0: Loss = 41.23060989379883
Epoch 1 Iteration 100: Loss = 40.19734191894531
Epoch 1 Iteration 200: Loss = 39.61499786376953
Epoch 1 Iteration 300: Loss = 39.23011016845703


  0%|          | 0/173 [00:00<?, ?it/s]

In [38]:
7*7*64

3136

In [47]:
import math
math.sqrt(61504 / 64)

31.0