In [16]:
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms.functional import crop
from torchvision.io import read_image
import pandas as pd
from tqdm import tqdm
import os
import json
from PIL import Image

In [17]:
DATA_DIR = r"C:\Users\grk\git\StyleForge\data"

In [18]:
df = pd.read_csv("deepfashion_train.csv")

In [19]:
df

Unnamed: 0,id,image_path,pair_id,style,x_1,y_1,x_2,y_2,label
0,1,data\validation\image\000001.jpg,1,1,199,190,287,269,0
1,1,data\validation\image\000001.jpg,1,2,204,189,293,414,1
2,2,data\validation\image\000002.jpg,1,1,170,121,280,215,0
3,2,data\validation\image\000002.jpg,1,2,176,120,292,383,1
4,3,data\validation\image\000003.jpg,3,1,151,241,279,435,2
...,...,...,...,...,...,...,...,...,...
36245,32149,data\validation\image\032149.jpg,2372,1,345,606,707,1290,4940
36246,32150,data\validation\image\032150.jpg,2372,1,418,676,636,1221,4940
36247,32151,data\validation\image\032151.jpg,2372,1,358,645,801,1247,4940
36248,32152,data\validation\image\032152.jpg,2372,1,196,469,674,1221,4940


In [5]:
df.loc[[0]]['image_path'].values[0]

'data\\validation\\image\\000001.jpg'

In [5]:
class DeepFashionDataset(Dataset):
    def __init__(self, data_dir, mode, transform=None, target_transform=None):
        self.df = pd.read_csv("deepfashion_train.csv")
        self.mode = mode
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.loc[[idx]]
        image = Image.open(row['image_path'].values[0])
        bbox = row['x_1'].values[0], row['x_2'].values[0], row['y_1'].values[0], row['y_2'].values[0]
        cropped_image = image.crop((bbox[0], bbox[2], bbox[1], bbox[3]))
        label = row['label'].values[0]
        if self.transform:
            cropped_image = self.transform(cropped_image)
        if self.target_transform:
            label = self.target_transform(label)
        return cropped_image, label

In [6]:
dataset = DeepFashionDataset(DATA_DIR, 'validation')

In [7]:
dataset[123]

(<PIL.Image.Image image mode=RGB size=355x379>, 49)

# Dataset preparation

First we convert a bunch of json's to a pandas dataframe

In [9]:
import pandas as pd
def convert_dataset(ann_dir):
    columns = ["id", "image_path", "pair_id", "style", "x_1", "y_1", "x_2", "y_2"]
    df = pd.DataFrame(columns=columns)
    ann_list = [os.path.join(ann_dir, i) for i in os.listdir(ann_dir)]
    for ann_path in tqdm(ann_list):
        with open(ann_path, 'r') as ann_file:
            ann = json.load(ann_file)
        # count number of items in annotation
        noi = len([key for key in list(ann.keys()) if key.startswith("item")])
        # for each item in image form a row in df
        for item_num in range(1, noi+1):
            df_row = {key: None for key in columns}
            df_row["id"] = ann_path.split(".")[0].split("\\")[-1]
            df_row["image_path"] = ann_path.replace("annos", "image").split(".")[0] + ".jpg"
            df_row["pair_id"] = ann["pair_id"]
            df_row["style"] = ann["item" + str(item_num)]["style"]
            bbox = ann["item" + str(item_num)]["bounding_box"]
            df_row["x_1"], df_row["y_1"], df_row["x_2"], df_row["y_2"] = bbox
            df_row = pd.DataFrame([df_row])
            df = pd.concat([df, df_row], ignore_index=True)
    # just checkpoint
    df.to_csv('deepfashion.csv')    

In [20]:
# convert annos to pandas df
convert_dataset(r"data\train\annos")
# load checkpoint
df = pd.read_csv("deepfashion.csv", index_col=0)
# we only take the items which possibly have pairs
df_pairs = df[df['style'] != 0]
# create label column
df_pairs['label'] = df_pairs.groupby(['pair_id', 'style']).ngroup()
# save df
df_pairs.to_csv('deepfashion_train.csv', index=False)

100%|██████████| 32153/32153 [05:47<00:00, 92.64it/s] 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pairs['label'] = df_pairs.groupby(['pair_id', 'style']).ngroup()


# Import stuff

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Subset

### MNIST code originally from https://github.com/pytorch/examples/blob/master/mnist/main.py ###
from torchvision import datasets, transforms

from pytorch_metric_learning import distances, losses, miners, reducers, testers
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator

# Test model declaration

In [9]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, 2)
        self.conv2 = nn.Conv2d(32, 64, 3, 2)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(7*7*64, 128)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        return x

In [15]:
### MNIST code originally from https://github.com/pytorch/examples/blob/master/mnist/main.py ###
def train(model, loss_func, mining_func, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, labels) in enumerate(train_loader):
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        embeddings = model(data)
        indices_tuple = mining_func(embeddings, labels)
        loss = loss_func(embeddings, labels, indices_tuple)
        loss.backward()
        optimizer.step()
        if batch_idx % 20 == 0:
            print(
                "Epoch {} Iteration {}: Loss = {}, Number of mined triplets = {}".format(
                    epoch, batch_idx, loss, mining_func.num_triplets
                )
            )


### convenient function from pytorch-metric-learning ###
def get_all_embeddings(dataset, model):
    tester = testers.BaseTester(dataloader_num_workers=0, batch_size=batch_size)
    return tester.get_all_embeddings(dataset, model)


### compute accuracy using AccuracyCalculator from pytorch-metric-learning ###
def test(train_set, test_set, model, accuracy_calculator):
    train_embeddings, train_labels = get_all_embeddings(train_set, model)
    test_embeddings, test_labels = get_all_embeddings(test_set, model)
    train_labels = train_labels.squeeze(1)
    test_labels = test_labels.squeeze(1)
    print("Computing accuracy")
    accuracies = accuracy_calculator.get_accuracy(
        test_embeddings, test_labels, train_embeddings, train_labels, False
    )
    print("Test set accuracy (Precision@1) = {}".format(accuracies["precision_at_1"]))


device = torch.device("cuda")

train_transform = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.RandomHorizontalFlip(0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

val_transform = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

batch_size = 256

dataset = DeepFashionDataset(DATA_DIR, 'train', transform=train_transform)
evens = list(range(0, len(dataset), 2))
odds = list(range(1, len(dataset), 2))
trainset = Subset(dataset, evens)
valset = Subset(dataset, odds)
#dataset2 = DeepFashionDataset(DATA_DIR, 'test', transform=val_transform)
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(valset, batch_size=batch_size)

model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01)
num_epochs = 10


### pytorch-metric-learning stuff ###
distance = distances.CosineSimilarity()
reducer = reducers.ThresholdReducer(low=0)
loss_func = losses.TripletMarginLoss(margin=0.2, distance=distance, reducer=reducer)
mining_func = miners.TripletMarginMiner(
    margin=0.2, distance=distance, type_of_triplets="semihard"
)
accuracy_calculator = AccuracyCalculator(include=("precision_at_1",), k=1)
### pytorch-metric-learning stuff ###

for epoch in range(1, num_epochs + 1):
    train(model, loss_func, mining_func, device, train_loader, optimizer, epoch)
    test(trainset, valset, model, accuracy_calculator)

Epoch 1 Iteration 0: Loss = 0.10620295256376266, Number of mined triplets = 3129
Epoch 1 Iteration 20: Loss = 0.11587725579738617, Number of mined triplets = 433
Epoch 1 Iteration 40: Loss = 0.10214606672525406, Number of mined triplets = 699
Epoch 1 Iteration 60: Loss = 0.09975206106901169, Number of mined triplets = 479


100%|██████████| 71/71 [00:57<00:00,  1.23it/s]
100%|██████████| 71/71 [01:07<00:00,  1.05it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.10866320377335961
Epoch 2 Iteration 0: Loss = 0.10144372284412384, Number of mined triplets = 933
Epoch 2 Iteration 20: Loss = 0.09729523956775665, Number of mined triplets = 681
Epoch 2 Iteration 40: Loss = 0.10172709822654724, Number of mined triplets = 619
Epoch 2 Iteration 60: Loss = 0.10664625465869904, Number of mined triplets = 346


100%|██████████| 71/71 [00:59<00:00,  1.19it/s]
100%|██████████| 71/71 [01:11<00:00,  1.01s/it]


Computing accuracy
Test set accuracy (Precision@1) = 0.12335064779986865
Epoch 3 Iteration 0: Loss = 0.1058802530169487, Number of mined triplets = 898
Epoch 3 Iteration 20: Loss = 0.10500578582286835, Number of mined triplets = 605
Epoch 3 Iteration 40: Loss = 0.10518316179513931, Number of mined triplets = 712
Epoch 3 Iteration 60: Loss = 0.09104671329259872, Number of mined triplets = 588


100%|██████████| 71/71 [01:25<00:00,  1.20s/it]
100%|██████████| 71/71 [01:05<00:00,  1.09it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.13188847095348977
Epoch 4 Iteration 0: Loss = 0.09957858920097351, Number of mined triplets = 947
Epoch 4 Iteration 20: Loss = 0.09831001609563828, Number of mined triplets = 671
Epoch 4 Iteration 40: Loss = 0.09937715530395508, Number of mined triplets = 591
Epoch 4 Iteration 60: Loss = 0.09839677810668945, Number of mined triplets = 781


100%|██████████| 71/71 [01:02<00:00,  1.14it/s]
100%|██████████| 71/71 [01:06<00:00,  1.06it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.12048480506298884
Epoch 5 Iteration 0: Loss = 0.1063404232263565, Number of mined triplets = 361
Epoch 5 Iteration 20: Loss = 0.09988199174404144, Number of mined triplets = 874
Epoch 5 Iteration 40: Loss = 0.10146575421094894, Number of mined triplets = 497
Epoch 5 Iteration 60: Loss = 0.0985591784119606, Number of mined triplets = 930


100%|██████████| 71/71 [01:02<00:00,  1.14it/s]
100%|██████████| 71/71 [01:03<00:00,  1.12it/s]


Computing accuracy
Test set accuracy (Precision@1) = 0.12782852707624337
Epoch 6 Iteration 0: Loss = 0.10968848317861557, Number of mined triplets = 541
Epoch 6 Iteration 20: Loss = 0.09816888719797134, Number of mined triplets = 575
Epoch 6 Iteration 40: Loss = 0.10574333369731903, Number of mined triplets = 480


KeyboardInterrupt: 

In [None]:
import numpy as np
import cv2
import sys
# Load image as string from file/database    
fd = open(r'1.raw',  encoding="latin1")
img_str = fd.read()
fd.close()

In [12]:
fd.close()

In [17]:
import numpy as np
fd = open('1.raw', 'r', encoding='latin1')
rows = 480
cols = 640
f = np.fromfile(fd, dtype=np.uint8,count=rows*cols)
im = f.reshape((rows, cols)) #notice row, column format
fd.close()

In [20]:
len(trainset)

18125

In [None]:
torch.utils