<h1>Step 1 upload dataset metadata and photos info</h1>

In [None]:
!git clone https://github.com/pumpikano/street2shop
!mkdir street2shop/images
!bash street2shop/get_street2shop.sh
!mkdir data
!mv street2shop/meta data

 <h1>step 2 extract street photos only</h1>

In [3]:
import json

DRESS_FOLDER = "../data/json/"

ID_NAME = "retrieval_dresses.json"
TEST_NAME = "test_pairs_dresses.json"
TRAIN_NAME = "train_pairs_dresses.json"
PHOTO_FILE = "../data/photos/photos_dress.txt"

def read_json(file_name):
    with open(file_name) as f:
        obj = json.loads(f.readline())
    return obj


def get_photo_ids(obj, retr_dict):
    photos = set()
    for item in obj:
        photos.add(item["photo"])
        product_photos = retr_dict[item["product"]]
        photos.update(product_photos)
    return photos


if __name__ != "__main__":
    id_to_photo_dirty = read_json(DRESS_FOLDER+ID_NAME)
    id_to_photo_clean = {}
    for item in id_to_photo_dirty:
        if item["product"] in id_to_photo_clean:
            id_to_photo_clean[item["product"]].append(item["photo"])
        else:
            id_to_photo_clean[item["product"]] = [item["photo"]]
    test_dress = read_json(DRESS_FOLDER + TEST_NAME)
    train_dress = read_json(DRESS_FOLDER + TRAIN_NAME)
    test_set = get_photo_ids(test_dress, id_to_photo_clean)
    train_set = get_photo_ids(train_dress, id_to_photo_clean)
    result_set = test_set|train_set

    with open(PHOTO_FILE, "w") as new_ph_file:
        with open("../data/photos/photos.txt") as ph_file:
            for line in ph_file:
                id = int(line.split(",")[0])
                if id in result_set:
                    new_ph_file.write(line)

In [None]:
python download.py --urls data/photos/photos_dress.txt

In [None]:
class PhotoManager:
    def __init__(train_file, test_file):
        pass
    
    def is_street(photo_id):
        pass
    
    def getbbox(photo_id):
        pass
    
    def is_train(photo_id):
    
    def get_train_size(photo_id)

In [89]:
import os
import os.path as path

PHOTO_FOLDER = "../data/images"

ALL_IMAGES  = [int(name.split(".")[0]) for name in os.listdir(PHOTO_FOLDER) if path.isfile(PHOTO_FOLDER+"/"+name)]



In [129]:
PHOTO_FOLDER = "../data/images"
META_FOLDER = "../data/meta"
from torch.utils.data import Dataset
import random
from skimage import io, transform

def get_negative(positive, items):
    while True:
        rand_item = random.choice(items)
        if positive != rand_item:
            return rand_item
    

def get_image(photo_id, folder=PHOTO_FOLDER):
    photo_name = str(photo_id)
    zeros = "0"*(9- len(photo_name))
    photo_name = zeros+photo_name+".jpeg"
    return io.imread(folder+"/"+photo_name)

class DressDataset(Dataset):
    """street2shop dress dataset."""

    def __init__(self, triplets_file_name, photos_folder_name=PHOTO_FOLDER, retrieval_file_name=DRESS_FOLDER+ID_NAME):
        self.triplets = read_json(triplets_file_name)
        self.retrieval = read_json(retrieval_file_name)
        self.retrieval ={it["product"]:it["photo"] for it in self.retrieval}
        all_photos = [item["photo"] for item in self.triplets]
        for item in self.triplets:
            positive_photo = self.retrieval[item["product"]]
            anchor_photo = item["photo"]
            if positive_photo not in ALL_IMAGES or anchor_photo not in ALL_IMAGES:
                continue
            item["positive"] = positive_photo
            item["anchor"] = anchor_photo
            del item["product"]
            del item["photo"]
            negative = get_negative(item["positive"], ALL_IMAGES)
            item["negative"] = negative
        
        for item in self.triplets[:]:
            if "positive" not in item:
                self.triplets.remove(item)
            
        self.photos_folder = photos_folder_name

    def __len__(self):
        return len(self.triplets)

    def __getitem__(self, index):
        sample = {}
        triplet = self.triplets[index]
        sample["positive"] = get_image(triplet["positive"])
        sample["negative"] = get_image(triplet["negative"])
        bbox = triplet["bbox"]
        sample["anchor"] = get_image(triplet["anchor"])[bbox["top"]:bbox["top"]+bbox["height"],bbox["left"]:bbox["left"]+bbox["width"]]
        return sample


In [130]:
train = DressDataset(triplets_file_name=DRESS_FOLDER+TRAIN_NAME)
test = DressDataset(triplets_file_name=DRESS_FOLDER+TRAIN_NAME)


In [131]:
import torch

BATCH_SIZE = 20

cuda = torch.cuda.is_available()
kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
train_dataset_loader = torch.utils.data.DataLoader(train,batch_size=BATCH_SIZE, shuffle=True,**kwargs)
test_dataset_loader = torch.utils.data.DataLoader(test,batch_size=BATCH_SIZE, shuffle=True,**kwargs)

<h1>define the model</h1>

In [132]:
import torch.nn as nn
import torchvision.models as models


class BasisNet(nn.Module):
    def __init__(self, embedding_size=128):
        super(BasisNet, self).__init__()
        self.inception = models.inception_v3(pretrained=True)
        self.fc1 = nn.ELU(512*3*3, 512)
        self.fc2 = nn.Linear(512, embedding_size)
    
    def forward(self,x):
        x = self.inception(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return self.l2norm(x)
    
    def l2_norm(self,input):
        input_size = input.size()
        buffer = torch.pow(input, 2)

        normp = torch.sum(buffer, 1).add_(1e-10)
        norm = torch.sqrt(normp)

        _output = torch.div(input, norm.view(-1, 1).expand_as(input))

        output = _output.view(input_size)

        return output
        
class TripletNet(nn.Module):
    def __init__(self, embedding_net):
        super(TripletNet, self).__init__()
        self.embedding_net = embedding_net

    def forward(self, sample):
        output1 = self.embedding_net(sample["positive"])
        output2 = self.embedding_net(sample["anchor"])
        output3 = self.embedding_net(sample["negative"])
        return output1, output2, output3

    def get_embedding(self, x):
        return self.embedding_net(x)

In [133]:
class TripletLoss(nn.Module):
    """
    Triplet loss
    Takes embeddings of an anchor sample, a positive sample and a negative sample
    """

    def __init__(self, margin):
        super(TripletLoss, self).__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative, size_average=True):
        distance_positive = (anchor - positive).pow(2).sum(1)  # .pow(.5)
        distance_negative = (anchor - negative).pow(2).sum(1)  # .pow(.5)
        losses = F.relu(distance_positive - distance_negative + self.margin)
        return losses.mean() if size_average else losses.sum()
    
    
dress_net = TripletNet(BasisNet())

In [134]:
import torch.optim as optim

optimizer = optim.SGD(dress_net.parameters(), lr=0.001, momentum=0.9)
criterion = TripletLoss(0.1) 
EPOCH = 10
for i in range(EPOCH):
    import pdb;pdb.set_trace()
    for i, data in enumerate(train):
        outputs = dress_net(data)
        loss = criterion(outputs)

> <ipython-input-134-c4337d41b672>(8)<module>()->None
-> for i, data in enumerate(train):
(Pdb) n
> <ipython-input-134-c4337d41b672>(9)<module>()->None
-> outputs = dress_net(data)
(Pdb) n
AttributeError: "'numpy.ndarray' object has no attribute 'clone'"
> <ipython-input-134-c4337d41b672>(9)<module>()->None
-> outputs = dress_net(data)
(Pdb) n
--Return--
> <ipython-input-134-c4337d41b672>(9)<module>()->None
-> outputs = dress_net(data)
(Pdb) c


AttributeError: 'numpy.ndarray' object has no attribute 'clone'