<a href="https://colab.research.google.com/github/deayalar/deeplearning_unitn/blob/main/DL_Project_wandb_embedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!wget https://market1501.s3-us-west-2.amazonaws.com/dataset.zip
!unzip -q dataset.zip -d dataset

--2021-06-25 13:52:19--  https://market1501.s3-us-west-2.amazonaws.com/dataset.zip
Resolving market1501.s3-us-west-2.amazonaws.com (market1501.s3-us-west-2.amazonaws.com)... 52.218.209.57
Connecting to market1501.s3-us-west-2.amazonaws.com (market1501.s3-us-west-2.amazonaws.com)|52.218.209.57|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 82925180 (79M) [application/zip]
Saving to: ‘dataset.zip’


2021-06-25 13:52:21 (52.2 MB/s) - ‘dataset.zip’ saved [82925180/82925180]



In [2]:
!rm -rf /content/deeplearning_unitn
!git clone https://github.com/deayalar/deeplearning_unitn.git

Cloning into 'deeplearning_unitn'...
remote: Enumerating objects: 245, done.[K
remote: Counting objects: 100% (245/245), done.[K
remote: Compressing objects: 100% (162/162), done.[K
remote: Total 245 (delta 135), reused 138 (delta 62), pack-reused 0[K
Receiving objects: 100% (245/245), 10.73 MiB | 5.98 MiB/s, done.
Resolving deltas: 100% (135/135), done.


In [3]:
!nvidia-smi

Fri Jun 25 13:52:59 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.27       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [1]:
config = dict(
    wandb = True,
    device = "auto", # Select an specific device None to select automatically
    train_root = "/content/dataset/train",
    test_root = "/content/dataset/test", 
    queries_root = "/content/dataset/queries",
    attributes_file = "/content/dataset/annotations_train.csv",
    #train_root = "/media/deayalar/Data/Documents/Unitn/Deep Learning/Assignment/dataset/train",
    #test_root = "/media/deayalar/Data/Documents/Unitn/Deep Learning/Assignment/dataset/test",
    #queries_root = "/media/deayalar/Data/Documents/Unitn/Deep Learning/Assignment/dataset/queries",
    #attributes_file = "/media/deayalar/Data/Documents/Unitn/Deep Learning/Assignment/dataset/annotations_train.csv",
    dataset="Market1501",
    backbone = "resnet50",
    split = dict(
        full_training_size = 0.75,
        train_size = 0.8
    ),
    compose = dict(
        resize_h = 224,
        resize_w = 224
    ),
    epochs=10,
    training_batch_size=16,
    validation_batch_size=32,
    learning_rate=0.01,
    weight_decay=0.000001, 
    momentum=0.9,
    test_before_training=False,
    test_after_epochs=10,
    mAP_rank=15)

In [2]:
%cd /content/deeplearning_unitn

import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import numpy as np
from tqdm.notebook import tqdm

import cost_functions
from evaluation import Evaluator
from datasets.reid_dataset import Market1501
#from cost_functions import OverallLossWrapper
from utils.split_data import ValidationSplitter, TrainingSplitter
from models.reid_model import FinetunedModel
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

if config["device"] == "auto":
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
else:
    device = config["device"]
print(device)

/content/deeplearning_unitn
cuda:0


In [3]:
!pip install wandb -q
import wandb
if config["wandb"]:
  wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mdeayalar[0m (use `wandb login --relogin` to force relogin)


In [4]:
def model_pipeline(hyperparameters):
    """
    This function corresponds to the basic pipeline of all tested models
    0) Split data
    1) Setup based on the configuration
    2) Train the model
    3) Test performance
    """
    config = hyperparameters
    if config["wandb"]:
      wandb.init(entity="dl_unitn", project="dl_project", config=hyperparameters)
      config = wandb.config
    print(config)
    
    train_set, val_set, val_queries = split_data(config)
    
    model, train_loader, val_loader, val_queries_loader, criterion, optimizer = setup(train_set, val_set, val_queries, config)
    id_ground_truth_dict = build_ground_truth(val_set, val_queries)

    print("Using "+ config["backbone"] + " as backbone")
    if config["test_before_training"]:
      test(model, val_loader, val_queries_loader, id_ground_truth_dict, config)

    train(model, train_loader, val_loader, criterion, optimizer, config)

    test(model, val_loader, val_queries_loader, id_ground_truth_dict, config, save_model=True)

    return model

In [5]:
def build_ground_truth(val_set, val_queries):
    values = []
    for q in val_queries:
        matches = []
        for idx_v, v in enumerate(val_set):
            if v.split("_")[0] == q.split("_")[0]:
                matches.append(idx_v)
        value = set(matches)
        values.append(value)
        
    ground_truth_dict = dict(zip(list(range(0, len(val_queries))), values))
    return ground_truth_dict


In [6]:
def split_data(config):
    """Returns a list with the names of theimages in each set"""
    splitter = ValidationSplitter(train_root=config["train_root"], 
                                  test_root=config["test_root"], 
                                  queries_root=config["queries_root"])
    train_set, val_set, val_queries = splitter.split(train_size=config["split"]["full_training_size"],
                                                     random_seed=42)
    return train_set, val_set, val_queries

def setup(train_set, val_set, val_queries, config):
    #Create pytorch Datasets
    train_composed = transforms.Compose([ transforms.Resize((config["compose"]["resize_h"], 
                                                      config["compose"]["resize_w"])),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.ToTensor(),
                                          transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                std=[0.229, 0.224, 0.225]),
                                          transforms.RandomErasing(p=0.6)])
    
    val_composed = transforms.Compose([transforms.Resize((config["compose"]["resize_h"], 
                                                      config["compose"]["resize_w"])),
                                   transforms.ToTensor(),
                                   transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                        std=[0.229, 0.224, 0.225])])
    
    train_dataset = Market1501(root_dir=config["train_root"],
                            attributes_file=config["attributes_file"],
                            images_list=train_set,
                            transform=train_composed)
                            
    val_dataset = Market1501(root_dir=config["train_root"],
                         attributes_file=config["attributes_file"],
                         images_list=val_set,
                         transform=val_composed)

    val_queries_dataset = Market1501(root_dir=config["train_root"],
                         attributes_file=config["attributes_file"],
                         images_list=val_queries,
                         transform=val_composed)

    train_loader = torch.utils.data.DataLoader(train_dataset, 
                                               batch_size=config["training_batch_size"], 
                                               shuffle=True, 
                                               num_workers=2)
                                               
    val_loader = torch.utils.data.DataLoader(val_dataset, 
                                             batch_size=config["validation_batch_size"], 
                                             shuffle=False, 
                                             num_workers=2)

    val_queries_loader = torch.utils.data.DataLoader(val_queries_dataset, 
                                             batch_size=config["validation_batch_size"],
                                             shuffle=False, 
                                             num_workers=2)

    attr_len = len(train_dataset[0][2]) #Number of attributes in the csv: 27
    print(f"Number of attributes: {attr_len}")
    model = FinetunedModel(architecture=config["backbone"] ,n_classes=attr_len).to(device)

    #This is a combination of the attributes classification loss and the triplet loss for identification
    criterion = OverallLossWrapper()
    optimizer = torch.optim.SGD(model.parameters(), 
                                lr=config["learning_rate"], 
                                weight_decay=config["weight_decay"], 
                                momentum=config["momentum"])
    
    return model, train_loader, val_loader, val_queries_loader, criterion, optimizer

In [7]:
def compute_centroids(model, loader):
  model.eval()
  print("Computing centroids")
  all_features = np.empty((0, model.feature_size))
  ids = []
  for batch_idx, (inputs, identity, attributes) in enumerate(tqdm(loader)):
    inputs = inputs.to(device)
    batch_features = model(inputs, get_features = True)
    all_features = np.concatenate((all_features, batch_features.cpu().detach().numpy()), axis=0)
    ids.extend(list(identity))

  ids = np.array([int(i) for i in ids])
  unique_ids = np.unique(ids)

  centroids = []
  for id in unique_ids:
    id_features = all_features[ids == id]
    centroid = np.mean(id_features, axis = 0)
    centroids.append(centroid)
  return centroids, unique_ids


def train(model, train_loader, val_loader, criterion, optimizer, config):
    print("Training...")
    print(train_loader)
    # tell wandb to watch what the model gets up to: gradients, weights, and more!
    if config["wandb"]:
         wandb.watch(model, criterion, log="all", log_freq=10)
    
    # Run training and track with wandb
    total_batches = len(train_loader) * config["epochs"]
    example_ct = 0  # number of seen examples
    batch_ct = 0

    centroids, unique_ids = compute_centroids(model, train_loader)

    for epoch in tqdm(range(config["epochs"])):
        model.train()
        for batch_idx, (inputs, identity, attributes) in enumerate(train_loader):
            loss = train_batch(inputs, identity, attributes, model, optimizer, criterion, centroids, unique_ids)

            example_ct +=  len(inputs)
            batch_ct += 1

            if ((batch_ct + 1) % 50) == 0:
                train_log(loss, example_ct, epoch)

        centroids, unique_ids = compute_centroids(model, train_loader)

def train_batch(inputs, identity, attributes, model, optimizer, criterion, centroids, unique_ids):
    inputs = inputs.to(device)
    attributes = attributes.to(device)
    
    # Forward pass
    # TODO: This could be improved in the architecture to return both at the same time and improve the training time
    output_attrs = model(inputs)
    output_features = model(inputs, get_features=True)

    # Filter the centroids and pass only the ones in the batch
    centroids_batch = {}
    for i in list(identity):
      pos = np.flatnonzero(unique_ids==int(i))[0]
      centroids_batch[i] = centroids[pos]

    # Apply the loss
    loss = criterion(output_attrs, attributes, output_features, identity, centroids_batch)
    
    optimizer.zero_grad()
    # Backward pass
    loss.backward()

    # Step with optimizer
    optimizer.step()

    return loss

In [8]:
def train_log(loss, example_ct, epoch):
    loss = float(loss)
    if config["wandb"]:
        wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Epoch {epoch}: Loss after " + str(example_ct).zfill(5) + f" examples: {loss:.3f}")

In [9]:
def get_features_from_loader(model, loader):
    model.eval()
    with torch.no_grad():
        all_features = torch.zeros(len(loader.dataset), model.feature_size)
        for batch_idx, (inputs, ids, attr) in enumerate(tqdm(loader)):
                inputs = inputs.to(device)
                features = model(inputs, get_features=True)
                for in_batch, f in enumerate(features):
                    all_features[(batch_idx * loader.batch_size) + in_batch] = f
        return all_features


def test_mAP(model, gallery_loader, queries_loader, ground_truth_dict, config, save_model=False):
    """
    This function returns the mAP performance of a given model 
    Params:
    model: model to be evaluated
    gallery: tensor that contains the feature representations of the target images in validation or test set
    queries: tensor that contains feature representations of the queries
    rank: top number of elements to retrieve

    Returns:
    mAP performance of the model
    """

    # Run the model on some test examples
    with torch.no_grad():
        
        # Compute the features for queries and gallery
        print("Computing gallery features...")
        gallery_features = get_features_from_loader(model, gallery_loader)
        print("Computing query features...")
        query_features = get_features_from_loader(model, queries_loader)
        
        # Build the cosine similarity matrix between the all the queries and all the elements in gallery
        print("Computing cosine similarities...")
        sims_matrix = torch.empty(query_features.size()[0], gallery_features.size()[0])
        for idx, q in enumerate(query_features):
            sims_matrix[idx] = F.cosine_similarity(q, gallery_features, dim=-1)
        
        print("Similarity matrix shape: " + str(sims_matrix.size()))
        sorted_index = torch.argsort(sims_matrix, dim=1, descending=True)
        top_k = sorted_index.narrow_copy(dim=1, start=0, length=config["mAP_rank"])

        #Build the dictionary to compute the mAP
        predictions_dict = {idx:  r for idx, r in enumerate(top_k.tolist())}
        mAP = Evaluator.evaluate_map(predictions_dict, ground_truth_dict)
        
        print(f"mAP: {mAP}")
        if config["wandb"]:
            wandb.log({"mAP": mAP})

In [10]:
def get_attributes_from_loader(model, loader):
    model.eval()

    all_predictions = np.empty(shape=[0, 27], dtype=np.byte)
    all_attrs = np.empty(shape=[0, 27], dtype=np.byte)

    with torch.no_grad():
        for batch_idx, (inputs, ids, attr) in enumerate(tqdm(loader)):
                inputs = inputs.to(device)
                outputs = model(inputs, get_features=False)
                #print("attr:",attr)
                predictions = torch.empty(attr.size()[1], attr.size()[0])
                for attr_idx, output in enumerate(outputs):
                    if output.size()[1] == 1: #If the output is binary
                        pred = torch.round(torch.squeeze(output, 1))
                    else: #Otherwise it is multiclass
                        pred = torch.argmax(output, dim=1)
                    predictions[attr_idx] = pred

                predictions = torch.transpose(predictions, 0, 1).cpu().numpy()
                attr = attr.cpu().numpy()

                all_predictions = np.append(all_predictions, predictions, axis=0)
                #print("all_predictions shape: ", all_predictions.shape)
                all_attrs = np.append(all_attrs, attr, axis=0)
                #print("all_attrs shape: ", all_attrs.shape)
        return all_predictions, all_attrs

def test_attributes(model, loader, config):
    print("Computing attributes...")
    predictions, attr = get_attributes_from_loader(model, loader)
    print("pred shape: ", predictions.shape)
    print("attr shape: ", attr.shape)

    accuracy_list = []
    precision_list = []
    recall_list = []
    f1_score_list = []

    for i in range(0, predictions.shape[1]):
        y_true, y_pred = attr[:, i], predictions[:, i]
        accuracy_list.append(accuracy_score(y_true, y_pred))
        if i == 0: #If it is age
            precision_list.append(precision_score(y_true, y_pred, average='macro'))
            recall_list.append(recall_score(y_true, y_pred, average='macro'))
            f1_score_list.append(f1_score(y_true, y_pred, average='macro'))
        else:
            precision_list.append(precision_score(y_true, y_pred))
            recall_list.append(recall_score(y_true, y_pred))
            f1_score_list.append(f1_score(y_true, y_pred))

    average_acc = np.mean(accuracy_list)
    average_precision = np.mean(precision_list)
    average_recall = np.mean(recall_list)
    average_f1score = np.mean(f1_score_list)

    print("accuracy_list: ", accuracy_list)
    print("precision_list: ", precision_list)
    print("recall_list: ", recall_list)
    print("f1_score_list: ", f1_score_list)

    print("average_acc: ", average_acc)
    print("average_precision: ", average_precision)
    print("average_recall: ", average_recall)
    print("average_f1score: ", average_f1score)

    if config["wandb"]:
            wandb.log({"accuracy_list": accuracy_list})
            wandb.log({"precision_list": precision_list})
            wandb.log({"recall_list": recall_list})
            wandb.log({"f1_score_list": f1_score_list})

            wandb.log({"average accuracy": average_acc})
            wandb.log({"average precision": average_precision})
            wandb.log({"average recall": average_recall})
            wandb.log({"average f1": average_f1score})

In [11]:
def test(model, gallery_loader, queries_loader, ground_truth_dict, config, save_model=False):
    print("Testing")
    model.eval()

    test_mAP(model, gallery_loader, queries_loader, ground_truth_dict, config)
    test_attributes(model, gallery_loader, config)

    if save_model :
      # Save the model in the exchangeable ONNX format
      inputs, id, attr = next(iter(gallery_loader))
      torch.onnx.export(model, inputs.to(device), "model.onnx", True)
      if config["wandb"]:
        wandb.save("model.onnx")

In [12]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy.spatial import distance

class OverallLossWrapper(nn.Module):
    def __init__(self):
        super(OverallLossWrapper, self).__init__()
        self.id_loss = TripletLoss()
        self.attr_loss = AttributesLossWrapper(0)

    def forward(self, output_attrs, target_attrs, output_features, target_ids, centroids):
        return self.id_loss(output_features, target_ids, centroids) + self.attr_loss(output_attrs, target_attrs)

class AttributesLossWrapper(nn.Module):
    def __init__(self, task_num):
        super(AttributesLossWrapper, self).__init__()
        self.task_num = task_num
        # This is to learn the weights
        #self.log_vars = nn.Parameter(torch.zeros((task_num)))

    def forward(self, preds, attrs):

        bce = nn.BCELoss()
        crossEntropy = nn.CrossEntropyLoss()

        loss_age = crossEntropy(preds[0], attrs[:,0])

        binary_losses = 0
        for idx in range(1, len(preds)):
            binary_losses += bce(preds[idx], attrs[:, idx].unsqueeze(1).to(torch.float32))
        return loss_age + binary_losses

class TripletLoss(nn.Module):
    """Triplet loss with hard positive/negative mining.
    Reference:
    Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737.
    Code imported from https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py.
    Args:
        margin (float): margin for triplet.
    """
    def __init__(self, margin=0.3, mutual_flag = False):
        super(TripletLoss, self).__init__()
        self.margin = margin
        self.ranking_loss = nn.MarginRankingLoss(margin=margin)
        self.mutual = mutual_flag

    def forward(self, inputs, targets, centroids_batch):
        """
        Args:
            inputs: feature matrix with shape (batch_size, feat_dim)
            targets: ground truth labels with shape (num_classes)
        """
        #print(centroids_batch)
        #print(inputs.size())
        str_targets = targets
        targets = torch.Tensor(np.array([int(el) for el in targets]))
        n = inputs.size(0)
        # inputs = 1. * inputs / (torch.norm(inputs, 2, dim=-1, keepdim=True).expand_as(inputs) + 1e-12)
        # Compute pairwise distance, replace by the official when merged

        dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n)
        dist = dist + dist.t()
        dist.addmm_(1, -2, inputs, inputs.t())
        dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability
        # For each anchor, find the hardest positive and negative
        mask = targets.expand(n, n).eq(targets.expand(n, n).t())
        dist_ap, dist_an = [], []
        p_centroid = []
        for i in range(n):
            identity = str_targets[i]
            p_centroid.append(distance.euclidean(inputs[i].detach().cpu().numpy(), centroids_batch[identity]))
            dist_ap.append(dist[i][mask[i]].max().unsqueeze(0))
            dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0))

        dist_ap = torch.cat(dist_ap)
        dist_an = torch.cat(dist_an)
        # Compute ranking hinge loss
        y = torch.ones_like(dist_an)

        #loss = self.ranking_loss(dist_an, dist_ap, y)
        loss = self.ranking_loss(dist_an, torch.from_numpy(np.array(p_centroid)).to(device), y)
        if self.mutual:
            return loss, dist
        return loss

In [None]:
model = model_pipeline(config)

{'wandb': True, 'device': 'auto', 'train_root': '/content/dataset/train', 'test_root': '/content/dataset/test', 'queries_root': '/content/dataset/queries', 'attributes_file': '/content/dataset/annotations_train.csv', 'dataset': 'Market1501', 'backbone': 'resnet50', 'split': {'full_training_size': 0.75, 'train_size': 0.8}, 'compose': {'resize_h': 224, 'resize_w': 224}, 'epochs': 10, 'training_batch_size': 16, 'validation_batch_size': 32, 'learning_rate': 0.01, 'weight_decay': 1e-06, 'momentum': 0.9, 'test_before_training': False, 'test_after_epochs': 10, 'mAP_rank': 15}
Extract queries proportion: 0.11
Identities in train set: 563
Identities in validation set: 188
Train set size: 9631
Validation set size: 2988
Number of validation queries: 370
Number of attributes: 27
Backbone feature size: 2048
Using resnet50 as backbone
Training...
<torch.utils.data.dataloader.DataLoader object at 0x7fe827fdee50>
Computing centroids


HBox(children=(FloatProgress(value=0.0, max=602.0), HTML(value='')))

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)





HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

	addmm_(Number beta, Number alpha, Tensor mat1, Tensor mat2)
Consider using one of the following signatures instead:
	addmm_(Tensor mat1, Tensor mat2, *, Number beta, Number alpha) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:1025.)


Epoch 0: Loss after 00784 examples: 26.820
Epoch 0: Loss after 01584 examples: 20.848


In [None]:
a = torch.rand((32, 512))
b = torch.rand((563, 512))

dist = torch.cdist(a, b, p=2)

negative = min (dist[0]) that is not the same id
positive = max (dist[0]) that is the same id