# Arcface vs. Cosface vs. Arcface+Cosface

In [1]:
import torch
from torch.nn.functional import linear, normalize

labels = torch.randint(low=0, high=10, size=(6,))
print(labels)
index = torch.where(labels!=-1)[0]
print(index)

tensor([4, 6, 0, 5, 5, 3])
tensor([0, 1, 2, 3, 4, 5])


In [2]:
batch_size = 6
num_class = 5

weights = torch.nn.Parameter(torch.FloatTensor(num_class, 512))
torch.nn.init.xavier_uniform_(weights)
embed_vec = torch.randn((batch_size,512))
logits = linear(normalize(embed_vec), normalize(weights)).clamp(-1,1)
print(logits)
print(logits.shape)
print()

labels = torch.randint(low=0, high=num_class, size=(batch_size,))
print(labels)
index = torch.where(labels!=-1)[0]
print(index)


tensor([[-0.0016, -0.0394, -0.0146,  0.0258, -0.0268],
        [ 0.0209, -0.0399,  0.0914, -0.0066,  0.0424],
        [-0.0661, -0.0771,  0.0389, -0.0413, -0.0049],
        [-0.0681,  0.0132, -0.0275,  0.0343,  0.0055],
        [-0.0756, -0.0029,  0.0225, -0.0044, -0.0222],
        [-0.0753, -0.0096,  0.1256,  0.0286, -0.0812]],
       grad_fn=<ClampBackward1>)
torch.Size([6, 5])

tensor([1, 0, 0, 2, 4, 1])
tensor([0, 1, 2, 3, 4, 5])


In [3]:
target = logits[index, labels[index].view(-1)]
print(target)

tensor([-0.0394,  0.0209, -0.0661, -0.0275, -0.0222, -0.0096],
       grad_fn=<IndexBackward0>)


In [4]:
## Arcface(0.5)
from marginloss import CombinedMarginLoss
softmax = CombinedMarginLoss(s=10, m1=1.0, m2=0.5, m3=0.0)
softmax(logits, labels)

tensor([[-0.0159, -5.1361, -0.1460,  0.2579, -0.2678],
        [-4.6100, -0.3994,  0.9139, -0.0662,  0.4240],
        [-5.3635, -0.7706,  0.3888, -0.4132, -0.0485],
        [-0.6811,  0.1322, -5.0339,  0.3428,  0.0551],
        [-0.7555, -0.0288,  0.2246, -0.0440, -4.9878],
        [-0.7528, -4.8778,  1.2565,  0.2857, -0.8119]], grad_fn=<MulBackward0>)

In [5]:
## Cosface(0.35)
softmax = CombinedMarginLoss(s=10, m1=0.0, m2=0.0, m3=0.35)
softmax(logits, labels)

tensor([[-0.0159, -8.6361, -0.1460,  0.2579, -0.2678],
        [-8.1100, -0.3994,  0.9139, -0.0662,  0.4240],
        [-8.8635, -0.7706,  0.3888, -0.4132, -0.0485],
        [-0.6811,  0.1322, -8.5339,  0.3428,  0.0551],
        [-0.7555, -0.0288,  0.2246, -0.0440, -8.4878],
        [-0.7528, -8.3778,  1.2565,  0.2857, -0.8119]], grad_fn=<MulBackward0>)

In [6]:
## Arcface(0.5) + Cosface(0.35)
softmax = CombinedMarginLoss(s=10, m1=0.5, m2=0.5, m3=0.35)
softmax(logits, labels)

tensor([[ -0.0159, -13.4960,  -0.1460,   0.2579,  -0.2678],
        [-13.4221,  -0.3994,   0.9139,  -0.0662,   0.4240],
        [-14.7606,  -0.7706,   0.3888,  -0.4132,  -0.0485],
        [ -0.6811,   0.1322, -13.4883,   0.3428,   0.0551],
        [ -0.7555,  -0.0288,   0.2246,  -0.0440, -13.4837],
        [ -0.7528, -13.4699,   1.2565,   0.2857,  -0.8119]],
       grad_fn=<MulBackward0>)

# ==================

# Comparsion

In [4]:
import torch
import torch.nn as nn
from dataset import get_dataloader
from marginloss import CombinedMarginLoss
from fclayer import FCSoftmax
from model import get_model
ROOT_DIR = "/home/ljj0512/private/workspace/CV-project/Computer-Vision-Project/train"

class args:
    device = "cuda:3"
    local_rank = 3
    batch_size = 6
    workers = 4

train_loader, x, valid_loader, y = get_dataloader(args)
print(len(train_loader))
print(len(valid_loader))


train dataset length:  5,822,653
valid dataset length:  13,233
970443
2206


## arcface

In [5]:
model = get_model(ROOT_DIR).to(args.device)
margin_loss = CombinedMarginLoss(64, 1.0, 0.5, 0.0).to(args.device)
fc_softmax = FCSoftmax(margin_loss, 512, 85742).to(args.device)
criterion = nn.CrossEntropyLoss().to(args.device)

In [6]:
for inputs, targets in train_loader:
    print(inputs.shape)
    print(targets.shape)
    print(targets)
    embed_vec = model(inputs)
    logits = fc_softmax(embed_vec, targets)
    loss = criterion(logits, targets)
    print(logits.shape)
    _, predicted = torch.max(logits.data, dim=1)
    print(predicted.shape)
    break

torch.Size([6, 3, 112, 112])
torch.Size([6])
tensor([51699, 67147, 23901, 55625, 40658,  3701], device='cuda:3')
torch.Size([6, 85742])
torch.Size([6])


In [7]:
print(predicted)

tensor([55173, 44096, 41145, 65458, 32807, 24839], device='cuda:3')


In [10]:
fc_softmax.weights.shape

torch.Size([85742, 512])

In [10]:
inputs = torch.randn((6,3,112,112))
labels = torch.randint(low=0,high=100,size=(6,))
print(inputs.shape)
print(labels.shape)
print()

margin_loss = CombinedMarginLoss(64, 1.0, 0.5, 0.0)
fc_softmax = FCSoftmax(margin_loss, 512, 85742)
criterion = nn.CrossEntropyLoss()

# with torch.no_grad():
model.train()
fc_softmax.train()
embed_vec = model(inputs)
print(embed_vec.shape)
logits = fc_softmax(embed_vec, labels)
print(logits.shape)
_, predicted = torch.max(logits.data, dim=1)
print(predicted.shape)
loss = criterion(logits, labels)
print(loss.item())
print("finish")
# model = PartialFC(margin_loss, 512, 93431, 1.0, True)

torch.Size([6, 3, 112, 112])
torch.Size([6])

torch.Size([6, 512])
torch.Size([6, 85742])
torch.Size([6])
46.125762939453125
finish


In [8]:
print('=> the number of model parameters: {:,}'.format(sum([p.data.nelement() for p in fc_softmax.parameters()])))

=> the number of model parameters: 43,899,904
