In [64]:
import torch
from torch import nn
from torch.nn import functional as F
from scipy.spatial.distance import mahalanobis
import numpy as np

In [65]:
class HeatedUpScalar(nn.Module):
    def __init__(self, first_value, last_value, nb_steps, **kwargs):
        super().__init__()

        self.first_value = first_value
        self.step = (max(first_value, last_value) - min(first_value, last_value)) / nb_steps

        if first_value > last_value:
            self._factor = -1
        else:
            self._factor = 1

        self._increment = 0

        print("Heated-up factor is {}.".format(self.factor))

    def on_task_end(self):
        self._increment += 1
        print("Heated-up factor is {}.".format(self.factor))

    @property
    def factor(self):
        return self.first_value + (self._factor * self._increment * self.step)

    def forward(self, inputs):
        return self.factor * inputs

In [70]:
h = HeatedUpScalar(4, 16, 6)

Heated-up factor is 4.0.


In [71]:
for _ in range(6):
    h.on_task_end()

Heated-up factor is 6.0.
Heated-up factor is 8.0.
Heated-up factor is 10.0.
Heated-up factor is 12.0.
Heated-up factor is 14.0.
Heated-up factor is 16.0.


In [52]:
y = torch.mm(x[..., 0], x[..., 1].t())
y.shape, y.sum()

(torch.Size([10, 10]), tensor(0.4027))

In [49]:
x[..., 0].shape

torch.Size([10, 2])

In [59]:
x

In [57]:
anchor = x[..., 0]
negative = x[..., 1]

neg_dis = torch.sum(torch.mul(anchor,negative),1)
dim = anchor.size(1)
gor = torch.pow(torch.mean(neg_dis),2) + torch.clamp(torch.mean(torch.pow(neg_dis,2))-1.0/dim, min=0.0)

gor, neg_dis.shape

(tensor(1.2924), torch.Size([10]))

In [47]:
torch.dot(
    x[0, 0],
    x[0, 1]
)

tensor(0.2049)

In [48]:
torch.mm(
    x[0, 0].view(1, -1),
    x[0, 1].view(-1,1)
)

tensor([[0.2049]])

In [14]:
x = torch.arange(4).float() + 1
x, F.normalize(x,dim=0)

(tensor([1., 2., 3., 4.]), tensor([0.1826, 0.3651, 0.5477, 0.7303]))

In [51]:
proxy_per_class = 2
nb_class = 4
nb_samples = 5

x = F.normalize(torch.randn(nb_samples, 10), dim=1, p=2)
y = F.normalize(torch.randn(nb_class * proxy_per_class, 10), dim=1, p=2)

targets = torch.randint(nb_class, size=(5,))
targets

tensor([0, 1, 0, 2, 2])

In [52]:
simi = torch.mm(x, y.t())
simi.shape

torch.Size([5, 8])

In [None]:
def proxy_nca(similarities, targets, proxy_per_class)

In [38]:
simi.shape[0]

4

In [36]:
targets = torch.tensor([0, 1, 2, 3])

In [37]:
numerator = torch.exp(torch.diagonal(simi.transpose(0, 1)[targets]))
mask = torch.ones(simi.shape[0], simi.shape[1])
mask[torch.arange(simi.shape[0]), targets] = 0

denominator = torch.exp(simi) * mask

-torch.log(numerator / denominator.sum(-1))

tensor([1.2707, 1.9096, 2.1086, 1.5890])

In [4]:
similarities= torch.randn((5,3)).to("cuda:0")
targets = torch.tensor([0, 0, 1, 1, 2]).to("cuda:0")

In [5]:
def foo(similarities, targets, s=30, m=0.4):
    numerator = s * (torch.diagonal(similarities.transpose(0, 1)[targets]) - m)
    denominator_but_right = torch.cat(
        [
            torch.cat((similarities[i, :y], similarities[i, y + 1:])).unsqueeze(0)
            for i, y in enumerate(targets)
        ],
        dim=0
    )
    denominator = torch.exp(numerator) + torch.sum(torch.exp(s * denominator_but_right), dim=1)
    loss = numerator - torch.log(denominator)
    return -torch.mean(loss)

In [6]:
foo(similarities, targets)

tensor(42.8319, device='cuda:0')

In [7]:
%timeit foo(similarities, targets)

1.12 ms ± 114 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [8]:
def baz(similarities, targets, s=30, m=0.4):    
    numerator = s * (torch.diagonal(similarities.transpose(0, 1)[targets]) - m)
    
    all_deno = torch.exp(s * similarities)
    all_deno[torch.arange(all_deno.shape[0]), targets] *= 0
        
    denominator = torch.exp(numerator) + torch.sum(all_deno, dim=1)
    loss = numerator - torch.log(denominator)
    return -torch.mean(loss)

In [11]:
baz(similarities, targets)

tensor(42.8319, device='cuda:0')

In [10]:
%timeit baz(similarities, targets)

506 µs ± 76.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [14]:
def bar(similarities, targets, s=30, m=0.4):    
    numerator = s * (torch.diagonal(similarities.transpose(0, 1)[targets]) - m)
    
    all_deno = torch.exp(s * similarities)
    mask = torch.ones(all_deno.shape[0], all_deno.shape[1]).to(all_deno.device)
    mask[torch.arange(all_deno.shape[0]), targets] = 0
    
    all_deno = all_deno * mask
        
    denominator = torch.exp(numerator) + torch.sum(all_deno, dim=1)
    loss = numerator - torch.log(denominator)
    return -torch.mean(loss)

In [15]:
bar(similarities, targets)

tensor(42.8319, device='cuda:0')

In [16]:
%timeit bar(similarities, targets)

430 µs ± 42.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [92]:
all_deno = torch.exp(1 * similarities)
for row_idx in range(similarities.shape[0]):
    all_deno[row_idx, targets[row_idx]] *= 0.
all_deno

tensor([[0.0000, 0.3718, 1.4330],
        [0.0000, 0.7630, 3.7366],
        [0.3122, 0.0000, 0.5891],
        [2.0226, 0.0000, 1.7250],
        [0.6983, 0.4463, 0.0000]], device='cuda:0')

In [93]:
all_deno = torch.exp(1 * similarities)
all_deno[torch.arange(all_deno.shape[0]), targets] *= 0
all_deno

tensor([[0.0000, 0.3718, 1.4330],
        [0.0000, 0.7630, 3.7366],
        [0.3122, 0.0000, 0.5891],
        [2.0226, 0.0000, 1.7250],
        [0.6983, 0.4463, 0.0000]], device='cuda:0')

In [71]:
similarities.view(-1)[targets.shape[0] + targets]

tensor([ 1.3182,  1.3182, -1.1642, -1.1642, -0.8996], device='cuda:0')

In [74]:
similarities.view(-1)

tensor([ 0.5933, -0.9895,  0.3597,  2.2619, -0.2704,  1.3182, -1.1642, -0.8996,
        -0.5291,  0.7044,  1.2560,  0.5452, -0.3590, -0.8067, -0.8009],
       device='cuda:0')

In [73]:
all_deno = torch.exp(similarities

tensor([[ 0.5933, -0.9895,  0.3597],
        [ 2.2619, -0.2704,  1.3182],
        [-1.1642, -0.8996, -0.5291],
        [ 0.7044,  1.2560,  0.5452],
        [-0.3590, -0.8067, -0.8009]], device='cuda:0')

In [19]:
def mer_non_vectorized(x, y):
    loss = 0.
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            loss += (x[i][j] - y[i][j]) * torch.log(x[i][j])
            
    return loss / x.shape[0]

mer_non_vectorized(x, y)

tensor(nan)

In [10]:
def mer_vectorized(x, y):
    return torch.mean(((x - y) * torch.log(x)).sum(-1), dim=0)
    
mer_vectorized(x, y)

tensor(0.5632)

In [72]:
x = torch.tensor([
    [1, 2, 0],
    [0, 2, 3],
    [1, 1, 0],
    [5, 8, 3]
]).float()
x.shape

torch.Size([4, 3])

In [73]:
torch.pdist(x)

tensor([3.1623, 1.0000, 7.8102, 3.3166, 7.8102, 8.6023])

In [None]:
x.addmm_(1, -2, inputs, inputs.t())

In [22]:
torch.mm(x, -x.t())

tensor([[ -5.,  -4.,  -3., -21.],
        [ -4., -13.,  -2., -25.],
        [ -3.,  -2.,  -2., -13.],
        [-21., -25., -13., -98.]])

In [81]:
x = torch.randn(3, 10)
y = torch.randn(3, 10)

In [84]:
torch.mean(torch.abs(torch.pdist(x) - torch.pdist(y)))

tensor(0.6034)

In [74]:
x = torch.randn(3, 10, 7, 7)
y = torch.randn(3, 10, 7, 7)

In [69]:
x = F.relu(x).sum(dim=1).view(x.shape[0], -1)
y = F.relu(y).sum(dim=1).view(x.shape[0], -1)

In [70]:
torch.frobenius_norm(F.normalize(x, dim=1)- F.normalize(y, dim=1))

tensor(0.9623)

In [71]:
torch.frobenius_norm(F.normalize(y, dim=1)- F.normalize(x, dim=1))

tensor(0.9623)

In [288]:
t = torch.tensor([0, 1, 0, 1])

In [289]:
xx = F.normalize(x, dim=1, p=2)
simi = 2 * (1 - torch.mm(xx, xx.transpose(1, 0)) + 1e-8)

simi.argsort(dim=1)

tensor([[0, 3, 1, 2],
        [1, 2, 0, 3],
        [2, 1, 3, 0],
        [3, 0, 2, 1]])

In [290]:
indexes = simi.argsort(dim=1)[..., 1:]
indexes

tensor([[3, 1, 2],
        [2, 0, 3],
        [1, 3, 0],
        [0, 2, 1]])

In [291]:
simi_exp = torch.exp(simi)
simi_exp

tensor([[ 1.0000, 11.8433, 27.0889,  9.4486],
        [11.8433,  1.0000,  2.6142, 22.4042],
        [27.0889,  2.6142,  1.0000, 14.4634],
        [ 9.4486, 22.4042, 14.4634,  1.0000]])

In [292]:
y = torch.zeros(t.shape[0], t.shape[0])
for row_index in range(t.shape[0]):
    col_indexes = (t == t[row_index])
    y[row_index, col_indexes] = 1
y

tensor([[1., 0., 1., 0.],
        [0., 1., 0., 1.],
        [1., 0., 1., 0.],
        [0., 1., 0., 1.]])

In [294]:
mask = torch.eye(t.shape[0]).byte()
y_nodiag = y[~mask].byte()
simi_nodiag = simi_exp[~mask]

y_nodiag

tensor([0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0], dtype=torch.uint8)

In [295]:
p_simi = simi_nodiag[y_nodiag].sum()
n_simi = simi_nodiag[~y_nodiag].sum()

In [296]:
log_p_simi = torch.log(p_simi)
log_pn_simi = torch.log(p_simi + n_simi)

In [297]:
loss = log_pn_simi - log_p_simi

In [298]:
loss

tensor(0.5739)

In [299]:
t_mat = t.view(t.shape[0], 1)
t_mat

tensor([[0],
        [1],
        [0],
        [1]])

In [306]:
t_mat2 = (t_mat == t_mat.t()).float()
t_mat2

tensor([[1., 0., 1., 0.],
        [0., 1., 0., 1.],
        [1., 0., 1., 0.],
        [0., 1., 0., 1.]])

In [307]:
t_mat3 = t_mat2 / t_mat2.sum(dim=-1)
t_mat3

tensor([[0.5000, 0.0000, 0.5000, 0.0000],
        [0.0000, 0.5000, 0.0000, 0.5000],
        [0.5000, 0.0000, 0.5000, 0.0000],
        [0.0000, 0.5000, 0.0000, 0.5000]])

In [304]:
F.binary_cross_entropy(F.softmax(simi, dim=-1), t_mat3)

tensor(0.7768)

In [313]:
t_mat3[~mask].view(mask.shape[0], mask.shape[0]-1)

tensor([[0.0000, 0.5000, 0.0000],
        [0.0000, 0.0000, 0.5000],
        [0.5000, 0.0000, 0.0000],
        [0.0000, 0.5000, 0.0000]])

In [316]:
simi

tensor([[-2.1842e-07,  2.4718e+00,  3.2991e+00,  2.2459e+00],
        [ 2.4718e+00, -4.5684e-07,  9.6095e-01,  3.1092e+00],
        [ 3.2991e+00,  9.6095e-01,  2.0000e-08,  2.6716e+00],
        [ 2.2459e+00,  3.1092e+00,  2.6716e+00, -2.1842e-07]])

In [320]:
tmp = simi[~mask].view(mask.shape[0], mask.shape[0]-1)
F.softmax(tmp, dim=1)

tensor([[0.2448, 0.5599, 0.1953],
        [0.3213, 0.0709, 0.6078],
        [0.6133, 0.0592, 0.3275],
        [0.2040, 0.4837, 0.3123]])

In [354]:
torch.matmul((simi[mask] + simi_nodiag[y_nodiag]).view(-1, 1).repeat(2, 1).t(), simi_nodiag[~y_nodiag])

tensor([1886.7542])

In [345]:
simi_nodiag[~y_nodiag]

torch.Size([8])

In [353]:
(simi[mask] + simi_nodiag[y_nodiag]).view(-1, 1).repeat(2, 1)

torch.Size([8, 1])

In [360]:
xa = torch.randn(1, 10)
xp = torch.randn(1, 10)
xn = torch.randn(1, 10)

In [366]:
torch.mm((xa + xp), xn.t())[0][0]

tensor(-4.7054)

In [311]:
        diag_false = tf.constant(np.array([0.0 for _ in range(self._batch_size)], dtype="float32"))
        y_pos_no_diago = tf.matrix_set_diag(y_true, diag_false)

        y_pos_no_diago = K.cast(y_pos_no_diago, tf.bool)
        y_pos = K.cast(y_true, tf.bool)

        p_similarity = tf.boolean_mask(y_pred, y_pos_no_diago)
        p_similarity = K.reshape(p_similarity, (self._batch_size, self._images_per_label - 1))

        n_similarity = tf.boolean_mask(y_pred, ~y_pos)
        n_similarity = K.reshape(
            n_similarity, (self._batch_size, self._batch_size - self._images_per_label)
        )

        sum_exp_p_similarity = K.sum(tf.exp(p_similarity), axis=1)
        sum_exp_n_similarity = K.sum(tf.exp(n_similarity), axis=1)
        log_sum_exp_p_similarity = K.log(sum_exp_p_similarity)
        log_sum_exp_pn_similarity = K.log(sum_exp_p_similarity + sum_exp_n_similarity)
        loss = log_sum_exp_pn_similarity - log_sum_exp_p_similarity
        loss = K.mean(loss)

NameError: name 'tf' is not defined

In [327]:
torch.mm(x, x.t()).shape

torch.Size([4, 4])

In [328]:
torch.mm(x.t(), x).shape

torch.Size([10, 10])

In [331]:
torch.tan?

In [356]:
import numpy as np

In [358]:
np.ones(8) * 5

array([5., 5., 5., 5., 5., 5., 5., 5.])