In [1]:
from helpers import *
import torch
import torch.nn.functional as F
import torch.nn as nn
import numpy as np

In [2]:
def sample(x, y, bs):
    idx = np.random.choice(x.size(0), bs)
    return x[idx].cuda(), y[idx].cuda()
    
def validate(x, s, model):
    model.eval()
    with torch.no_grad():
        out = model(x)
        out = out.mm(s.transpose(0,1))
    return out
        
def train(x, y, s, model, opt, marg=.7, bs=500):
    model.train()
    idx = torch.arange(0, bs, dtype=torch.long, device="cuda")
    out  = model(x)
    out = F.sigmoid(out.mm(s.t()))
    val = out[idx, y].unsqueeze(1)
    zeros = torch.zeros_like(val)
    out = torch.max(zeros, marg-val+out)
    loss = out.mean()
    opt.zero_grad()
    loss.backward()
    opt.step()
    return loss.item()

def train_model(tr_feat, tr_lbl, tr_sem, te_feat, te_lbl, te_sem, k=5, bs=2000, nbatch=500, marg=.7, ):
    vis_dim = tr_feat.size(1)
    sem_dim = tr_sem.size(1)
    n_class = tr_lbl.max() + 1
    
    model = nn.Linear(vis_dim, sem_dim, bias=False).cuda()
    crit  = nn.CrossEntropyLoss()
    opt   = torch.optim.Adam(model.parameters())
    accs,losses  = [],[]
    s_tr = tr_sem.cuda()
    #x_te, y_te, s_te = te_feat.cuda(), te_lbl.cuda(), te_sem.cuda()
    
    for i in tqdm.tqdm(range(nbatch)):
        x,y  = sample(tr_feat, tr_lbl, bs)
        losses.append(train(x, y, s_tr, model, opt, marg=marg, bs=bs))#, base_idx)
        #accs.append(validate(x_te, y_te, s_te, model))
    
    return model, losses, accs

# Training

In [3]:
x_tr, y_tr, s_tr = load_train_set(norm_sem=True)
model, losses, accs = train_model(x_tr, y_tr, s_tr, None, None, None)

993it [01:57,  8.44it/s]
100%|██████████| 500/500 [00:04<00:00, 110.06it/s]


# Standard ZSL

In [4]:
x_te,y_te,s_te = load_test_set(generalized=False, norm_sem=True)
out = validate(x_te.cuda(), s_te.cuda(), model)
pp(topk(out, y_te.cuda()))

500it [00:13, 37.28it/s]


 11.12 | 17.61 | 22.35 | 26.37 | 29.65 


# Generalized ZSL

In [5]:
x_te,y_te,s_te = load_test_set(generalized=True, norm_sem=True)
out = validate(x_te.cuda(), s_te.cuda(), model)

1493it [00:20, 72.30it/s]


In [6]:
msk_test  = y_te<500
msk_train = y_te>500

train_accs = topk(out[msk_train], y_te[msk_train].cuda())
test_accs  = topk(out[msk_test], y_te[msk_test].cuda())

#pp(test_accs)
#pp(train_accs)
pp(h_scores(test_accs, train_accs))

 7.08 | 13.32 | 17.74 | 21.86 | 25.66 
