# Zero-shot learning for image classification 

original data and code can be found here https://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-computing/research/zero-shot-learning/zero-shot-learning-the-good-the-bad-and-the-ugly/)
[Akata, et al. CVPR2015]
[Xian, et al. CVPR2017]

Download prepared data of Animal with attributes from: 
https://drive.google.com/open?id=1ErU12Q2sHhB2Lb7NCQuan0K3qXP78RJj

In [1]:
import numpy as np 

In [2]:
def l2_norm(x):
    return x / (np.linalg.norm(x) + 10e-6)

In [3]:
# load prepared data 
your_path_to_data_dict= 'data/data_dict.npz' # PATH TO DATA
data_dict = np.load(your_path_to_data_dict,encoding = 'latin1')['data'].item()

tr_theta_x = data_dict['tr_theta_x'] # training image features extracted from deep CNN
tr_labels = data_dict['tr_labels'] # training image labels as indices matching class embeddings and names
val_theta_x = data_dict['val_theta_x']# validation image features extracted from deep CNN
val_labels = data_dict['val_labels'] # validation image labels as indices matching class embeddings and names
test_theta_x = data_dict['test_theta_x'] # test image features extracted from deep CNN
test_labels = data_dict['test_labels'] # test image labels as indices matching class embeddings and names

class_embeddings = data_dict['phi_y'] # class attributes vectors provided by the original dataset AWA

class_names = data_dict['class_name'] # class names in the same order as embeddings 

In [4]:
# print training, validation, and test class names
# note that class_embeddings and class_names 
len(class_names)
# add your implementation   

50

In [5]:
print(tr_theta_x.shape)
print(val_theta_x.shape)
print(val_labels.shape)
print(test_theta_x.shape)
print(test_labels.shape)
print('-----')
print(class_embeddings.shape)
print(len(class_names))

(20218, 2048)
(9191, 2048)
(9191,)
(7913, 2048)
(7913,)
-----
(50, 85)
50


In [6]:
np.array(class_names)[list(set(tr_labels))]

array(['antelope', 'grizzly+bear', 'killer+whale', 'persian+cat',
       'german+shepherd', 'siamese+cat', 'skunk', 'tiger', 'hippopotamus',
       'spider+monkey', 'humpback+whale', 'elephant', 'fox', 'squirrel',
       'rhinoceros', 'wolf', 'chihuahua', 'weasel', 'otter', 'buffalo',
       'zebra', 'pig', 'lion', 'mouse', 'polar+bear', 'collie', 'cow'],
      dtype='<U15')

In [7]:
np.array(class_names)[list(set(val_labels))]

array(['beaver', 'dalmatian', 'giant+panda', 'deer', 'mole', 'leopard',
       'moose', 'raccoon', 'gorilla', 'ox', 'chimpanzee', 'hamster',
       'rabbit'], dtype='<U15')

In [8]:
np.array(class_names)[list(set(test_labels))]

array(['rat', 'horse', 'blue+whale', 'bobcat', 'walrus', 'dolphin',
       'sheep', 'seal', 'bat', 'giraffe'], dtype='<U15')

In [9]:
tr_theta_x.shape

(20218, 2048)

In [10]:
def train(X,Y,LR,T,W=None):
    N = X.shape[0]
    Y_tr = np.unique(Y, axis=0)

    if W is None:
        W = np.ones((Y.shape[1], X.shape[1]))

    for i in range(T): # T is number of epochs
        running_loss = 0
        
        print('epoch: ', i)
        np.random.seed(123)
        X = np.random.permutation(X)
        np.random.seed(123)
        Y = np.random.permutation(Y)
        
        for i in range(N):
            score_true = np.dot(l2_norm(np.dot(X[i], W.T)), Y[i])

            loss_max = -1
            y_max = -1
            
            for label in Y_tr:
                score = np.dot(l2_norm(np.dot(X[i], W.T)), label)
                loss = np.all(Y[i] != label) + score - score_true
                if loss > loss_max:
                    loss_max = loss
                    y_max = label
            if not np.all(y_max == Y[i]):
                W = W + LR * np.expand_dims(X[i], 0) * np.expand_dims((Y[i] - y_max),1)
                
            running_loss += loss
        
        print(f'running loss: {running_loss}')
            
    return W

In [11]:
def predict(x, test_class_indices, W):
    
    # add your implementation
    
    max_score = -1
    y_max = -1
    for i in test_class_indices:
        score = np.dot(l2_norm(np.dot(x, W.T)), class_embeddings[i])
        if score > max_score:
            max_score = score
            y_max = i
    # foreach label in test_class_indices for i in test_class_indices: 
        # score =  𝛳(xi) * WT *𝜙(label) 
        # if score > max_score --> update max_score and ymax
        
    return y_max

In [12]:
def evaluate(X,Y,W):
    N = X.shape[0]
    Y_test = np.unique(Y, axis=0)

    correct_pred = 0
    # foreach sample (xi, ytrue)
    for i in range(N):
        pred_label = predict(X[i], Y_test, W)
        # pred_label = predict(xi,test_class_indices,W)
        if pred_label == Y[i]:
            correct_pred += 1
        acc = correct_pred / N
        # if pred_label == ytrue:
            # correct_pred += 1
    # acc = correct_pred / size of test set
    
    return acc

In [13]:
LR = 0.001
T = 10
Y = []
for i, label in enumerate(tr_labels):
    Y.append(class_embeddings[label])
Y = np.array(Y)
print(Y.shape)
W = train(tr_theta_x, Y, LR, T)

(20218, 85)
epoch:  0
running loss: -449.80546035214286
epoch:  1
running loss: -512.1371045217775
epoch:  2
running loss: -565.0919452457666
epoch:  3
running loss: -612.3440366521853
epoch:  4
running loss: -653.5339771141836
epoch:  5
running loss: -687.9722996319962
epoch:  6
running loss: -720.2364327874731
epoch:  7
running loss: -751.1044502277916
epoch:  8
running loss: -781.0078137510255
epoch:  9
running loss: -810.0876422995375


In [None]:
evaluate(val_theta_x, val_labels, W)