# Zero-shot learning for image classification 

original data and code can be found here https://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-computing/research/zero-shot-learning/zero-shot-learning-the-good-the-bad-and-the-ugly/)
[Akata, et al. CVPR2015]
[Xian, et al. CVPR2017]

Download prepared data of Animal with attributes from: 
https://drive.google.com/open?id=1ErU12Q2sHhB2Lb7NCQuan0K3qXP78RJj

In [10]:
import numpy as np 

In [11]:
# load prepared data 
data_dict = np.load('/Users/garethjones/Downloads/data_dict.npz', encoding = 'latin1')['data'].item()

tr_theta_x = data_dict['tr_theta_x'] # training image features extracted from deep CNN
tr_labels = data_dict['tr_labels'] # training image labels as indices matching class embeddings and names
val_theta_x = data_dict['val_theta_x']# validation image features extracted from deep CNN
val_labels = data_dict['val_labels'] # validation image labels as indices matching class embeddings and names
test_theta_x = data_dict['test_theta_x'] # test image features extracted from deep CNN
test_labels = data_dict['test_labels'] # test image labels as indices matching class embeddings and names

# class embeddings are a vector of features, in this case 85, for each class
# this is the side knowledge, like we saw with polar bear example
# when we compute weights of our model based on the training class embeddings, we'll be automatically able to link test images to the right vectors
class_embeddings = data_dict['phi_y'] # class attributes vectors provided by the original dataset AWA. this is the side information
class_names = data_dict['class_name'] # class names in the same order as embeddings 

In [12]:
# print training, validation, and test class names
# note that class_embeddings and class_names 

# create classes dictionary
class_dict = {}
index = list(range(len(class_names)))

for i in range(len(class_names)):
    class_dict[index[i]] = class_names[i]
class_dict

# set quickly finds the unique values in a list
unique_tr_labels = list(set(tr_labels))
unique_val_labels = list(set(val_labels))
unique_test_labels = list(set(test_labels))

# list enumartion to find the unique names of labels
tr_names = [class_dict[i] for i in unique_tr_labels]
val_names = [class_dict[i] for i in unique_val_labels]
test_names = [class_dict[i] for i in unique_test_labels]

print(tr_names)
print(val_names)
print(test_names)
print(len(tr_names)+len(val_names)+len(test_names))


['antelope', 'grizzly+bear', 'killer+whale', 'persian+cat', 'german+shepherd', 'siamese+cat', 'skunk', 'tiger', 'hippopotamus', 'spider+monkey', 'humpback+whale', 'elephant', 'fox', 'squirrel', 'rhinoceros', 'wolf', 'chihuahua', 'weasel', 'otter', 'buffalo', 'zebra', 'pig', 'lion', 'mouse', 'polar+bear', 'collie', 'cow']
['beaver', 'dalmatian', 'giant+panda', 'deer', 'mole', 'leopard', 'moose', 'raccoon', 'gorilla', 'ox', 'chimpanzee', 'hamster', 'rabbit']
['rat', 'horse', 'blue+whale', 'bobcat', 'walrus', 'dolphin', 'sheep', 'seal', 'bat', 'giraffe']
50


In [53]:
def train(tr_theta_x,tr_labels,lr=0.0001,n_epochs=2,W=None):
    
    ####### use SGD to minimize SJE loss ######### 
    # Initialize W (DxE)
    # foreach epoch 
        # Shuffle training samples
        # foreach sample (xi, ytrue)
            # 1. scoretrue =  𝛳(xi) * WT * 𝜙(ytrue)
            # 2. lossmax= -1,  ymax = -1
            # foreach training label ytr  
                # 1. score =  𝛳(xi) * WT *𝜙(ytr)
                # 2. loss = Δ(ytr,ytrue) + scoretrue - score
                # 3. if loss > lossmax --> update lossmax and ymax
            # 4. if ymax ≠ ytrue --> W = W - lr * 𝛳(xi) [𝜙(ytrue) - 𝜙(ymax)]
    
    W = np.random.rand(tr_theta_x.shape[1],class_embeddings.shape[1])
    
    for e in range(n_epochs):
        tr_labels = np.reshape(tr_labels,(len(tr_labels),1))
        combined = np.append(tr_theta_x,tr_labels,axis=1)
        np.random.shuffle(combined)
        tr_theta_x_shuf = combined[:,:-1]
        tr_labels_shuf = combined[:,-1]
        
        for i in range(len(tr_theta_x_shuf)):
            phi_ytrue = class_embeddings[tr_labels[i],:]
            scoretrue = np.dot(np.dot(tr_theta_x_shuf[i],W),phi_ytrue.T)
            lossmax = -1
            ymax = -1
            
            for j in list(set(map(int,tr_labels_shuf))):
                phi_ytr = class_embeddings[j,:]
                score = np.dot(np.dot(tr_theta_x_shuf[i],W),phi_ytr.T) 
                
                if np.array_equal(phi_ytr,phi_ytrue) == True:
                    loss = 0
                else:
                    loss = 1 + score - scoretrue
                
                if loss > lossmax:
                        lossmax = loss
                        ymax = j 
            
            if ymax != tr_labels_shuf[i]:
                phi_diff = phi_ytrue - class_embeddings[ymax,:]
                # you need to do outer here to get the right shape to subtract
                W = W - lr*np.outer(tr_theta_x_shuf[i],phi_diff) # second term is the gradient of our loss function
                    
    return W

W = train(tr_theta_x,tr_labels)

In [33]:
print('Weight matrix shape is '+str(W.shape))
print('Class Embeddings shape is '+str(class_embeddings.shape))
print('Training features (theta) shape is '+str(tr_theta_x.shape))
print('Training labels (classes) shape is '+str(tr_labels.shape))

Weight matrix shape is (2048, 85)
Class Embeddings shape is (50, 85)
Training features (theta) shape is (20218, 2048)
Training labels (classes) shape is (20218, 1)


In [46]:
def predict(x,test_class_indices,W):
    
    # add your implementation
    
    #  max_score = -1,  ymax = -1
    # foreach label in test_class_indices for i in test_class_indices: 
        # score =  𝛳(xi) * WT *𝜙(label) 
        # if score > max_score --> update max_score and ymax
    
    max_score = -1
    ymax = -1
    
    for i in list(set(map(int,test_class_indices))):
        phi_label = class_embeddings[i,:]
        score = np.dot(x,np.dot(W,phi_label.T))
        if score > max_score:
            max_score = score
            ymax = i 
            
    return ymax

x = test_theta_x[1] # only ever input one image
test_class_indices = list(set(test_labels)) # unique set

ymax = predict(x,test_class_indices,W)
print(ymax)

33


In [51]:
def evaluate(X,Y,test_class_indices,W):

    # correct_pred = 0
    # foreach sample (xi, ytrue)
        # pred_label = predict(xi,test_class_indices,W)
        # if pred_label == ytrue:
            # correct_pred += 1
    # acc = correct_pred / size of test set
    
    correct_pred = 0
    
    for i in range(len(X)):
        ytrue = Y[i]
        pred_label = predict(X[i],test_class_indices,W)
        if pred_label == ytrue:
            correct_pred += 1
    
    acc = correct_pred / len(X)
    
    return acc

X = test_theta_x
Y = test_labels
test_class_indices = list(set(test_labels)) # unique set

acc = '{:.3f}'.format(evaluate(X,Y,test_class_indices,W))
print(acc)

0.039
