In [1]:
#
# prepareAWA1.ipynb
#
# Jupyter Notebook to prepare AWA1 dataset for further zero-shot learning tasks 
# AWA1  : Animals With Attributes 1
#
# n_classes   : 50
# n_attributes: 85
#
# Written by cetinsamet -*- cetin.samet@metu.edu.tr
# December, 2019 

In [2]:
import scipy.io as sio
import numpy as np
import os

In [3]:
def read_file(filepath):
    """Return the lines of a text file with surrounding whitespace removed.

    Parameters
    ----------
    filepath : str
        Path of the text file to read.

    Returns
    -------
    list of str
        One entry per line, in file order. Each entry is the line after
        ``str.strip()``, so a blank line is kept as an empty string.
    """
    with open(filepath, 'r') as handle:
        return [raw_line.strip() for raw_line in handle]

In [4]:
# Directory holding the raw AWA1 inputs read by this notebook
# (class-list .txt files, res101.mat, att_splits.mat); relative to the working directory
AWA1_PATH = "./AWA1"
# Output root: the "test" and "validation" sub-directories and attributes.mat are written here
AWA1P_PATH = "./AWA1P" #Preprocessed AWA1 directory

In [5]:
def _load_split(filename):
    """Read a class-name list from AWA1_PATH, print its size and content, and return it."""
    names = read_file(os.path.join(AWA1_PATH, filename))
    print(len(names), names)
    print()
    return names


# Training classes  ---  27 classes
train_classes = _load_split("trainclasses1.txt")

# Validation classes  ---  13 classes
val_classes = _load_split("valclasses1.txt")

# Training and validation classes together  ---  40 classes (27 + 13)
trainval_classes = _load_split("trainvalclasses.txt")

# Test classes  ---  10 classes
test_classes = _load_split("testclasses.txt")

27 ['squirrel', 'collie', 'grizzly+bear', 'otter', 'mouse', 'siamese+cat', 'wolf', 'elephant', 'polar+bear', 'antelope', 'chihuahua', 'skunk', 'buffalo', 'weasel', 'fox', 'german+shepherd', 'persian+cat', 'spider+monkey', 'hippopotamus', 'humpback+whale', 'cow', 'rhinoceros', 'tiger', 'killer+whale', 'lion', 'zebra', 'pig']

13 ['mole', 'beaver', 'deer', 'gorilla', 'chimpanzee', 'dalmatian', 'ox', 'giant+panda', 'leopard', 'hamster', 'moose', 'rabbit', 'raccoon']

40 ['killer+whale', 'beaver', 'dalmatian', 'persian+cat', 'german+shepherd', 'siamese+cat', 'skunk', 'mole', 'tiger', 'hippopotamus', 'leopard', 'spider+monkey', 'elephant', 'gorilla', 'ox', 'chimpanzee', 'hamster', 'fox', 'squirrel', 'rabbit', 'wolf', 'chihuahua', 'weasel', 'otter', 'buffalo', 'zebra', 'giant+panda', 'pig', 'lion', 'polar+bear', 'collie', 'cow', 'deer', 'mouse', 'humpback+whale', 'antelope', 'grizzly+bear', 'rhinoceros', 'raccoon', 'moose']

10 ['sheep', 'dolphin', 'bat', 'seal', 'blue+whale', 'rat', 'horse'

In [6]:
# Parse res101.mat a single time; both the feature matrix and the label
# vector live in the same file, so loading it twice only wastes time.
res101 = sio.loadmat(os.path.join(AWA1_PATH, "res101.mat"))

# Image features, transposed so that each row corresponds to one image
features = res101['features'].T
print(features.shape)

# Image labels (one row per image)
labels   = res101['labels']
print(labels.shape)

# Dataset metadata: class names, attribute matrices and split locations
metaData = sio.loadmat(os.path.join(AWA1_PATH, "att_splits.mat"))
print(metaData.keys())

(30475, 2048)
(30475, 1)
dict_keys(['__header__', '__version__', '__globals__', 'allclasses_names', 'att', 'original_att', 'test_seen_loc', 'test_unseen_loc', 'trainval_loc', 'train_loc', 'val_loc'])


In [7]:
# Collect every class name in the order stored in the metadata file.
# Each entry of 'allclasses_names' is nested, so [0][0] unwraps it to the bare name.
all_classes = []
for name_cell in metaData['allclasses_names']:
    all_classes.append(str(name_cell[0][0]))
print(all_classes)

['antelope', 'grizzly+bear', 'killer+whale', 'beaver', 'dalmatian', 'persian+cat', 'horse', 'german+shepherd', 'blue+whale', 'siamese+cat', 'skunk', 'mole', 'tiger', 'hippopotamus', 'leopard', 'moose', 'spider+monkey', 'humpback+whale', 'elephant', 'gorilla', 'ox', 'fox', 'sheep', 'seal', 'chimpanzee', 'hamster', 'squirrel', 'rhinoceros', 'rabbit', 'bat', 'giraffe', 'wolf', 'chihuahua', 'rat', 'weasel', 'otter', 'buffalo', 'zebra', 'giant+panda', 'deer', 'bobcat', 'pig', 'lion', 'mouse', 'polar+bear', 'collie', 'walrus', 'raccoon', 'cow', 'dolphin']


In [8]:
# Positions within all_classes of the classes listed in train_classes
is_train_class = [name in train_classes for name in all_classes]
train_indices = [pos for pos, flag in enumerate(is_train_class) if flag]
print("|#ofIndices:", len(train_indices), "\t|Train Indices:", train_indices)

|#ofIndices: 27 	|Train Indices: [0, 1, 2, 5, 7, 9, 10, 12, 13, 16, 17, 18, 21, 26, 27, 31, 32, 34, 35, 36, 37, 41, 42, 43, 44, 45, 48]


In [9]:
# Positions within all_classes of the classes listed in val_classes
is_val_class = [name in val_classes for name in all_classes]
val_indices = [pos for pos, flag in enumerate(is_val_class) if flag]
print("|#ofIndices:", len(val_indices), "\t|Val Indices:", val_indices)

|#ofIndices: 13 	|Val Indices: [3, 4, 11, 14, 15, 19, 20, 24, 25, 28, 38, 39, 47]


In [10]:
# Positions within all_classes of the classes listed in trainval_classes
is_trainval_class = [name in trainval_classes for name in all_classes]
trainval_indices = [pos for pos, flag in enumerate(is_trainval_class) if flag]
print("|#ofIndices:", len(trainval_indices), "\t|TrainVal Indices:", trainval_indices)

|#ofIndices: 40 	|TrainVal Indices: [0, 1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 31, 32, 34, 35, 36, 37, 38, 39, 41, 42, 43, 44, 45, 47, 48]


In [11]:
# Positions within all_classes of the classes listed in test_classes
is_test_class = [name in test_classes for name in all_classes]
test_indices = [pos for pos, flag in enumerate(is_test_class) if flag]
print("|#ofIndices:", len(test_indices), "\t|Test Indices:", test_indices)

|#ofIndices: 10 	|Test Indices: [6, 8, 22, 23, 29, 30, 33, 40, 46, 49]


In [12]:
# Sample-location arrays of the three evaluation subsets, as stored in the metadata file
trainval_loc, test_seen_loc, test_unseen_loc = (
    metaData[key] for key in ('trainval_loc', 'test_seen_loc', 'test_unseen_loc')
)

# Report how many samples each subset contains (first dimension of each array)
for caption, loc in (
    ("Num TrainVal    : ", trainval_loc),
    ("Num Test Seen   : ", test_seen_loc),
    ("Num Test Unseen : ", test_unseen_loc),
):
    print(caption, str(loc.shape[0]))

Num TrainVal    :  19832
Num Test Seen   :  4958
Num Test Unseen :  5685


In [13]:
# Create the output directories for the "test" and "validation" splits.
# exist_ok=True keeps the cell safe to re-run and removes the gap between
# checking for the directory and creating it.
for split_dir in ("test", "validation"):
    os.makedirs(os.path.join(AWA1P_PATH, split_dir), exist_ok=True)

In [14]:
# LABELS (PART I)

# Both the stored locations and the stored label values are shifted down by
# one (presumably because the .mat files use 1-based numbering -- TODO confirm);
# every result is a column vector with one label per row.
trainval_labels    = labels[trainval_loc - 1].reshape(-1, 1) - 1
test_seen_labels   = labels[test_seen_loc - 1].reshape(-1, 1) - 1
test_unseen_labels = labels[test_unseen_loc - 1].reshape(-1, 1) - 1

# Each array goes to <AWA1P_PATH>/test/<name>.mat under a key equal to its name
for mat_name, mat_data in (
    ('trainval_labels', trainval_labels),
    ('test_seen_labels', test_seen_labels),
    ('test_unseen_labels', test_unseen_labels),
):
    sio.savemat(os.path.join(AWA1P_PATH, "test", mat_name + ".mat"), {mat_name: mat_data})

In [15]:
# FEATURES (PART I)

# Take the feature width from the loaded matrix instead of hard-coding 2048,
# so the cell keeps working if a feature file with a different width is used.
feature_dim = features.shape[1]

# Locations are shifted down by one before indexing; the reshape collapses the
# extra axis introduced by indexing with a column of locations.
trainval_features     = features[trainval_loc - 1].reshape(-1, feature_dim)
test_seen_features    = features[test_seen_loc - 1].reshape(-1, feature_dim)
test_unseen_features  = features[test_unseen_loc - 1].reshape(-1, feature_dim)

# Each array goes to <AWA1P_PATH>/test/<name>.mat under a key equal to its name
for mat_name, mat_data in (
    ('trainval_features', trainval_features),
    ('test_seen_features', test_seen_features),
    ('test_unseen_features', test_unseen_features),
):
    sio.savemat(os.path.join(AWA1P_PATH, "test", mat_name + ".mat"), {mat_name: mat_data})

In [16]:
# Positions (rows of trainval_labels) whose class index belongs to the training classes
train_loc = [
    pos for pos, row in enumerate(trainval_labels)
    if row[0] in train_indices
]

# Positions whose class index belongs to the validation classes (and not to the
# training classes); samples matching neither list are left out of both
val_unseen_loc = np.asarray([
    pos for pos, row in enumerate(trainval_labels)
    if row[0] not in train_indices and row[0] in val_indices
])

print("Num train loc     : %d" % len(train_loc))
print("Num val unseen loc: %d" % len(val_unseen_loc))

Num train loc     : 13460
Num val unseen loc: 6372


In [17]:
from sklearn.model_selection import train_test_split

# Fraction of the training-class samples held out as the "validation seen" subset
splitRate = 0.1

# Only the split positions are kept; the label halves returned by
# train_test_split are discarded. random_state pins the shuffle.
split_parts = train_test_split(
    train_loc,
    trainval_labels[np.asarray(train_loc)],
    test_size=splitRate,
    random_state=123,
)
x_train_loc    = np.asarray(split_parts[0])
x_val_seen_loc = np.asarray(split_parts[1])

print("Num x_train loc    : %d" % len(x_train_loc))
print("Num x_val_seen loc : %d" % len(x_val_seen_loc))

Num x_train loc    : 12114
Num x_val_seen loc : 1346


In [18]:
# LABELS (PART II)

# Select label rows of the train/validation subsets by position within trainval_labels
train_labels      = trainval_labels[x_train_loc, :]
val_seen_labels   = trainval_labels[x_val_seen_loc, :]
val_unseen_labels = trainval_labels[val_unseen_loc, :]

# Each array goes to <AWA1P_PATH>/validation/<name>.mat under a key equal to its name
for mat_name, mat_data in (
    ('train_labels', train_labels),
    ('val_seen_labels', val_seen_labels),
    ('val_unseen_labels', val_unseen_labels),
):
    sio.savemat(os.path.join(AWA1P_PATH, "validation", mat_name + ".mat"), {mat_name: mat_data})

In [19]:
# FEATURES (PART II)

# Select feature rows of the train/validation subsets by position within trainval_features
train_features      = trainval_features[x_train_loc, :]
val_seen_features   = trainval_features[x_val_seen_loc, :]
val_unseen_features = trainval_features[val_unseen_loc, :]

# Each array goes to <AWA1P_PATH>/validation/<name>.mat under a key equal to its name
for mat_name, mat_data in (
    ('train_features', train_features),
    ('val_seen_features', val_seen_features),
    ('val_unseen_features', val_unseen_features),
):
    sio.savemat(os.path.join(AWA1P_PATH, "validation", mat_name + ".mat"), {mat_name: mat_data})

In [20]:
# Transpose both attribute matrices stored in the metadata file;
# after the transpose `attribute` has one row per class
attribute     = np.transpose(metaData['att'])
org_attribute = np.transpose(metaData['original_att'])
print(attribute.shape)

(50, 85)


In [22]:
# Write the class-level attribute matrix to the output root under the key 'attributes'
attributes_path = os.path.join(AWA1P_PATH, "attributes.mat")
sio.savemat(attributes_path, {'attributes': attribute})