In [1]:
#
# prepareAPY.ipynb
#
# Jupyter Notebook to prepare APY dataset for further zero-shot learning tasks 
# APY  : aPascal&aYahoo!
#
# n_classes   : 32
# n_attributes: 64
#
# Written by cetinsamet -*- cetin.samet@metu.edu.tr
# December, 2019 

In [2]:
import scipy.io as sio
import numpy as np
import os

In [3]:
def read_file(filepath):
    """Read a text file and return its lines as a list of stripped strings.

    Args:
        filepath: Path of the text file to open in read mode.

    Returns:
        A list with one entry per line of the file, in file order, each with
        leading and trailing whitespace (including the newline) removed.
        Blank lines are kept as empty strings.
    """
    with open(filepath, 'r') as handle:
        return [raw_line.strip() for raw_line in handle]

In [4]:
# Directory the notebook reads its inputs from (class-list .txt files,
# res101.mat and att_splits.mat are all joined onto this path).
APY_PATH = "./APY"
APYP_PATH = "./APYP" # Preprocessed APY directory: every .mat file saved by this notebook goes under it

In [6]:
def _load_and_show(filename):
    """Read one class-list file under APY_PATH, print its length and content, and return it."""
    class_names = read_file(os.path.join(APY_PATH, filename))
    print(len(class_names), class_names)
    print()
    return class_names


# Training classes  ---  Number of Classes: 15
train_classes = _load_and_show("trainclasses1.txt")

# Validation classes  ---  Number of Classes: 5
val_classes = _load_and_show("valclasses1.txt")

# Training and validation classes together  ---  Number of Classes: 20 (15 + 5)
trainval_classes = _load_and_show("trainvalclasses.txt")

# Test classes  ---  Number of Classes: 12
test_classes = _load_and_show("testclasses.txt")

15 ['bird', 'cat', 'mug', 'bus', 'diningtable', 'bottle', 'car', 'boat', 'dog', 'zebra', 'monkey', 'centaur', 'chair', 'bicycle', 'building']

5 ['aeroplane', 'wolf', 'carriage', 'sofa', 'bag']

20 ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'dog', 'monkey', 'wolf', 'zebra', 'mug', 'building', 'bag', 'carriage', 'sofa', 'centaur', 'diningtable']

12 ['tvmonitor', 'goat', 'motorbike', 'cow', 'jetski', 'train', 'sheep', 'statue', 'horse', 'person', 'pottedplant', 'donkey']



In [7]:
# Parse res101.mat a single time and take both arrays from the same result;
# calling loadmat once per key reads and decodes the whole file twice.
res101 = sio.loadmat(os.path.join(APY_PATH, "res101.mat"))

# Image features, transposed so that each row is one sample
features = res101['features'].T
print(features.shape)

# Image labels (one row per sample)
labels   = res101['labels']
print(labels.shape)

# Drop the dict so the entries that are not needed can be garbage-collected;
# `features` and `labels` keep their own arrays alive.
del res101

# Load metadata of dataset (class names, attribute matrices, split locations)
metaData = sio.loadmat(os.path.join(APY_PATH, "att_splits.mat"))
print(metaData.keys())

(15339, 2048)
(15339, 1)
dict_keys(['__header__', '__version__', '__globals__', 'allclasses_names', 'att', 'original_att', 'test_seen_loc', 'test_unseen_loc', 'trainval_loc'])


In [8]:
# Read all classes
all_classes = [str(currClass[0][0]) for currClass in metaData['allclasses_names']]
print(all_classes)

['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor', 'donkey', 'monkey', 'goat', 'wolf', 'jetski', 'zebra', 'centaur', 'mug', 'statue', 'building', 'bag', 'carriage']


In [9]:
# Find train class indices
train_indices = [i for i, class_ in enumerate(all_classes) if class_ in train_classes]
print("|#ofIndices:", len(train_indices), "\t|Train Indices:", train_indices)

|#ofIndices: 15 	|Train Indices: [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 21, 25, 26, 27, 29]


In [10]:
# Find val class indices
val_indices = [i for i, class_ in enumerate(all_classes) if class_ in val_classes]
print("|#ofIndices:", len(val_indices), "\t|Val Indices:", val_indices)

|#ofIndices: 5 	|Val Indices: [0, 17, 23, 30, 31]


In [11]:
# Positions within all_classes of the classes listed in trainval_classes
trainval_indices = []
for class_pos, class_name in enumerate(all_classes):
    if class_name in trainval_classes:
        trainval_indices.append(class_pos)
print("|#ofIndices:", len(trainval_indices), "\t|TrainVal Indices:", trainval_indices)

|#ofIndices: 20 	|TrainVal Indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 17, 21, 23, 25, 26, 27, 29, 30, 31]


In [12]:
# Positions within all_classes of the classes listed in test_classes
test_indices = []
for class_pos, class_name in enumerate(all_classes):
    if class_name in test_classes:
        test_indices.append(class_pos)
print("|#ofIndices:", len(test_indices), "\t|Test Indices:", test_indices)

|#ofIndices: 12 	|Test Indices: [9, 12, 13, 14, 15, 16, 18, 19, 20, 22, 24, 28]


In [13]:
# Sample locations of the three splits, taken straight from the metadata dict
trainval_loc, test_seen_loc, test_unseen_loc = (
    metaData[split_key]
    for split_key in ('trainval_loc', 'test_seen_loc', 'test_unseen_loc')
)

# Report how many samples each split holds
print("Num TrainVal    : ", str(len(trainval_loc)))
print("Num Test Seen   : ", str(len(test_seen_loc)))
print("Num Test Unseen : ", str(len(test_unseen_loc)))

Num TrainVal    :  5932
Num Test Seen   :  1483
Num Test Unseen :  7924


In [15]:
# Create the two output directories under the preprocessed-data root.
# The existence check must target APYP_PATH, the directory being created:
# testing APY_PATH/test while creating APYP_PATH/test either skips creation
# or raises FileExistsError on a re-run. exist_ok=True performs the check and
# the creation in one step and keeps this cell safe to execute repeatedly.
for split_dir in ("test", "validation"):
    os.makedirs(os.path.join(APYP_PATH, split_dir), exist_ok=True)

In [16]:
# LABELS (PART I)

def _labels_at(loc):
    """Return labels[loc - 1] - 1 as a single column.

    Both the locations and the label values are shifted down by one
    (presumably converting MATLAB's 1-based values to 0-based -- confirm).
    """
    picked = labels[loc - 1]
    return (picked - 1).reshape(-1, 1)


trainval_labels    = _labels_at(trainval_loc)
test_seen_labels   = _labels_at(test_seen_loc)
test_unseen_labels = _labels_at(test_unseen_loc)

# Persist the three label columns under <APYP_PATH>/test
test_dir = os.path.join(APYP_PATH, "test")
sio.savemat(os.path.join(test_dir, "trainval_labels.mat"), {'trainval_labels': trainval_labels})
sio.savemat(os.path.join(test_dir, "test_seen_labels.mat"), {'test_seen_labels': test_seen_labels})
sio.savemat(os.path.join(test_dir, "test_unseen_labels.mat"), {'test_unseen_labels': test_unseen_labels})

In [17]:
# FEATURES (PART I)

# Shift the stored locations down by one before using them as row indices
trainval_rows    = trainval_loc - 1
test_seen_rows   = test_seen_loc - 1
test_unseen_rows = test_unseen_loc - 1

# Gather the feature rows of each split and flatten them to (n_samples, 2048)
trainval_features    = features[trainval_rows].reshape(-1, 2048)
test_seen_features   = features[test_seen_rows].reshape(-1, 2048)
test_unseen_features = features[test_unseen_rows].reshape(-1, 2048)

# Persist the three feature matrices under <APYP_PATH>/test
test_dir = os.path.join(APYP_PATH, "test")
sio.savemat(os.path.join(test_dir, "trainval_features.mat"), {'trainval_features': trainval_features})
sio.savemat(os.path.join(test_dir, "test_seen_features.mat"), {'test_seen_features': test_seen_features})
sio.savemat(os.path.join(test_dir, "test_unseen_features.mat"), {'test_unseen_features': test_unseen_features})

In [18]:
# Partition the row positions of trainval_labels by class membership.
# A row goes to train_loc when its label is a train class index; otherwise it
# goes to val_unseen_loc when its label is a validation class index. Rows
# matching neither list are dropped.
train_loc = [
    row_pos
    for row_pos, row_label in enumerate(trainval_labels)
    if row_label[0] in train_indices
]
val_unseen_loc = np.asarray([
    row_pos
    for row_pos, row_label in enumerate(trainval_labels)
    if row_label[0] not in train_indices and row_label[0] in val_indices
])

print("Num train loc     : %d" % len(train_loc))
print("Num val unseen loc: %d" % len(val_unseen_loc))

Num train loc     : 4906
Num val unseen loc: 1026


In [19]:
from sklearn.model_selection import train_test_split

# Fraction of the training-class samples held out as the "validation seen" set
splitRate = 0.33

# Split the train positions; the labels are passed alongside but their two
# halves are not used afterwards. The fixed random_state keeps the split
# reproducible across runs.
train_loc_labels = trainval_labels[np.asarray(train_loc)]
split_parts = train_test_split(
    train_loc,
    train_loc_labels,
    test_size=splitRate,
    random_state=123,
)

x_train_loc    = np.asarray(split_parts[0])
x_val_seen_loc = np.asarray(split_parts[1])
print("Num x_train loc    : %d" % len(x_train_loc))
print("Num x_val_seen loc : %d" % len(x_val_seen_loc))

Num x_train loc    : 3287
Num x_val_seen loc : 1619


In [20]:
# LABELS (PART II)

# Select the label rows of each validation-setting subset from the trainval labels
train_labels, val_seen_labels, val_unseen_labels = (
    trainval_labels[subset_loc]
    for subset_loc in (x_train_loc, x_val_seen_loc, val_unseen_loc)
)

# Persist the three label columns under <APYP_PATH>/validation
validation_dir = os.path.join(APYP_PATH, "validation")
sio.savemat(os.path.join(validation_dir, "train_labels.mat"), {'train_labels': train_labels})
sio.savemat(os.path.join(validation_dir, "val_seen_labels.mat"), {'val_seen_labels': val_seen_labels})
sio.savemat(os.path.join(validation_dir, "val_unseen_labels.mat"), {'val_unseen_labels': val_unseen_labels})

In [21]:
# FEATURES (PART II)

# Select the feature rows of each validation-setting subset from the trainval features
train_features, val_seen_features, val_unseen_features = (
    trainval_features[subset_loc]
    for subset_loc in (x_train_loc, x_val_seen_loc, val_unseen_loc)
)

# Persist the three feature matrices under <APYP_PATH>/validation
validation_dir = os.path.join(APYP_PATH, "validation")
sio.savemat(os.path.join(validation_dir, "train_features.mat"), {'train_features': train_features})
sio.savemat(os.path.join(validation_dir, "val_seen_features.mat"), {'val_seen_features': val_seen_features})
sio.savemat(os.path.join(validation_dir, "val_unseen_features.mat"), {'val_unseen_features': val_unseen_features})

In [22]:
# Transposed copies of the two attribute matrices stored in the metadata
attribute, org_attribute = (
    metaData[att_key].T for att_key in ('att', 'original_att')
)
print(attribute.shape)

(32, 64)


In [25]:
# Save the class-level attribute matrix at the top of the preprocessed directory
attributes_path = os.path.join(APYP_PATH, "attributes.mat")
sio.savemat(attributes_path, {'attributes': attribute})