In [1]:
#
# prepareData.ipynb
#
# Jupyter Notebook to prepare datasets for further zero-shot learning tasks 
# APY  : Attribute Pascal & Yahoo Dataset
#
# Written by cetinsamet -*- cetin.samet@metu.edu.tr
# April, 2019 

In [2]:
import scipy.io as sio
import numpy as np
import os

In [3]:
def read_file(filepath):
    """Read a text file and return its lines as a list of stripped strings.

    Parameters
    ----------
    filepath : str
        Path of the text file to read (opened in text mode, 'r').

    Returns
    -------
    list of str
        One entry per line of the file, in file order, with leading and
        trailing whitespace (including the newline) removed. Blank lines
        are kept as empty strings.
    """
    with open(filepath, 'r') as handle:
        return [raw_line.strip() for raw_line in handle]

In [5]:
# ------ * ------ # ------ * ------ # ------ * ------ # ------ * ------ # ------ * ------ # ------ * ------ #
#                                                 APY DATASET                                               #
# ------ * ------ # ------ * ------ # ------ * ------ # ------ * ------ # ------ * ------ # ------ * ------ #

In [6]:
# Class-name lists for every split; each file is read as one stripped line per entry.

# Training classes (15 classes)
train_classes = read_file('APY/trainclasses1.txt')

# Validation classes (5 classes)
val_classes = read_file('APY/valclasses1.txt')

# Training + validation classes together (15 + 5 = 20 classes)
trainval_classes = read_file('APY/trainvalclasses.txt')

# Test classes (12 classes)
test_classes = read_file('APY/testclasses.txt')

In [7]:
# Parse the feature file once and reuse the resulting dict: the original code
# called loadmat on the same file twice, reading it from disk two times.
# (presumably ResNet-101 features, judging by the file name -- confirm)
res101_data = sio.loadmat('APY/res101.mat')

# Image features, transposed so that later cells can index samples along axis 0
features = res101_data['features'].T

# Image labels (one entry per sample; later cells subtract 1 from them)
labels = res101_data['labels']

# Dataset metadata: split locations, class names and class attributes
metaData = sio.loadmat('APY/att_splits.mat')

In [8]:
# Names of all classes, in the order stored in the metadata.
# Each entry of 'allclasses_names' is a nested array, so the actual name is
# unwrapped with [0][0] and converted to a plain Python string.
all_classes = [
    str(name_cell[0][0])
    for name_cell in metaData['allclasses_names']
]

In [9]:
# Positions (within all_classes) of the classes that belong to the training split
train_indices = [
    position
    for position, class_name in enumerate(all_classes)
    if class_name in train_classes
]
print(
    "|#ofIndices:", len(train_indices),
    "\t|Train Indices:", train_indices
)

|#ofIndices: 15 	|Train Indices: [1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 21, 25, 26, 27, 29]


In [10]:
# Positions (within all_classes) of the classes that belong to the validation split
val_indices = [
    position
    for position, class_name in enumerate(all_classes)
    if class_name in val_classes
]
print(
    "|#ofIndices:", len(val_indices),
    "\t\t|Val Indices:", val_indices
)

|#ofIndices: 5 		|Val Indices: [0, 17, 23, 30, 31]


In [11]:
# Positions (within all_classes) of the classes that belong to the train+val split
trainval_indices = [
    position
    for position, class_name in enumerate(all_classes)
    if class_name in trainval_classes
]
print(
    "|#ofIndices:", len(trainval_indices),
    "\t|TrainVal Indices:", trainval_indices
)

|#ofIndices: 20 	|TrainVal Indices: [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 17, 21, 23, 25, 26, 27, 29, 30, 31]


In [12]:
# Positions (within all_classes) of the classes that belong to the test split
test_indices = [
    position
    for position, class_name in enumerate(all_classes)
    if class_name in test_classes
]
print(
    "|#ofIndices:", len(test_indices),
    "\t|Test Indices:", test_indices
)

|#ofIndices: 12 	|Test Indices: [9, 12, 13, 14, 15, 16, 18, 19, 20, 22, 24, 28]


In [13]:
# Sample locations of each split, exactly as stored in the metadata file.
# NOTE(review): later cells subtract 1 before indexing with these, so they
# look 1-based -- confirm against the dataset description.
trainval_loc = metaData['trainval_loc']
test_seen_loc = metaData['test_seen_loc']
test_unseen_loc = metaData['test_unseen_loc']

# Number of samples (rows) in each split; print() stringifies the counts itself.
print("Num TrainVal    : ", trainval_loc.shape[0])
print("Num Test Seen   : ", test_seen_loc.shape[0])
print("Num Test Unseen : ", test_unseen_loc.shape[0])

Num TrainVal    :  5932
Num Test Seen   :  1483
Num Test Unseen :  7924


In [14]:
# Create the output directories for the test-time and validation-time splits.
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race of "if not exists: makedirs". Unlike the previous check, this raises
# FileExistsError if the path exists but is not a directory, instead of
# silently continuing and failing later when files are saved into it.
os.makedirs('APYP/test', exist_ok=True)
os.makedirs('APYP/validation', exist_ok=True)

In [15]:
# LABELS (PART I)

def _labels_at(locations):
    """Return the labels found at `locations` as an (n, 1) column.

    1 is subtracted from the locations before indexing and from the selected
    labels afterwards (presumably converting 1-based values to 0-based --
    confirm against the dataset description).
    """
    selected = labels[locations - 1]
    return (selected - 1).reshape(-1, 1)


trainval_labels = _labels_at(trainval_loc)
test_seen_labels = _labels_at(test_seen_loc)
test_unseen_labels = _labels_at(test_unseen_loc)

# Store every label set in its own .mat file, keyed by the variable name.
sio.savemat(
    'APYP/test/trainval_labels.mat',
    {'trainval_labels': trainval_labels},
)
sio.savemat(
    'APYP/test/test_seen_labels.mat',
    {'test_seen_labels': test_seen_labels},
)
sio.savemat(
    'APYP/test/test_unseen_labels.mat',
    {'test_unseen_labels': test_unseen_labels},
)

In [16]:
# FEATURES (PART I)

# Width of one feature vector, taken from the loaded matrix instead of the
# previously hard-coded 2048, so the cell also works for features of another
# dimensionality. For 2048-dimensional features the result is unchanged.
feature_dim = features.shape[-1]

# Select the rows of each split (locations shifted by 1 before indexing) and
# flatten the result to a 2-D (n_samples, feature_dim) matrix.
trainval_features = features[trainval_loc - 1].reshape(-1, feature_dim)
test_seen_features = features[test_seen_loc - 1].reshape(-1, feature_dim)
test_unseen_features = features[test_unseen_loc - 1].reshape(-1, feature_dim)

# Store every feature matrix in its own .mat file, keyed by the variable name.
sio.savemat('APYP/test/trainval_features.mat', {'trainval_features': trainval_features})
sio.savemat('APYP/test/test_seen_features.mat', {'test_seen_features': test_seen_features})
sio.savemat('APYP/test/test_unseen_features.mat', {'test_unseen_features': test_unseen_features})

In [17]:
# Split the rows of trainval_labels by class membership:
#   - rows whose class is a training class   -> train_loc (kept as a list)
#   - rows whose class is a validation class -> val_unseen_loc (numpy array)
# Rows matching neither list are ignored; a training match takes precedence.
train_loc = []
val_unseen_rows = []

for row, class_id in enumerate(trainval_labels[:, 0]):
    if class_id in train_indices:
        train_loc.append(row)
        continue
    if class_id in val_indices:
        val_unseen_rows.append(row)

val_unseen_loc = np.asarray(val_unseen_rows)

print("Num train loc     : %d" % len(train_loc))
print("Num val unseen loc: %d" % len(val_unseen_loc))

Num train loc     : 4906
Num val unseen loc: 1026


In [18]:
from sklearn.model_selection import train_test_split

# Fraction of the training-class rows held out as the "validation seen" set
splitRate = 0.1

# Split the row positions (and, in parallel, their labels) with a fixed seed
# so the partition is reproducible. Only the position halves are used; the
# label halves returned by train_test_split are discarded.
split_parts = train_test_split(
    train_loc,
    trainval_labels[np.asarray(train_loc)],
    test_size=splitRate,
    random_state=123,
)
x_train_loc = np.asarray(split_parts[0])
x_val_seen_loc = np.asarray(split_parts[1])

print("Num x_train loc    : %d" % len(x_train_loc))
print("Num x_val_seen loc : %d" % len(x_val_seen_loc))

Num x_train loc    : 4415
Num x_val_seen loc : 491


In [19]:
# LABELS (PART II)

# Pick the label rows of each validation-time subset out of the trainval labels.
train_labels = trainval_labels[x_train_loc]
val_seen_labels = trainval_labels[x_val_seen_loc]
val_unseen_labels = trainval_labels[val_unseen_loc]

# Store every label set in its own .mat file, keyed by the variable name.
sio.savemat(
    'APYP/validation/train_labels.mat',
    {'train_labels': train_labels},
)
sio.savemat(
    'APYP/validation/val_seen_labels.mat',
    {'val_seen_labels': val_seen_labels},
)
sio.savemat(
    'APYP/validation/val_unseen_labels.mat',
    {'val_unseen_labels': val_unseen_labels},
)

In [20]:
# FEATURES (PART II)

# Pick the feature rows of each validation-time subset out of the trainval
# features, using the same row positions as for the labels.
train_features = trainval_features[x_train_loc]
val_seen_features = trainval_features[x_val_seen_loc]
val_unseen_features = trainval_features[val_unseen_loc]

# Store every feature matrix in its own .mat file, keyed by the variable name.
sio.savemat(
    'APYP/validation/train_features.mat',
    {'train_features': train_features},
)
sio.savemat(
    'APYP/validation/val_seen_features.mat',
    {'val_seen_features': val_seen_features},
)
sio.savemat(
    'APYP/validation/val_unseen_features.mat',
    {'val_unseen_features': val_unseen_features},
)

In [21]:
# Class attribute matrices from the metadata, transposed.
# NOTE(review): presumably one row per class after the transpose -- confirm.
attribute = np.transpose(metaData['att'])
org_attribute = np.transpose(metaData['original_att'])

In [22]:
# CLASS-LEVEL ATTRIBUTES
# Save the transposed 'att' matrix under the key 'all_class_vec'.
sio.savemat(
    'APYP/all_class_vec.mat',
    {'all_class_vec': attribute},
)