In [1]:
import numpy as np
import scipy.io as sio
import pickle
import os

In [2]:
# Read the Lenk .mat file and pull out the train / test matrices.
lenk_name = "data/lenk/lenk_data.mat"
lenk_dataset = sio.loadmat(lenk_name)
print(lenk_dataset.keys())
# Each matrix carries the label in its last column (see the slicing in the next cell).
lenk_train, lenk_test = lenk_dataset['Traindata'], lenk_dataset['Testdata']

dict_keys(['__header__', '__version__', '__globals__', 'Traindata', 'Testdata'])


In [3]:
# Reshape the flat Lenk matrices into per-task arrays: T x N x d1 features
# and T x N labels. The last column of each matrix is the label.

# The number of distinct feature rows is used as the per-task block length
# (presumes every task is a contiguous block over the same rows — TODO confirm).
n_train = np.unique(lenk_train[:, :-1], axis=0).shape[0]
train_starts = range(0, lenk_train.shape[0], n_train)
tasks = [lenk_train[start:start + n_train, :-1] for start in train_starts]
labels = [lenk_train[start:start + n_train, -1] for start in train_starts]

# Append each task's test rows after its training rows; the k-th block of the
# test matrix is merged into the k-th task.
n_test = np.unique(lenk_test[:, :-1], axis=0).shape[0]
for task_idx, start in enumerate(range(0, lenk_test.shape[0], n_test)):
    block = lenk_test[start:start + n_test]
    tasks[task_idx] = np.vstack((tasks[task_idx], block[:, :-1]))
    labels[task_idx] = np.append(labels[task_idx], block[:, -1])

tasks = np.asarray(tasks)
print(tasks.shape)
labels = np.asarray(labels)
print(labels.shape)

(180, 20, 14)
(180, 20)


In [4]:
# Problem sizes: T is the number of tasks, d1 the number of features per sample.
T, d1 = tasks.shape[0], tasks[0].shape[-1]
print("T = {}".format(T))
print("d1 = {}".format(d1))

T = 180
d1 = 14


In [5]:
def transform_Y(task, labels):
    """Compute a unit-norm descriptor vector for one task.

    For every sample ``(x_i, y_i)`` the vector ``phi_i = [y_i * x_i, x_i]``
    (length ``2 * d1``) is formed; the descriptor is the mean of the
    ``phi_i`` over all samples, scaled to unit L2 norm.

    Parameters
    ----------
    task : array-like, shape (N, d1)
        Feature rows of the task. Any feature dimension ``d1`` is accepted
        (the previous implementation only worked for ``d1 == 14``).
    labels : array-like, N values
        One label per feature row.

    Returns
    -------
    numpy.ndarray, shape (2 * d1,)
        The normalised mean feature vector. If the mean vector is all zeros
        the division yields NaNs, as before.

    Raises
    ------
    ValueError
        If ``task`` is not 2-D or ``labels`` does not hold one value per row.
    """
    task = np.asarray(task, dtype=float)
    labels = np.asarray(labels, dtype=float).reshape(-1)
    if task.ndim != 2:
        raise ValueError("task must be a 2-D array of shape (N, d1)")
    if labels.shape[0] != task.shape[0]:
        raise ValueError("labels must contain exactly one entry per row of task")

    # Mean of y_i * x_i and mean of x_i, concatenated. This equals averaging
    # the two columns of the outer product x_i [y_i, 1] over all samples.
    avg_phi = np.concatenate((
        (task * labels[:, np.newaxis]).mean(axis=0),
        task.mean(axis=0),
    ))
    return avg_phi / np.linalg.norm(avg_phi)

In [6]:
# Hold out one task as the meta-test task; every other task is used for training.
test_task = tasks[0]        # Can also choose this randomly
test_labels = labels[0]
train_tasks = tasks[1:]
train_labels = labels[1:]

# Descriptor of the test task, computed from the raw (un-normalised) rows.
Y0 = transform_Y(test_task, test_labels)
X0 = test_task
# Labels as an (N, 1) column so they can be divided row-wise by the norms below.
R0 = np.expand_dims(test_labels, axis=1)

# Scale each feature row to unit L2 norm and divide its label by the same factor.
normx0 = np.linalg.norm(X0, axis=1).reshape(len(X0), 1)
X0new = X0/normx0
R0new = R0/normx0

# Save the test task. The `with` block closes each file as soon as it is
# written instead of leaving the handle to the garbage collector.
for file_name, payload in (('X0.pkl', X0new), ('Y0.pkl', Y0), ('R0.pkl', R0new)):
    with open('./data/lenk/' + file_name, "wb") as fh:
        pickle.dump(payload, fh)

In [7]:
# Prepare the meta-training dataset.

# Select 50 distinct tasks uniformly at random from the training tasks.
# Sampling is done WITHOUT replacement: np.random.randint could draw the same
# task several times, which would put identical data under different task ids.
n_train_tasks = 50
indices = np.random.choice(len(train_tasks), size=n_train_tasks, replace=False)

# One descriptor row (length d2 = 2 * d1) per selected task.
d2 = 2*d1
Ytrain = np.zeros((n_train_tasks, d2))
for i in range(n_train_tasks):
    Ytrain[i] = transform_Y(train_tasks[indices[i]], train_labels[indices[i]])

# Stack all samples of the selected tasks, task after task.
n_per_task = train_tasks.shape[1]
Xtrain = train_tasks[indices].reshape(-1, d1).astype(float)       # (tasks * samples, d1)
Rtrain = train_labels[indices].reshape(-1, 1).astype(float)       # (tasks * samples, 1)
# task_function[k] is the position (0 .. n_train_tasks - 1) in `indices` of
# the task that row k of Xtrain belongs to.
task_function = np.repeat(np.arange(n_train_tasks, dtype=float), n_per_task)

# Normalize data: unit L2 norm per feature row, label divided by the same factor.
normXt = np.linalg.norm(Xtrain, axis=1).reshape(len(Xtrain),1)
Xtrain_new = Xtrain/normXt
Rtrain_new = Rtrain/normXt

# Save the meta-train dataset, closing every file right after it is written.
for file_name, payload in (
    ('X.pkl', Xtrain_new),
    ('Y.pkl', Ytrain),
    ('R.pkl', Rtrain_new),
    ('task_function.pkl', task_function),
):
    with open('./data/lenk/' + file_name, "wb") as fh:
        pickle.dump(payload, fh)

In [8]:
# Create an evaluation set for the meta-test task.

# Draw the evaluation rows without replacement so the set holds 8 distinct rows.
n_eval = 8
eval_indices = np.random.choice(len(X0new), size=n_eval, replace=False)
X0eval = X0new[eval_indices,]
R0eval = R0new[eval_indices,]

with open('./data/lenk/X0eval.pkl', "wb") as fh:
    pickle.dump(X0eval, fh)
with open('./data/lenk/R0eval.pkl', "wb") as fh:
    pickle.dump(R0eval, fh)

# Generate test data for 20 different trials and different sizes.
# The test pool is every row NOT used for evaluation. NumPy has no R-style
# "negative index = exclude": X0new[-indices] would pick rows counted from the
# end (and row 0 for index 0), so a boolean mask is used instead.
in_test_pool = np.ones(len(X0new), dtype=bool)
in_test_pool[eval_indices] = False
X0test = X0new[in_test_pool]
R0test = R0new[in_test_pool]

for trial in range(20):
    for s in range(2,12,2):
        # Rows within one trial are drawn with replacement, as before.
        sample_indices = np.random.randint(0, len(X0test), size=s)
        cX0 = X0test[sample_indices,]
        cR0 = R0test[sample_indices,]
        with open('./data/lenk/' + 'X0_N2_{N2F}_trial_{trialF}.pkl'.format(N2F=s, trialF=trial), 'wb') as fh:
            pickle.dump(cX0, fh)
        with open('./data/lenk/' + 'R0_N2_{N2F}_trial_{trialF}.pkl'.format(N2F=s, trialF=trial), 'wb') as fh:
            pickle.dump(cR0, fh)
 

In [None]:



# For every test-sample budget N2, write a directory "data/lenk/N2_<N2>/"
# holding N2 randomly chosen test samples (X0, Y0, R0) together with the full
# training data (X, Y, R, task_function).
parent_dir = "data/lenk/"

# --- Training data -----------------------------------------------------------
# None of it depends on N2, so it is assembled once instead of being rebuilt
# row by row on every iteration.
d1 = test_task.shape[1]
d2 = 2 * d1
X_full = np.vstack(train_tasks).astype(float)                      # N x d1
R_full = np.concatenate(train_labels).reshape(-1, 1).astype(float) # N x 1
Y_full = np.vstack([transform_Y(task_rows, task_labels)
                    for task_rows, task_labels in zip(train_tasks, train_labels)])  # T x d2
# One entry per training sample: the index of the task that sample belongs to.
# (Previously d1 entries were appended per sample, so the array did not line
# up with the rows of X_full.)
task_function = np.repeat(np.arange(len(train_tasks)),
                          [len(task_rows) for task_rows in train_tasks])
print(R_full.shape)
print(X_full.shape)
print(Y_full.shape)

# The test-task descriptor uses all test samples, so it is independent of N2.
Y0 = transform_Y(test_task, test_labels)

# --- One directory per N2 ------------------------------------------------------
for N2 in range(2, 22, 2):
    # Pick N2 distinct samples of the test task.
    random_indices = np.random.choice(test_task.shape[0], size=N2, replace=False)
    X0 = test_task[random_indices, :]
    R0 = test_labels[random_indices]

    path = os.path.join(parent_dir, "N2_{}/".format(N2))
    # Re-running the cell must not fail (or print noise) when the directory exists.
    os.makedirs(path, exist_ok=True)

    # Test samples go to X0.pkl; they used to be written to X.pkl and were then
    # overwritten by the training matrix saved under the same name.
    outputs = (
        ("X0.pkl", X0),
        ("Y0.pkl", Y0),
        ("R0.pkl", R0),
        ("X.pkl", X_full),
        ("Y.pkl", Y_full),
        ("R.pkl", R_full),
        ("task_function.pkl", task_function),
    )
    for file_name, payload in outputs:
        with open(os.path.join(path, file_name), "wb") as fh:
            pickle.dump(payload, fh)