## Single-output 20 class classification with synthetic dataset

In [1]:
from torch import Tensor
### import Necessary packages
import sys
sys.path.append('/Users/jiangxiaoyu/Desktop/All Projects/GPLVM_project_code/')
from models_.lvmogp_svi import LVMOGP_SVI
from models_.variational_elbo import VariationalELBO
from models_.momc_ar_likelihood import Multi_Output_Multi_Class_AR_Likelihood
import torch
from torch.optim.lr_scheduler import StepLR
from tqdm import trange
import random
import numpy as np
import time

In [2]:
### Copy from chunchao's code...
from sklearn.datasets import make_classification as mc

X1, Y1 = mc(n_samples=2000, n_classes=20, n_features=5, n_redundant=0, n_informative=5, n_clusters_per_class=1,
                    random_state=1)
X = X1.copy()
Y = Y1[:, None].copy()

In [3]:
train_test_split = 1300
X_train, X_test = X[:train_test_split], X[train_test_split:]
Y_train, Y_test = Y[:train_test_split], Y[train_test_split:]

In [39]:
print('The number of classes:', Y.max() - Y.min() + 1)
print('The number of train data samples:' , X_train.shape[0])
print('The number of test data samples:' , X_test.shape[0])
print('The number of features:', X_train.shape[-1])

The number of classes: 20
The number of train data samples: 1300
The number of test data samples: 700
The number of features: 5


In [40]:
# Define hyperparameters

clf_list = [20]
# NOTE
# len(clf_list) the total number of outputs
# clf_list[i] the number of classes for (i+1)th output
n_outputs = len(clf_list)
n_latent = int(Tensor(clf_list).sum()) # NOTE n_outputs != n_latent for general cases
n_inputs = int(X_train.shape[0])
index_dim = X_train.shape[-1] # this is 5
latent_dim = 2
n_inducing_inputs = 50
n_inducing_latent = 5
pca = False # Think carefully when setting this to True
n_total= n_outputs * n_inputs

n_train_iterations = 10 # 1000
learning_rate = 0.01
schduler_step_size = 50
schduler_gamma = 0.8
num_latent_MC = 1

In [41]:
my_model = LVMOGP_SVI(n_X=n_latent, n_C=n_inputs, index_dim=index_dim, latent_dim=latent_dim, n_inducing_C=n_inducing_inputs, n_inducing_X=n_inducing_latent, data_Y=X_train.reshape(n_latent, -1), pca=pca)

In [42]:
# Specify model, likelihood and training objective.
my_model = LVMOGP_SVI(n_X=n_latent, n_C=n_inputs, index_dim=index_dim, latent_dim=latent_dim, n_inducing_C=n_inducing_inputs, n_inducing_X=n_inducing_latent, data_Y=X_train.reshape(n_latent, -1), pca=pca)
likelihood = Multi_Output_Multi_Class_AR_Likelihood(clf_list) # how many outputs
mll = VariationalELBO(likelihood, my_model, num_data=n_total)

# Optimizer and Scheduler
optimizer = torch.optim.Adam([ # TODO: tune the choice of optimizer: SGD...
    {'params': my_model.parameters()}], lr=learning_rate)
scheduler = StepLR(optimizer, step_size=schduler_step_size, gamma=schduler_gamma)

In [43]:
from typing import List
def clf_sample_f_index_everyoutput(my_model, clf_list:List, labels:Tensor, num_class_per_output=2, num_input_samples:int=3, re_index_latent_idxs=True):
    '''
    
    This function returns subsampling of (all_outputs, all_classes, all_inputs) pairs.
    All outputs are preserved, only classes and inputs are subsampled.
    Args:
        my_model: an instance of LVMOGP_SVI, _get_batch_idx function is in use.
        clf_list: list of n_classes. for example, [20, 13, 17] means 3 outputs with 20, 13, 17 classes respectively.
        labels: of shape (n_inputs, n_outputs). labels[a][b] extracts the classification label for a+1 th input at b+1 th output. 
        num_class_per_output: how many classes we want during subsampling.
            TODO: different output has different num of classes
        num_input_samples: how many data samples we want duing subsampling.

    Return:
        batch_index_latent: of shape (num_outputs, num_class_per_output+1, num_input_samples)
        batch_index_inputs: of shape (num_outputs, num_class_per_output+1, num_input_samples)
    NOTE: 
        1. Same set of inputs for every output.
        2. Same number of classes are downsampled for every output, seems unresonable if # total classes vary a lot across outputs.
        3. The final index on the second dim of batch_index_inputs is true label of the corresponding (input, output) pair which is useful in the future.
    '''

    num_outputs = len(clf_list)
    input_samples = Tensor(my_model._get_batch_idx(num_input_samples, sample_X = False)).to(int)

    final_inputs_idxs = input_samples.unsqueeze(0).unsqueeze(0)
    final_inputs_idxs = final_inputs_idxs.expand(num_outputs, (num_class_per_output+1), num_input_samples)

    final_latent_idxs = torch.zeros(num_outputs, (num_class_per_output+1), num_input_samples)

    for i in range(num_input_samples):
        for j in range(num_outputs):
            curr_true_label_idx = labels[input_samples[i], j] # classification label for i+1 th input at j+1 th output ; labels[final_inputs_idxs[j,0,i]][j]
            num_class_curr_output = clf_list[j]
            available_range = list(np.arange(num_class_curr_output)[np.arange(num_class_curr_output) != curr_true_label_idx]) 
            assert len(available_range) == num_class_curr_output - 1
            curr_class_idx_list = random.sample(available_range, num_class_per_output)
            curr_class_idx_list.append(curr_true_label_idx) # of length num_class_per_output + 1
            assert len(curr_class_idx_list) == num_class_per_output + 1
            
            final_latent_idxs[j,:,i] = Tensor(curr_class_idx_list)
    
    assert final_inputs_idxs.shape == final_latent_idxs.shape

    if not re_index_latent_idxs:
        return final_latent_idxs.to(int), final_inputs_idxs
    
    # Transform idx properly to better match slicing functionality from my_model.sample_latent_variable()
    else:
        counter = 0
        for i in range(num_outputs):
            final_latent_idxs[i,...] += counter
            counter += clf_list[i]
        return final_latent_idxs.to(int), final_inputs_idxs
        



In [53]:
'''
batch_index_latent, batch_index_inputs = clf_sample_f_index_everyoutput(my_model, clf_list, Y_train)
print(batch_index_latent)

# implement 1
imp1_start = time.time()
sample_X_1 = my_model.sample_latent_variable()
sample_batch_X_1 = sample_X_1[batch_index_latent]
imp1_end = time.time()
print('time for imp1:', imp1_end - imp1_start)
print(sample_batch_X_1)
# implement 2
imp2_start = time.time()
sample_X_2 =  my_model.sample_latent_variable(batch_index_latent)
imp2_end = time.time()
print('time for imp2:', imp2_end - imp2_start)
print(sample_X_2)
'''


# Training!
loss_list = []
iterator = trange(n_train_iterations, leave=True)

my_model.train()
# likelihood.train()
for i in iterator: 
    batch_index_latent, batch_index_inputs = clf_sample_f_index_everyoutput(my_model, clf_list, Y_train)
    # core code is here 
    optimizer.zero_grad()
    total_loss = 0
    for _ in range(num_latent_MC):

        sample_latent = my_model.sample_latent_variable(batch_index_latent) 
        sample_inputs = Tensor(X_train[batch_index_inputs])
        output_batch = my_model(sample_latent.reshape(-1,latent_dim), sample_inputs.reshape(-1,index_dim)) # q(f)
        # loss = -mll(output_batch)
        # total_loss += loss
    print(output_batch.loc.detach().reshape(batch_index_latent.shape))
    print(output_batch.variance.detach())
    print(stop)


  0%|          | 0/10 [00:00<?, ?it/s]

tensor([[[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]])
tensor([0.4806, 0.4806, 0.4806, 0.4806, 0.4806, 0.4806, 0.4806, 0.4806, 0.4806])





NameError: name 'stop' is not defined

In [None]:
# Training!
loss_list = []
iterator = trange(n_train_iterations, leave=True)

my_model.train()
likelihood.train()
for i in iterator: 
    batch_index_latent, batch_index_inputs = clf_sample_f_index_everyoutput(my_model, clf_list, Y_train)
    # core code is here 
    optimizer.zero_grad()
    total_loss = 0
    for _ in range(num_latent_MC):

        sample_latent = my_model.sample_latent_variable(batch_index_latent) 
        sample_inputs = X_train[batch_index_inputs]
        output_batch = my_model(sample_latent.reshape(-1), sample_inputs.reshape(-1)) # q(f)
        loss = -mll(output_batch)
        total_loss += loss
    
    average_loss = total_loss / num_X_MC
    loss_list.append(average_loss.item())
    iterator.set_description('Loss: ' + str(float(np.round(average_loss.item(),2))) + ", iter no: " + str(i))
    average_loss.backward()

    # Gradient Clipping. Try Many Different Approaches.
    gradient_clip(my_model, approach=gradient_clip_approach, clip_value=10)
    gradient_clip(likelihood, clip_value=1)

    optimizer.step()
    scheduler.step()

In [78]:
a = torch.randn(2,4)
print(a.sum())

tensor(-4.8254)
