In [1]:
import math
import torch
import numpy as np
import gpytorch
from matplotlib import pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


### Loading UMAP data
1. loading data from MNIST dataset
2. choosing 600 data points from each training dataset (6000 in total)
3. choosing data points in propotion in the testing sets as well

In [2]:
with open('features_UMAP.npy', 'rb') as f:
    train_x = np.load(f)
    train_y_label = np.load(f)
    test_x = np.load(f)
    test_y = np.load(f)

In [3]:
train_x, train_y_label, test_x, test_y = \
    torch.from_numpy(train_x), torch.from_numpy(train_y_label), \
        torch.from_numpy(test_x), torch.from_numpy(test_y)

### Loading training labels
We use the scores of training samples from DNN to be the y labels

In [4]:
train_y = torch.load('train_score.pt')
train_y.shape

torch.Size([60000, 10])

In [91]:
train_y[0:7, :]

tensor([[-1.4306e+01, -1.5714e+01, -2.2822e+01, -5.8237e+00, -1.7193e+01,
         -3.1217e-03, -1.4866e+01, -1.8692e+01, -9.3796e+00, -9.5028e+00],
        [-4.7684e-07, -2.6279e+01, -1.7861e+01, -2.0814e+01, -2.3834e+01,
         -1.9055e+01, -1.5301e+01, -1.9174e+01, -1.6309e+01, -1.5895e+01],
        [-8.9569e+00, -8.6896e+00, -5.6311e+00, -9.2064e+00, -7.1852e-03,
         -1.1038e+01, -7.7042e+00, -7.2663e+00, -8.8430e+00, -6.2833e+00],
        [-1.6103e+01, -4.4345e-05, -1.2211e+01, -1.6662e+01, -1.1207e+01,
         -1.4991e+01, -1.2636e+01, -1.3914e+01, -1.0783e+01, -1.4988e+01],
        [-1.5714e+01, -1.5829e+01, -1.6959e+01, -1.6687e+01, -5.6171e+00,
         -1.2577e+01, -1.8971e+01, -1.1049e+01, -8.2875e+00, -3.9143e-03],
        [-1.5772e+01, -1.6557e+01, -8.4638e-06, -1.4882e+01, -2.0779e+01,
         -2.5425e+01, -2.2365e+01, -1.2368e+01, -1.2529e+01, -2.1784e+01],
        [-2.1715e+01, -7.1526e-07, -1.7992e+01, -2.3148e+01, -1.5851e+01,
         -1.9909e+01, -1.6972e+0

In [74]:
train_y_label[0:10]

tensor([5, 0, 4, 1, 9, 2, 1, 3, 1, 4], dtype=torch.uint8)

## Setting up the Model

In [5]:
from gpytorch.models import ExactGP
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel

In [11]:
n = 2000
train_x, train_y = train_x[0: n], train_y[0: n, :]
test_y = test_y.to(torch.int64)
train_y.shape

torch.Size([2000, 10])

### Model Structure

In [26]:
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [8]:
models, likelihoods, mlls = [], [], []
for j in range(10):
    y = train_y[:, j]
    x = train_x
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = ExactGPModel(x, y, likelihood)
    likelihood.train()
    model.train()
    likelihoods.append(likelihood)
    models.append(model)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
    mlls.append(mll)

In [22]:
val_index = np.arange(1, n, 10)
tr_y = train_y[val_index, 1]
tr_y.shape

torch.Size([200])

Doing Cross Validation on Training data

Initialize the model and parameters

In [None]:
models, likelihoods, mlls = [], [], []
for j in range(10):
    y = train_y[:, j]
    x = train_x
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = ExactGPModel(x, y, likelihood)
    likelihood.train()
    model.train()
    likelihoods.append(likelihood)
    models.append(model)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
    mlls.append(mll)

### Model Training

In [80]:
training_iter = 10

# Use the adam optimizer
lr = 0.1
opts = []
for j in range(10):
    optimizer = torch.optim.Adam(models[j].parameters(), lr = lr)  # Includes GaussianLikelihood parameters
    opts.append(optimizer)

In [81]:
for j in range(10):
    print("Model", j)
    model = models[j]
    likelihood = likelihoods[j]
    mll = mlls[j]
    optimizer = opts[j]
    y = train_y[:, j]
    for i in range(training_iter):                
        # Zero gradients from previous iteration
        optimizer.zero_grad()
        # Output from model
        output = model(train_x)
        # print(output)
        
        # Calc loss and backprop gradients
        loss = -mll(output, y)
        loss.backward()
          
        if i % 1 == 0:
            print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
                i + 1, training_iter, loss.item(),
                #model.covar_module.base_kernel.lengthscale.item(),
                0.1,
                model.likelihood.noise.item()
            ))
        optimizer.step()

Model 0
Iter 1/10 - Loss: 15.020   lengthscale: 0.100   noise: 0.693
Iter 2/10 - Loss: 14.055   lengthscale: 0.100   noise: 0.744
Iter 3/10 - Loss: 13.183   lengthscale: 0.100   noise: 0.798
Iter 4/10 - Loss: 12.390   lengthscale: 0.100   noise: 0.854
Iter 5/10 - Loss: 11.680   lengthscale: 0.100   noise: 0.911
Iter 6/10 - Loss: 11.040   lengthscale: 0.100   noise: 0.970
Iter 7/10 - Loss: 10.460   lengthscale: 0.100   noise: 1.031
Iter 8/10 - Loss: 9.933   lengthscale: 0.100   noise: 1.093
Iter 9/10 - Loss: 9.468   lengthscale: 0.100   noise: 1.155
Iter 10/10 - Loss: 9.044   lengthscale: 0.100   noise: 1.218
Model 1
Iter 1/10 - Loss: 15.414   lengthscale: 0.100   noise: 0.693
Iter 2/10 - Loss: 14.357   lengthscale: 0.100   noise: 0.744
Iter 3/10 - Loss: 13.421   lengthscale: 0.100   noise: 0.798
Iter 4/10 - Loss: 12.602   lengthscale: 0.100   noise: 0.854
Iter 5/10 - Loss: 11.876   lengthscale: 0.100   noise: 0.912
Iter 6/10 - Loss: 11.232   lengthscale: 0.100   noise: 0.971
Iter 7/10 

## Model Evaluation

In [82]:
import torch.nn.functional as F
for model in models:
    model.eval()
for likelihood in likelihoods:
    likelihood.eval()

In [83]:
scores = []
for j in range(10):
    with gpytorch.settings.fast_pred_var(), torch.no_grad():
        test_dist = models[j](test_x)
        pred_means = test_dist.loc # F.log_softmax(test_dist.loc, dim = 0)
        # mean = torch.mean(pred_means)
        # pred_means = pred_means - mean
        # pred_means = pred_means/torch.max(pred_means)
        scores.append(pred_means)

In [84]:
scores = torch.stack(scores)
# scores = F.log_softmax(scores, dim = 0)

In [90]:
scores[:, 0:7]

tensor([[ -1.8704,   0.5921, -10.8498,  -0.8784, -10.1020, -11.0436,  -1.9011],
        [ -3.4424,  -6.4728,  -3.7613,  -8.5433, -13.4269,  -4.1157,  -5.6005],
        [ -1.2385,   1.7077,  -3.4207,  -5.6487, -10.6595,  -2.6236,  -1.9821],
        [ -0.9181,  -1.8034,  -7.8738,  -4.0877, -13.2874,  -7.5424,  -4.9014],
        [ -3.2178,  -8.8100,  -9.6771,  -8.0231,  -3.1888,  -9.9022,  -1.0033],
        [ -3.4878,  -7.9205, -14.0575,   1.6861, -12.2117, -14.3890,  -5.1612],
        [ -3.1249,  -1.5690, -12.8174,  -0.4174, -11.2836, -13.8376,  -1.8786],
        [ -0.0504,  -1.7671,  -6.1910,  -7.5221,  -8.6065,  -5.8387,  -3.4029],
        [ -1.8469,  -0.8593,  -8.2958,  -2.2124,  -8.7294,  -8.2936,  -2.7910],
        [ -0.4189,  -5.9738, -12.0617,  -4.3757,  -4.4351, -12.0565,  -2.9092]])

In [85]:
from sklearn.metrics import accuracy_score
# scores = torch.stack(scores)
pred_y = torch.argmax(scores, dim = 0)
accuracy_score(test_y, pred_y)

0.415

predict error

In [86]:
test_score = torch.load('test_score.pt')
test_score = torch.transpose(test_score[0:1000, :], 0, 1)
print("predict score shape:", scores.shape)
print("real score shape:", test_score.shape)


predict score shape: torch.Size([10, 1000])
real score shape: torch.Size([10, 1000])


In [87]:
pred_y[0:30]

tensor([7, 2, 2, 5, 4, 2, 4, 4, 9, 7, 5, 5, 4, 5, 1, 3, 4, 3, 3, 4, 7, 5, 5, 3,
        4, 5, 9, 4, 5, 1])

In [88]:
for j in range(10):
    loss = torch.nn.MSELoss()
    output = loss(scores[j, :], test_score[j, :])
    print("L2 loss for model", j, ":", output, "  mean:", torch.mean(scores[j, :]))

L2 loss for model 0 : tensor(79.5508)   mean: tensor(-9.6709)
L2 loss for model 1 : tensor(50.5827)   mean: tensor(-9.5833)
L2 loss for model 2 : tensor(91.9003)   mean: tensor(-8.1926)
L2 loss for model 3 : tensor(77.2568)   mean: tensor(-7.3310)
L2 loss for model 4 : tensor(47.5791)   mean: tensor(-10.2698)
L2 loss for model 5 : tensor(84.2119)   mean: tensor(-8.7220)
L2 loss for model 6 : tensor(142.7697)   mean: tensor(-10.8157)
L2 loss for model 7 : tensor(67.1875)   mean: tensor(-9.7484)
L2 loss for model 8 : tensor(40.1538)   mean: tensor(-6.6988)
L2 loss for model 9 : tensor(41.5981)   mean: tensor(-9.2816)


In [89]:
test_x.shape

torch.Size([1000, 2])