In [45]:
import math
import torch
import numpy as np
import gpytorch
from matplotlib import pyplot as plt

### Loading UMAP data
1. loading data from MNIST dataset
2. choosing 600 data points from each training dataset (6000 in total)
3. choosing data points in propotion in the testing sets as well

In [46]:
with open('features_UMAP.npy', 'rb') as f:
    train_x = np.load(f)
    train_y_label = np.load(f)
    test_x = np.load(f)
    test_y = np.load(f)

In [47]:
train_x, train_y_label, test_x, test_y = \
    torch.from_numpy(train_x), torch.from_numpy(train_y_label), \
        torch.from_numpy(test_x), torch.from_numpy(test_y)

### Loading training labels
We use the scores of training samples from DNN to be the y labels

In [48]:
train_y = torch.load('train_score_1_1.pt')
train_y.shape

torch.Size([6400, 10])

In [49]:
train_y_label[0:10]

tensor([5, 0, 4, 1, 9, 2, 1, 3, 1, 4], dtype=torch.uint8)

## Setting up the Model

In [50]:
from gpytorch.models import ExactGP
from gpytorch.means import ConstantMean
from gpytorch.kernels import ScaleKernel, RBFKernel

In [51]:
n = 2000
train_x, train_y = train_x[0: n], train_y[0: n, :]
test_y = test_y.to(torch.int64)
train_y.shape

torch.Size([2000, 10])

In [52]:
add = - torch.min(train_y) + 1
log_y = add + train_y
log_y = torch.log(log_y)
log_y

tensor([[3.1162, 2.9499, 2.8939,  ..., 3.0247, 3.1743, 3.0649],
        [3.3749, 3.1004, 2.8967,  ..., 2.7033, 3.0462, 2.8206],
        [2.9604, 2.8974, 3.0302,  ..., 3.0819, 3.0163, 3.2335],
        ...,
        [3.1766, 3.0519, 2.9361,  ..., 3.0577, 3.2233, 3.1238],
        [3.1539, 3.3090, 3.3609,  ..., 3.2874, 3.2573, 3.1866],
        [3.3748, 3.1160, 3.0213,  ..., 2.8930, 3.1180, 2.9055]])

### Model Structure

In [53]:
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [54]:
models, likelihoods, mlls = [], [], []
for j in range(10):
    y = log_y[:, j]
    x = train_x
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = ExactGPModel(x, y, likelihood)
    likelihood.train()
    model.train()
    likelihoods.append(likelihood)
    models.append(model)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
    mlls.append(mll)

Initialize the model and parameters

In [55]:
models, likelihoods, mlls = [], [], []
for j in range(10):
    y = log_y[:, j]
    x = train_x
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = ExactGPModel(x, y, likelihood)
    likelihood.train()
    model.train()
    likelihoods.append(likelihood)
    models.append(model)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
    mlls.append(mll)

### Model Training

In [56]:
training_iter = 10

# Use the adam optimizer
lr = 0.1
opts = []
for j in range(10):
    optimizer = torch.optim.Adam(models[j].parameters(), lr = lr)  # Includes GaussianLikelihood parameters
    opts.append(optimizer)

In [57]:
for j in range(10):
    print("Model", j)
    model = models[j]
    likelihood = likelihoods[j]
    mll = mlls[j]
    optimizer = opts[j]
    y = log_y[:, j]
    for i in range(training_iter):                
        # Zero gradients from previous iteration
        optimizer.zero_grad()
        # Output from model
        output = model(train_x)
        # print(output)
        
        # Calc loss and backprop gradients
        loss = -mll(output, y)
        loss.backward()
          
        if i % 1 == 0:
            print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % (
                i + 1, training_iter, loss.item(),
                #model.covar_module.base_kernel.lengthscale.item(),
                0.1,
                model.likelihood.noise.item()
            ))
        optimizer.step()

Model 0
Iter 1/10 - Loss: 0.943   lengthscale: 0.100   noise: 0.693
Iter 2/10 - Loss: 0.881   lengthscale: 0.100   noise: 0.644
Iter 3/10 - Loss: 0.827   lengthscale: 0.100   noise: 0.598
Iter 4/10 - Loss: 0.771   lengthscale: 0.100   noise: 0.554
Iter 5/10 - Loss: 0.728   lengthscale: 0.100   noise: 0.513
Iter 6/10 - Loss: 0.676   lengthscale: 0.100   noise: 0.474
Iter 7/10 - Loss: 0.627   lengthscale: 0.100   noise: 0.437
Iter 8/10 - Loss: 0.584   lengthscale: 0.100   noise: 0.403
Iter 9/10 - Loss: 0.536   lengthscale: 0.100   noise: 0.370
Iter 10/10 - Loss: 0.493   lengthscale: 0.100   noise: 0.340
Model 1
Iter 1/10 - Loss: 0.943   lengthscale: 0.100   noise: 0.693
Iter 2/10 - Loss: 0.883   lengthscale: 0.100   noise: 0.644
Iter 3/10 - Loss: 0.826   lengthscale: 0.100   noise: 0.598
Iter 4/10 - Loss: 0.776   lengthscale: 0.100   noise: 0.554
Iter 5/10 - Loss: 0.720   lengthscale: 0.100   noise: 0.513
Iter 6/10 - Loss: 0.673   lengthscale: 0.100   noise: 0.474
Iter 7/10 - Loss: 0.628

## Model Evaluation

In [58]:
import torch.nn.functional as F
for model in models:
    model.eval()
for likelihood in likelihoods:
    likelihood.eval()

In [59]:
scores = []
for j in range(10):
    with gpytorch.settings.fast_pred_var(), torch.no_grad():
        test_dist = models[j](test_x)
        pred_means = test_dist.loc # F.log_softmax(test_dist.loc, dim = 0)
        # mean = torch.mean(pred_means)
        # pred_means = pred_means - mean
        # pred_means = pred_means/torch.max(pred_means)
        pred_means = torch.exp(pred_means) - add
        scores.append(pred_means)

In [60]:
scores = torch.stack(scores)
# scores = F.log_softmax(scores, dim = 0)

In [61]:
scores[:, 0:7]

tensor([[-16.2327,  -0.5029,  -6.2313,  -2.1530,  -7.8336,  -6.2632, -14.4673],
        [-16.1729,  -3.5905,  -3.3923,  -5.4321,  -7.7878,  -3.5841, -13.7970],
        [-14.3248,  -3.5387,  -5.0591,  -5.5451,  -6.6715,  -4.6552, -14.0583],
        [-13.2891,  -2.1100,  -7.7058,  -4.2057,  -8.0328,  -7.2913, -14.1402],
        [-15.1589,  -4.4668,  -6.4279,  -4.1468,  -2.6314,  -6.3724, -13.1631],
        [-14.4368,  -0.6987,  -8.3945,  -4.4947,  -5.6316,  -8.2445, -14.1152],
        [-17.1779,  -4.4238,  -7.9163,  -2.8529,  -6.4350,  -8.0278, -14.4344],
        [-12.0922,  -3.3691,  -5.2750,  -4.2974,  -3.6169,  -4.9214, -12.7251],
        [-14.0580,  -2.5960,  -5.2324,  -3.9002,  -5.0830,  -5.0630, -13.5020],
        [-13.8602,  -2.9103,  -8.0139,  -5.2001,  -1.9612,  -7.8817, -12.9287]])

In [62]:
from sklearn.metrics import accuracy_score
# scores = torch.stack(scores)
pred_y = torch.argmax(scores, dim = 0)
accuracy_score(test_y, pred_y)

0.422

predict error

In [63]:
test_score = torch.load('test_score.pt')
test_score = torch.transpose(test_score[0:1000, :], 0, 1)
print("predict score shape:", scores.shape)
print("real score shape:", test_score.shape)


predict score shape: torch.Size([10, 1000])
real score shape: torch.Size([10, 1000])


In [64]:
pred_y[0:30]

tensor([7, 0, 1, 0, 9, 1, 7, 4, 9, 7, 6, 5, 7, 6, 0, 3, 9, 3, 3, 9, 7, 5, 5, 3,
        4, 0, 5, 9, 0, 1])

In [65]:
for j in range(10):
    loss = torch.nn.MSELoss()
    output = loss(scores[j, :], test_score[j, :])
    print("L2 loss for model", j, ":", output, "  mean:", torch.mean(scores[j, :]))

L2 loss for model 0 : tensor(93.0665)   mean: tensor(-7.8625)
L2 loss for model 1 : tensor(76.5207)   mean: tensor(-8.2426)
L2 loss for model 2 : tensor(102.1648)   mean: tensor(-8.0128)
L2 loss for model 3 : tensor(102.3191)   mean: tensor(-6.4702)
L2 loss for model 4 : tensor(95.2256)   mean: tensor(-7.7280)
L2 loss for model 5 : tensor(82.3007)   mean: tensor(-7.0859)
L2 loss for model 6 : tensor(106.1899)   mean: tensor(-9.2806)
L2 loss for model 7 : tensor(132.0182)   mean: tensor(-7.1732)
L2 loss for model 8 : tensor(42.3574)   mean: tensor(-6.9393)
L2 loss for model 9 : tensor(73.7541)   mean: tensor(-8.2400)


In [66]:
test_x.shape

torch.Size([1000, 2])