In [1]:
# Support for maths
import numpy as np
# Plotting tools
from matplotlib import pyplot as plt
from sklearn.preprocessing import LabelBinarizer
# we use the following for plotting figures in jupyter
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

# GPy: Gaussian processes library
import GPy

In [2]:
# Number of training examples to keep: the training images and labels are
# truncated to their first N entries when the data is loaded.
N = 5000

In [3]:
# (train_data, train_labels), (test_data, test_labels) = FashionMNIST()

# Load the pre-exported Fashion-MNIST arrays and store everything as uint8.
train_data = np.load('../datasets/fashion_mnist/numpy/train_data_fashion_mnist.npy').astype('uint8')
test_data = np.load('../datasets/fashion_mnist/numpy/test_data_fashion_mnist.npy').astype('uint8')
train_labels = np.load('../datasets/fashion_mnist/numpy/train_targets_fashion_mnist.npy').astype('uint8')
test_labels = np.load('../datasets/fashion_mnist/numpy/test_targets_fashion_mnist.npy').astype('uint8')

# Number of values in one flattened image.
D = train_data[0].size

# One-hot rows -> integer class ids; only the first N training labels are kept.
train_labels = train_labels.argmax(axis=1)[:N]
test_labels = test_labels.argmax(axis=1)

# One row of D values per image; only the first N training images are kept.
train_data = train_data.reshape(-1, D)[:N]
test_data = test_data.reshape(-1, D)

In [4]:
def threshold_binarize(data, threshold):
    """Binarize `data` element-wise against `threshold`.

    Entries strictly greater than `threshold` become 1, all others become 0.
    The result is returned as a uint8 array.
    """
    above = np.asarray(data > threshold)
    return above.astype('uint8')

In [5]:
# Pixel intensities are stored as uint8 (0..255); anything above this cut-off
# is mapped to 1, everything else to 0.
threshold = 10

train_data_bin, test_data_bin = (
    threshold_binarize(split, threshold) for split in (train_data, test_data)
)

In [6]:
# Like one-hot encoding, but with -1 in place of 0: +1 in the column of the
# true class and -1 in every other column.
label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
train_labels_bin = label_binarizer.fit_transform(train_labels)
# Reuse the encoding fitted on the training labels. Re-fitting on the test
# labels could produce a different column layout if the two label sets differ,
# which would silently misalign the argmax-based accuracy computed later.
test_labels_bin = label_binarizer.transform(test_labels)

## Cross-validate for alpha in Ridge Regression

In [47]:
from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import GridSearchCV

In [56]:
# Candidate regularisation strengths: 10, 20, ..., 90.
parameters = {'alpha': list(range(10, 100, 10))}
# `normalize` is deliberately not passed: False was the default, and the
# argument no longer exists in recent scikit-learn releases, where passing it
# raises a TypeError.
ridge = RidgeClassifier(fit_intercept=False)
# 5-fold cross-validation over the alpha grid, fitted on the binarized
# training images and integer class labels.
clf = GridSearchCV(ridge, parameters, cv=5)
clf.fit(train_data_bin, train_labels)

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=RidgeClassifier(alpha=1.0, class_weight=None,
                                       copy_X=True, fit_intercept=False,
                                       max_iter=None, normalize=False,
                                       random_state=None, solver='auto',
                                       tol=0.001),
             iid='warn', n_jobs=None,
             param_grid={'alpha': [10, 20, 30, 40, 50, 60, 70, 80, 90]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [57]:
# Inspect the full cross-validation results dictionary of the grid search
# (per-candidate timings, parameter settings, ...).
clf.cv_results_

{'mean_fit_time': array([0.02611384, 0.02173209, 0.02117152, 0.01832695, 0.01873837,
        0.01865931, 0.01780648, 0.01812787, 0.0185832 ]),
 'std_fit_time': array([0.00252503, 0.00250487, 0.00460071, 0.00193032, 0.00246728,
        0.00103241, 0.00105027, 0.0009389 , 0.00162544]),
 'mean_score_time': array([0.00073705, 0.00071554, 0.00054741, 0.00056157, 0.00059514,
        0.00066681, 0.00054832, 0.00058923, 0.00058675]),
 'std_score_time': array([1.14446143e-04, 1.50095613e-04, 6.39032958e-06, 2.98583839e-05,
        6.17410034e-05, 1.27292249e-04, 4.77409020e-06, 8.91765295e-05,
        3.02019035e-05]),
 'param_alpha': masked_array(data=[10, 20, 30, 40, 50, 60, 70, 80, 90],
              mask=[False, False, False, False, False, False, False, False,
                    False],
        fill_value='?',
             dtype=object),
 'params': [{'alpha': 10},
  {'alpha': 20},
  {'alpha': 30},
  {'alpha': 40},
  {'alpha': 50},
  {'alpha': 60},
  {'alpha': 70},
  {'alpha': 80},
  {'alph

In [None]:
# => alpha = 30 seems to be a good regularization strength for the linear kernel!

In [7]:
# Create an RBF kernel over all D flattened-pixel inputs (not a 1-D kernel),
# leaving variance and lengthscale at their defaults. Using D keeps the kernel
# dimension in sync with the data instead of hard-coding the image size.
k = GPy.kern.RBF(input_dim=D)
# Preview the kernel's parameters
k

rbf.,value,constraints,priors
variance,1.0,+ve,
lengthscale,1.0,+ve,


In [8]:
# GP regression from the binarized training images onto the +1/-1 label
# targets, using the RBF kernel `k`.
m = GPy.models.GPRegression(X=train_data_bin, Y=train_labels_bin, kernel=k)
# Preview the model's parameters
m

GP_regression.,value,constraints,priors
rbf.variance,1.0,+ve,
rbf.lengthscale,1.0,+ve,
Gaussian_noise.variance,1.0,+ve,


In [10]:
# NOTE(review): this cell re-initialises and re-optimises the shared model `m`
# in place; several near-identical cells do the same with different noise
# settings, so the state of `m` depends on which of them ran last.

# Constrain the regression parameters to be positive only
m.constrain_positive()

# Starting values for the kernel hyperparameters before optimisation
m.kern.variance = 0.1
m.kern.lengthscale = np.sqrt(D)

# Set the Gaussian noise variance to 1e-6 and fix it so the optimiser leaves it unchanged
m.Gaussian_noise.variance = 1e-6 # (Reset the parameter first)
m.Gaussian_noise.variance.fix()
# m.Gaussian_noise.variance.unfix()

# Optimise the remaining free parameters, then display the fitted model
m.optimize()
m

reconstraining parameters GP_regression


GP_regression.,value,constraints,priors
rbf.variance,0.3248396771646316,+ve,
rbf.lengthscale,6.8153834810410965,+ve,
Gaussian_noise.variance,1e-06,+ve fixed,


In [9]:
# NOTE(review): this cell re-initialises and re-optimises the shared model `m`
# in place; several near-identical cells do the same with different noise
# settings, so the state of `m` depends on which of them ran last.

# Constrain the regression parameters to be positive only
m.constrain_positive()

# Starting values for the kernel hyperparameters before optimisation
m.kern.variance = 0.1
m.kern.lengthscale = np.sqrt(D)

# Set the Gaussian noise variance to 0.01 and fix it so the optimiser leaves it unchanged
m.Gaussian_noise.variance = 1e-2 # (Reset the parameter first)
m.Gaussian_noise.variance.fix()
# m.Gaussian_noise.variance.unfix()

# Optimise the remaining free parameters, then display the fitted model
m.optimize()
m

reconstraining parameters GP_regression


GP_regression.,value,constraints,priors
rbf.variance,0.306347886211182,+ve,
rbf.lengthscale,7.25549786847386,+ve,
Gaussian_noise.variance,0.01,fixed +ve,


In [11]:
# NOTE(review): this cell re-initialises and re-optimises the shared model `m`
# in place; several near-identical cells do the same with different noise
# settings, so the state of `m` depends on which of them ran last.

# Constrain the regression parameters to be positive only
m.constrain_positive()

# Starting values for the kernel hyperparameters before optimisation
m.kern.variance = 0.1
m.kern.lengthscale = np.sqrt(D)

# Set the Gaussian noise variance to 10 (not 0.01) and fix it so the optimiser leaves it unchanged
m.Gaussian_noise.variance = 10 # (Reset the parameter first)
m.Gaussian_noise.variance.fix()
# m.Gaussian_noise.variance.unfix()

# Optimise the remaining free parameters, then display the fitted model
m.optimize()
m

reconstraining parameters GP_regression


GP_regression.,value,constraints,priors
rbf.variance,0.6838136837863136,+ve,
rbf.lengthscale,50.92407055717207,+ve,
Gaussian_noise.variance,10.0,+ve fixed,


In [9]:
# NOTE(review): this cell re-initialises and re-optimises the shared model `m`
# in place; several near-identical cells do the same with different noise
# settings, so the state of `m` depends on which of them ran last.

# Constrain the regression parameters to be positive only
m.constrain_positive()

# Starting values for the kernel hyperparameters before optimisation
m.kern.variance = 0.1
m.kern.lengthscale = np.sqrt(D)

# Start the Gaussian noise variance at 10 and leave it unfixed, so it is
# optimised together with the kernel parameters (it is NOT held at 0.01 here)
m.Gaussian_noise.variance = 10 # (Reset the parameter first)
# m.Gaussian_noise.variance.fix()
m.Gaussian_noise.variance.unfix()

# Optimise all free parameters, then display the fitted model
m.optimize()
m

reconstraining parameters GP_regression


GP_regression.,value,constraints,priors
rbf.variance,0.1789339259732666,+ve,
rbf.lengthscale,7.177267570983543,+ve,
Gaussian_noise.variance,0.0414465985822242,+ve,


In [10]:
# Noise-free posterior prediction of the optimised GP at the training inputs.
# NOTE(review): with full_cov=False the second return value is presumably a
# per-point variance rather than a full covariance matrix, despite the `_cov`
# name — confirm against the GPy documentation.
train_mean, train_cov = m.predict_noiseless(train_data_bin, full_cov=False)

In [11]:
# Noise-free posterior prediction of the optimised GP at the test inputs.
# NOTE(review): with full_cov=False the second return value is presumably a
# per-point variance rather than a full covariance matrix, despite the `_cov`
# name — confirm against the GPy documentation.
test_mean, test_cov = m.predict_noiseless(test_data_bin, full_cov=False)

In [26]:
# Shape of the test-set predictive mean. The bare name `mean` is never assigned
# in this notebook, so the check refers to `test_mean` explicitly.
test_mean.shape

(10000, 10)

In [12]:
# Training accuracy in percent: share of training examples whose largest
# predicted output falls in the same column as the true label.
train_score = (
    (np.argmax(train_mean, axis=1) == np.argmax(train_labels_bin, axis=1)).sum()
    / len(train_data)
    * 100
)

In [13]:
# Test accuracy in percent: share of test examples whose largest predicted
# output falls in the same column as the true label.
test_score = (
    (np.argmax(test_mean, axis=1) == np.argmax(test_labels_bin, axis=1)).sum()
    / len(test_data)
    * 100
)

In [14]:
# Logged result for 5K training points (N = 5000) with all parameters optimized
# (kernel variance, lengthscale and noise variance). Fitted values:
# rbf.variance 0.1789339259732666
# rbf.lengthscale 7.177267570983543
# Gaussian_noise.variance 0.04144659858222422
test_score

85.83

In [16]:
# Logged result for 1K training points with all parameters optimized.
# NOTE(review): presumably recorded from a run with N = 1000; re-running this
# cell shows the score of whatever configuration is currently loaded. Fitted values:
# rbf.variance 0.2774811544636858
# rbf.lengthscale 8.578422490836331
# Gaussian_noise.variance 0.03759184663277108
test_score

81.55

In [26]:
# 1K: no optimized kernel (0.1 variance)
# NOTE(review): `score` is not assigned anywhere in the visible notebook (the
# accuracy cells define train_score / test_score), so this cell would raise a
# NameError on a fresh run — confirm which score was logged here.
score

76.59

In [22]:
# 1K: no optimized kernel (1000 variance)
# NOTE(review): `score` is not assigned anywhere in the visible notebook (the
# accuracy cells define train_score / test_score), so this cell would raise a
# NameError on a fresh run — confirm which score was logged here.
score

83.67

In [45]:
# 1K: no optimized kernel
# NOTE(review): `score` is not assigned anywhere in the visible notebook (the
# accuracy cells define train_score / test_score), so this cell would raise a
# NameError on a fresh run — confirm which score was logged here.
score

76.55999999999999

In [30]:
# 1K: optimized kernel
# rbf.variance 0.32483967716463163
# rbf.lengthscale 6.8153834810410965
# Test score. The bare name `score` is never assigned in this notebook, so the
# explicitly named test accuracy is displayed instead.
test_score

80.93

In [37]:
# Train score. The bare name `score` is never assigned in this notebook, so the
# explicitly named training accuracy is displayed instead.
train_score

100.0

In [44]:
# Train score with the noise variance at 0.01. The bare name `score` is never
# assigned in this notebook, so the explicitly named training accuracy is
# displayed instead.
train_score

100.0

In [12]:
# 10K: no optimized kernel
# NOTE(review): `score` is not assigned anywhere in the visible notebook (the
# accuracy cells define train_score / test_score), so this cell would raise a
# NameError on a fresh run — confirm which score was logged here.
score

83.67

In [28]:
# 10K: optimized kernel
# NOTE(review): `score` is not assigned anywhere in the visible notebook (the
# accuracy cells define train_score / test_score), so this cell would raise a
# NameError on a fresh run — confirm which score was logged here.
score

86.65