In [2]:
from __future__ import print_function

import vdrvc 
import warnings
import numpy as np
import matplotlib.pyplot as plt

from theano import tensor as T
from sklearn.datasets import load_digits
from IPython.display import clear_output as cls
from sklearn.metrics import accuracy_score as acc
from sklearn.cross_validation import train_test_split

%matplotlib inline
warnings.simplefilter("ignore")
np.random.seed(15632)

# Load DIGITS data

In [3]:
# Load the digits dataset; each 8x8 image is flattened into a 64-element row.
data = load_digits()
t = data.target
X = data.images.reshape(-1, 64)

# Centre every pixel column in place.
# NOTE(review): the mean is taken over all samples before the split below, so it
# also includes the rows that end up in the test set.
X -= X.mean(axis=0)

# Shuffled split that keeps 1200 samples for training.
X_train, X_test, t_train, t_test = train_test_split(
    X, t, train_size=1200)

# Fit the VD-ARD model (our model)

In [4]:
# Train the VD-ARD classifier on the training split.
# The batch size equals the number of training rows, i.e. every update sees the
# whole training set; a progress line is reported every `display_each` iterations.
vd = vdrvc.vdrvc()
vd = vd.fit(
    X_train,
    t_train,
    num_classes=10,
    batch_size=len(X_train),
    max_iter=100000,
    lr=1e-3,
    beta=0.9,
    display_each=2500)

iter = 0.0000 vlb = 124447.4784 acc = 0.0867 ard = 0.1125
iter = 2500.0000 vlb = 5524.2584 acc = 0.9467 ard = 0.1062
iter = 5000.0000 vlb = 1046.4286 acc = 0.9850 ard = 0.1531
iter = 7500.0000 vlb = 274.9015 acc = 0.9975 ard = 0.1594
iter = 10000.0000 vlb = 183.8823 acc = 1.0000 ard = 0.2016
iter = 12500.0000 vlb = 110.1083 acc = 1.0000 ard = 0.2672
iter = 15000.0000 vlb = 65.2960 acc = 1.0000 ard = 0.3359
iter = 17500.0000 vlb = -14.7746 acc = 1.0000 ard = 0.3875
iter = 20000.0000 vlb = -57.4132 acc = 1.0000 ard = 0.4562
iter = 22500.0000 vlb = -51.1340 acc = 1.0000 ard = 0.5312
iter = 25000.0000 vlb = -38.6811 acc = 1.0000 ard = 0.5422
iter = 27500.0000 vlb = -65.6178 acc = 1.0000 ard = 0.6219
iter = 30000.0000 vlb = -117.4448 acc = 1.0000 ard = 0.6203
iter = 32500.0000 vlb = -110.7357 acc = 1.0000 ard = 0.6562
iter = 35000.0000 vlb = -100.1785 acc = 1.0000 ard = 0.6453
iter = 37500.0000 vlb = -116.7272 acc = 1.0000 ard = 0.6656
iter = 40000.0000 vlb = -136.4987 acc = 1.0000 ard = 0.

In [5]:
# Remove features with dropout rate p > 0.99 ~ log_alpha > 5
LOG_ALPHA_THRESHOLD = 5
pruned = vd.log_alpha > LOG_ALPHA_THRESHOLD
ard = np.sum(pruned)

# Keep a copy of the dense weights, but only the first time this cell runs for a
# given model: an unconditional copy would, on a re-run, overwrite the backup
# with the already-pruned weights and lose the original solution.
# (Assumes the model does not itself use `theta_old` for anything else.)
if getattr(vd, 'theta_old', None) is None:
    vd.theta_old = vd.theta.copy()
vd.theta[pruned] = 0

# Accuracy score, with arguments in the documented (y_true, y_pred) order.
# Accuracy is symmetric, so the value is the same as with the order swapped.
trainp = acc(t_train, vd.predict(X_train))
testp = acc(t_test, vd.predict(X_test))

print('sparsity level: %0.4f' % (ard * 1.0 / vd.theta.size))
print('train accuracy: %0.4f' % trainp)
print('test accuracy:  %0.4f' % testp)

sparsity level: 0.7547
train accuracy: 1.0000
test accuracy:  0.9481


# L1 logistic regression

In [6]:
from sklearn.linear_model import LogisticRegressionCV as LR

# Cross-validated logistic regression with an L1 penalty (liblinear solver).
lr = LR(penalty='l1',
        solver='liblinear',
        max_iter=1000,
        n_jobs=2,
        verbose=3).fit(X_train, t_train)

# Sparsity is measured as the number of coefficients that are exactly zero.
ard = (lr.coef_ == 0).sum()
trainp = acc(lr.predict(X_train), t_train)
testp = acc(lr.predict(X_test), t_test)

# Wipe the verbose fitting log so that only the summary below stays in the cell.
cls()
print('sparsity level: %0.4f' % (float(ard) / lr.coef_.size))
print('train accuracy: %0.4f' % trainp)
print('test accuracy:  %0.4f' % testp)

sparsity level: 0.3797
train accuracy: 0.9925
test accuracy:  0.9548


# RVM

In [9]:
from skbayes.rvm_ard_models.fast_rvm import ClassificationARD
# https://github.com/AmazaspShumik/sklearn-bayes

# Baseline: ARD classifier from sklearn-bayes, fitted with its default settings.
rvm = ClassificationARD().fit(X_train, t_train)

# `active_` is presumably a boolean mask of retained coefficients (confirm
# against the skbayes docs); entries that are not set count as pruned.
active = np.asarray(rvm.active_)
ard = active.size - active.sum()

trainp = acc(rvm.predict(X_train), t_train)
testp = acc(rvm.predict(X_test), t_test)

print('sparsity level: %0.4f' % (float(ard) / active.size))
print('train accuracy: %0.4f' % trainp)
print('test accuracy:  %0.4f' % testp)

sparsity level: 0.7469
train accuracy: 0.9783
test accuracy:  0.9447
