In [4]:
from __future__ import print_function

import vdrvc 
import warnings
import numpy as np
import matplotlib.pyplot as plt

from data import reader
from theano import tensor as T
from sklearn.datasets import load_digits
from IPython.display import clear_output as cls
from sklearn.metrics import accuracy_score as acc
from sklearn.cross_validation import train_test_split

%matplotlib inline
warnings.simplefilter("ignore")
np.random.seed(15632)

# Load MNIST data

In [6]:
# Take the first element returned by the project's MNIST reader and unpack
# it into train/test features and labels; the last two entries are unused.
mnist_split = reader.load_mnist()[0]
X_train, t_train, X_test, t_test, _, _ = mnist_split

# Flatten every sample into a single row: (n_samples, n_features).
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

# Variational DropOut

In [10]:
# Training settings for the variational-dropout classifier.
vd_fit_options = dict(
    num_classes=10,
    batch_size=X_train.shape[0],  # one batch spans the whole training set
    max_iter=15000,
    lr=1e-2,
    beta=0.9,
    display_each=1000,            # progress is reported every 1000 iterations
)

vd = vdrvc.vdrvc()
vd = vd.fit(X_train, t_train, **vd_fit_options)

iter = 0 vlb = 1295159.0781 acc = 0.1145 ard = 0.1056
iter = 1000 vlb = 36276.4532 acc = 0.9264 ard = 0.2341
iter = 2000 vlb = 20395.7240 acc = 0.9342 ard = 0.2976
iter = 3000 vlb = 15236.5419 acc = 0.9348 ard = 0.3638
iter = 4000 vlb = 13622.8063 acc = 0.9341 ard = 0.4071
iter = 5000 vlb = 12913.4083 acc = 0.9342 ard = 0.4557
iter = 6000 vlb = 12584.1452 acc = 0.9340 ard = 0.4962
iter = 7000 vlb = 12297.6962 acc = 0.9339 ard = 0.5460
iter = 8000 vlb = 12273.3160 acc = 0.9340 ard = 0.5828
iter = 9000 vlb = 12172.8914 acc = 0.9339 ard = 0.6110
iter = 10000 vlb = 12165.3529 acc = 0.9340 ard = 0.6431
iter = 11000 vlb = 12071.5542 acc = 0.9339 ard = 0.6666
iter = 12000 vlb = 12030.7657 acc = 0.9341 ard = 0.6710
iter = 13000 vlb = 12044.9001 acc = 0.9339 ard = 0.6823
iter = 14000 vlb = 12015.5507 acc = 0.9340 ard = 0.6976


In [11]:
# Remove features with dropout rate p > 0.99 (approximately log_alpha > 5).
LOG_ALPHA_PRUNE_THRESHOLD = 5
prune_mask = vd.log_alpha > LOG_ALPHA_PRUNE_THRESHOLD
ard = np.sum(prune_mask)

# Keep a backup of the dense weights. Take it only once: re-running this
# cell must not overwrite the backup with the already-pruned weights.
if getattr(vd, 'theta_old', None) is None:
    vd.theta_old = vd.theta.copy()
vd.theta[prune_mask] = 0

# Accuracy score. accuracy_score expects (y_true, y_pred), so the ground
# truth goes first; the resulting value is the same either way.
trainp = acc(t_train, vd.predict(X_train))
testp = acc(t_test, vd.predict(X_test))

# float() keeps the ratio a true division under Python 2 as well.
print('sparsity = %0.4f' % (float(ard) / vd.theta.size))
print('train = %0.4f' % trainp)
print('test = %0.4f' % testp)

sparsity = 0.6976
train = 0.9339
test = 0.9260


# L1 Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegressionCV as LR

# Build the L1-penalised, cross-validated logistic regression first,
# then fit it on the training data.
lr = LR(solver='liblinear', penalty='l1', verbose=3)
lr = lr.fit(X_train, t_train)

# Clear the verbose solver log from the cell output.
cls()



In [14]:
# Count the coefficients that are exactly zero.
ard = np.sum(lr.coef_ == 0)

# accuracy_score expects (y_true, y_pred), so the ground truth goes first;
# the resulting value is the same either way.
trainp = acc(t_train, lr.predict(X_train))
testp = acc(t_test, lr.predict(X_test))

# float() keeps the ratio a true division under Python 2 as well.
print('sparsity = %0.4f' % (float(ard) / lr.coef_.size))
print('train = %0.4f' % trainp)
print('test = %0.4f' % testp)

sparsity = 0.5781
train = 0.9259
test = 0.9185


# Vanilla RVM (does not work on MNIST)

In [None]:
#from skbayes.rvm_ard_models.fast_rvm import ClassificationARD
# https://github.com/AmazaspShumik/sklearn-bayes