In [1]:
import numpy as np
import sklearn.datasets as sk_dataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.linear_model import BayesianRidge
from scipy.io import loadmat

import pymc3 as pm



In [4]:
def standardize(x):
    """Centre each column of ``x`` and scale it by a floored standard deviation.

    Statistics are taken along axis 0, so every row is treated as one sample.
    The per-column standard deviation is clamped from below at
    ``1 / sqrt(len(x))`` so that constant columns do not divide by zero.

    Parameters
    ----------
    x : array-like
        Data with samples along the first axis.

    Returns
    -------
    numpy.ndarray
        ``(x - column mean) / clamped column standard deviation``.
    """
    column_mean = np.mean(x, axis=0)
    smallest_scale = 1 / np.sqrt(len(x))
    column_scale = np.maximum(np.std(x, axis=0), smallest_scale)
    centred = x - column_mean
    return centred / column_scale

def relu(x):
    """Rectified linear unit: the element-wise maximum of ``0`` and ``x``."""
    lower_bound = 0
    rectified = np.maximum(lower_bound, x)
    return rectified

def one_hot_encoding(label, n_class):
    """Turn class indices into a one-hot indicator matrix.

    Parameters
    ----------
    label : array-like
        Class indices, one entry per sample along the first axis. Whole-number
        floats (e.g. ``3.0``) are accepted and converted to integers.
    n_class : int
        Number of columns in the result.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(label), n_class)`` in which
        ``y[i, label[i]]`` is 1 and every other entry is 0.

    Raises
    ------
    ValueError
        If a label is not a whole number.
    IndexError
        If a label falls outside ``[-n_class, n_class)``.
    """
    raw = np.asarray(label)
    indices = raw.astype(int)
    # Refuse to silently truncate labels such as 1.5 to a valid class index.
    if not np.array_equal(indices, raw):
        raise ValueError("labels must be whole numbers")

    y = np.zeros([len(indices), n_class])
    # Give the row index trailing singleton axes so that it broadcasts against
    # `indices` row by row; this keeps `y[i, label[i]] = 1` semantics even when
    # `label` is shaped (n, 1) or (n, k).
    rows = np.arange(len(indices)).reshape((-1,) + (1,) * (indices.ndim - 1))
    y[rows, indices] = 1
    return y

def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    The largest score of each row is subtracted before exponentiating. This
    leaves the mathematical result unchanged but keeps ``exp`` from
    overflowing, which would otherwise produce ``inf / inf = NaN`` for large
    scores.

    Parameters
    ----------
    x : array-like, shape (n_sample, n_class)
        Unnormalised scores.

    Returns
    -------
    numpy.ndarray
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    scores = np.asarray(x)
    if not np.issubdtype(scores.dtype, np.inexact):
        # Integer input: promote first so the shift below cannot wrap around
        # for unsigned types.
        scores = scores.astype(float)
    if scores.size == 0:
        return np.exp(scores)
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exps = np.exp(shifted)
    # keepdims lets the row totals broadcast across the columns directly.
    return exps / np.sum(exps, axis=1, keepdims=True)

def sigmoid_kernel(X, beta, weights, bias):
    """Evaluate the network output ``[relu(X @ weights + bias), X] @ beta``.

    Despite its name, the hidden layer applies ``relu``, not a sigmoid.

    Parameters
    ----------
    X : array, shape (n_sample, n_feature)
        Input samples, one per row.
    beta : array with ``n_node + n_feature`` rows
        Output weights applied to the concatenated hidden and input features.
    weights : array, shape (n_feature, n_node)
        Hidden-layer weights.
    bias : array broadcastable against ``X @ weights``
        Hidden-layer bias, e.g. shape ``(1, n_node)`` or ``(n_node,)``.

    Returns
    -------
    array with ``n_sample`` rows
        Hidden activations and raw inputs, side by side, multiplied by ``beta``.
    """
    # Broadcasting adds the bias to every row; there is no need to build an
    # (n_sample, 1) column of ones and multiply it with the bias.
    h = relu(np.dot(X, weights) + bias)
    d = np.concatenate([h, X], axis=1)
    return np.dot(d, beta)

In [5]:
# --- Hyper-parameters --------------------------------------------------------
n_node = 10  # number of hidden nodes (second dimension of the hidden weights)
n_iter = 1000  # iteration count handed to pm.fit
lam = 1 # regularization parameter, lambda
w_range = [-1, 1] # range of random weights
b_range = [0, 1] # range of random biases
alpha_1 = 10**(-5) # Gamma prior on 'prec': alpha (shape)
alpha_2 = 10**(-5) # Gamma prior on 'prec': beta (rate)
alpha_3 = 10**(-5) # Gamma prior on 'var': alpha (shape)
alpha_4 = 10**(-5) # Gamma prior on 'var': beta (rate)
tol = 1.0e-3

# --- Data ---------------------------------------------------------------------
dataset = loadmat('coil20.mat')
# Labels are shifted down by one; presumably they are 1-based in the .mat file
# (TODO confirm) and must be 0-based to index the one-hot columns.
label = np.array([row[0] - 1 for row in dataset['Y']])
data = dataset['X']
n_class = 20

# train-test-split
X_train, X_test, y_train, y_test = train_test_split(data, label, test_size=0.3, random_state=42)
# kf = KFold(10, True, 1)
val_acc = []
max_index = -1

# Scale the test split with the TRAINING mean/std so both splits go through the
# same transformation; standardising the test split with its own statistics
# would feed the model differently scaled inputs at evaluation time.
# The std floor mirrors the one used in standardize().
train_mean = np.mean(X_train, axis=0)
train_std = np.maximum(np.std(X_train, axis=0), 1 / np.sqrt(len(X_train)))
X_test = (X_test - train_mean) / train_std
X_train = standardize(X_train)
n_sample, n_feature = np.shape(X_train)
y = one_hot_encoding(y_train, n_class)

# weights = (w_range[1] - w_range[0]) * np.random.random([n_feature, n_node]) + w_range[0]
# bias = (b_range[1] - b_range[0]) * np.random.random([1, n_node]) + b_range[0]

### 1) Initialization
a) Compute $\mathbf{D}=[\mathbf{H},\mathbf{X}]$, i.e. the hidden-layer output $\mathbf{H}$ concatenated column-wise with the input $\mathbf{X}$ <br>

In [4]:
# h = relu(np.dot(X_train, weights) + np.dot(np.ones([n_sample, 1]), bias))
# d = np.concatenate([h, X_train], axis=1)
# d = np.concatenate([d, np.ones_like(d[:, 0:1])], axis=1) # concat column of 1s

b) Compute $\mathbf{D}^T\mathbf{y}$, $\mathbf{D}^T\mathbf{D}$, and the eigenvalues $\lambda^0_1,\dots,\lambda^0_B$ of $\mathbf{D}^T\mathbf{D}$ <br>

In [5]:
# dT_y = np.dot(d.T, y)
# dT_d = np.dot(d.T, d)
# eigen_val = np.linalg.eigvalsh(dT_d)
# eigen_val

array([-1.22371836e-11, -2.28070375e-12, -2.09044845e-12, ...,
        2.23715050e+05,  3.83469564e+05,  7.23848649e+05])

c) Initialize $\sigma^2$ and $\gamma$ to default values <br>
Evidence approximation (MAP estimation on the posterior of the hyper-parameters):
$$p(\gamma)=\text{Gamma}(\gamma \mid \alpha_1, \alpha_2)$$
$$p(\sigma^{-2})=\text{Gamma}(\sigma^{-2} \mid \alpha_3, \alpha_4)$$
$$ \sigma_*^2, \gamma_* = \arg\max_{\sigma^2,\,\gamma} \left\{ \int_{\mathbb{R}^B} p(\mathbf{y} \mid \mathbf{X}, \boldsymbol{\beta}, \sigma^2)\,p(\boldsymbol{\beta} \mid \gamma)\,p(\gamma)\,p(\sigma^{-2})\,d\boldsymbol{\beta} \right\}$$

In [6]:
# Evidence approximation
def _posterior_mean_output(trace, X):
    """Average the network output on ``X`` over every posterior draw in ``trace``.

    The likelihood is Gaussian around the network output, so this average is
    the posterior-predictive mean without the added observation noise.
    """
    beta_draws = trace['beta']
    weight_draws = trace['weights']
    bias_draws = trace['bias']
    total = 0.0
    for beta_s, weights_s, bias_s in zip(beta_draws, weight_draws, bias_draws):
        total = total + sigmoid_kernel(X, beta_s, weights_s, bias_s)
    return total / len(beta_draws)


basic_model = pm.Model()
with basic_model:
    # Priors on the hidden layer.
    weights = pm.Normal('weights', mu=0, tau=1, shape=(n_feature, n_node))
    bias = pm.Normal('bias', mu=0, tau=1, shape=(1, n_node))
    # Hyper-priors. Both variables are used as precisions (`tau=`) below, so
    # 'var' is the noise precision sigma^-2 despite its name.
    prec = pm.Gamma('prec', alpha=alpha_1, beta=alpha_2)
    var = pm.Gamma('var', alpha=alpha_3, beta=alpha_4)
    beta = pm.Normal('beta', mu=0, tau=prec, shape=(n_feature + n_node, n_class))
    # The random variables are symbolic tensors, so the forward pass has to be
    # built from pm.math operations rather than NumPy functions.
    hidden = pm.math.maximum(0, pm.math.dot(X_train, weights) + bias)
    design = pm.math.concatenate([hidden, X_train], axis=1)
    y_obs = pm.Normal('y_obs', mu=pm.math.dot(design, beta), tau=var, observed=y)

    start = pm.find_MAP()
    approx = pm.fit(n_iter, start=start, obj_optimizer=pm.adam())
    trace = pm.sample_approx(approx=approx, draws=5000)
    pm.traceplot(trace)
    trace_summary = pm.summary(trace)
    # Mean of 5000 draws
    post_pred = pm.sample_ppc(trace, samples=5000, model=basic_model)

# Evaluate accuracy against the integer labels (not the one-hot matrix `y`).
y_train_pred = np.argmax(np.mean(post_pred['y_obs'], axis=0), axis=1)
train_acc = np.mean(y_train_pred == y_train)

# X_train is a plain ndarray that was baked into the model graph as a constant,
# so the model cannot be re-pointed at X_test. Evaluate the network on the test
# inputs directly from the posterior draws instead.
y_test_pred = np.argmax(_posterior_mean_output(trace, X_test), axis=1)
test_acc = np.mean(y_test_pred == y_test)
print(train_acc)
print(test_acc)
