### 1) Initialization
a) Compute $\mathbf{D}$ where $\mathbf{D}=\mathbf{[H,X]}$ <br>
b) Compute $\mathbf{D}^T\mathbf{y}$ and $\mathbf{D}^T\mathbf{D}$, together with the eigenvalues $\lambda^0_1,\dots,\lambda^0_B$ of $\mathbf{D}^T\mathbf{D}$ <br>
c) Initialize $\sigma^2$ and $\gamma$ to default values

### 2) Posterior update
a) Compute the posterior mean $\mathbf{m}$ as $$\mathbf{m}=\frac{1}{\sigma^2}\Sigma\mathbf{D}^T\mathbf{y}$$ <br>
b) Compute the posterior covariance $\Sigma$ from $$\Sigma^{-1}=\gamma\mathbf{I}+\frac{1}{\sigma^2}\mathbf{D}^T\mathbf{D}$$

### 3) Hyper-parameters update
a) Compute updated eigenvalues $$\lambda_i=\frac{1}{\sigma^2}\lambda^0_i,\qquad i=1,\dots,B$$ <br>
b) Update $\gamma$ as in $$\gamma=\frac{\delta + 2\alpha_1}{\|\mathbf{m}\|^2_2 + 2\alpha_2}$$ <br>
c) Update $\sigma^2$ as in $$\sigma^2=\frac{\|\mathbf{y}-\mathbf{D}\mathbf{m}\|^2_2+2\alpha_4}{N-\delta+2\alpha_3}$$
where $\delta$ is defined as $$\delta=\sum^B_{i=1}\frac{\lambda_i}{\gamma+\lambda_i}$$

In [1]:
import numpy as np
import sklearn.datasets as sk_dataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.linear_model import BayesianRidge
from scipy.io import loadmat

import pymc3 as pm
import theano.tensor as T
import arviz as az



In [2]:
def standardize(x):
    """Column-wise standardization of ``x`` to zero mean and unit scale.

    The per-column standard deviation is floored at ``1 / sqrt(len(x))`` so
    that (near-)constant columns do not cause a division by zero.

    Parameters
    ----------
    x : numpy.ndarray
        Data whose first axis indexes the samples.

    Returns
    -------
    numpy.ndarray
        ``(x - column mean) / floored column std``.
    """
    min_scale = 1 / np.sqrt(len(x))
    scale = np.maximum(np.std(x, axis=0), min_scale)
    centered = x - np.mean(x, axis=0)
    return centered / scale

def relu(x):
    """Rectified linear unit: element-wise maximum of zero and ``x``."""
    zero = 0
    rectified = np.maximum(zero, x)
    return rectified

def one_hot_encoding(label, n_class):
    """Convert integer class labels into a one-hot indicator matrix.

    Parameters
    ----------
    label : sequence or numpy.ndarray of int
        Class index (or row of class indices) for every sample; each index
        must lie in ``[0, n_class)``.
    n_class : int
        Number of classes, i.e. the number of columns of the result.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(label), n_class)`` with ``1`` at
        ``[i, label[i]]`` and ``0`` elsewhere.

    Raises
    ------
    IndexError
        If a label is not an integer or falls outside ``[0, n_class)``.
        Negative labels are rejected explicitly: plain NumPy indexing would
        silently wrap them around to the last columns.
    """
    n_sample = len(label)
    y = np.zeros([n_sample, n_class])
    if n_sample == 0:
        return y

    # One row of class indices per sample, so that both flat labels and
    # (n_sample, k) label arrays are handled by the same indexing expression.
    idx = np.asarray(label).reshape(n_sample, -1)
    if idx.size == 0:
        return y
    if not np.issubdtype(idx.dtype, np.integer):
        raise IndexError(
            "class labels must be integers, got dtype %s" % idx.dtype)
    if idx.min() < 0 or idx.max() >= n_class:
        raise IndexError(
            "class labels must lie in [0, %d), got range [%d, %d]"
            % (n_class, idx.min(), idx.max()))

    rows = np.arange(n_sample)[:, None]  # broadcasts against idx row-wise
    y[rows, idx] = 1
    return y

In [3]:
# --- Experiment configuration ---
seed = 42 # one seed for both the data split and the random hidden layer
n_node = 10 # number of random hidden nodes
n_iter = 1000 # number of iterations handed to pm.fit
lam = 1 # regularization parameter, lambda
w_range = [-1, 1] # range of random weights
b_range = [0, 1] # range of random biases

# --- Load data ---
dataset = loadmat('coil20.mat')
# First column of 'Y' shifted down by one (presumably 1-based class ids).
# The cast to a signed integer happens BEFORE the subtraction so that an
# unsigned storage dtype cannot wrap around.
label = dataset['Y'][:, 0].astype(int) - 1
data = dataset['X']
n_class = 20

# train-test-split
X_train, X_test, y_train, y_test = train_test_split(data, label, test_size=0.3, random_state=seed)
val_acc = []
max_index = -1

# NOTE(review): only X_train is standardized in this cell; X_test will need
# the same treatment before it is used for prediction.
X_train = standardize(X_train)
n_sample, n_feature = np.shape(X_train)
print(np.shape(X_train))

# --- Random hidden layer ---
# A dedicated, seeded generator makes the random features reproducible
# without touching NumPy's global random state.
rng = np.random.default_rng(seed)
weights = rng.uniform(w_range[0], w_range[1], size=(n_feature, n_node))
bias = rng.uniform(b_range[0], b_range[1], size=(1, n_node))

# bias has shape (1, n_node) and broadcasts over the sample axis
h = relu(np.dot(X_train, weights) + bias)
print(np.shape(h))

# --- Design matrix D = [H, X, 1] ---
d = np.concatenate([h, X_train], axis=1)
print(np.shape(d))
d = np.concatenate([d, np.ones_like(d[:, 0:1])], axis=1) # concat column of 1s
print(np.shape(d))

# --- Targets and sufficient statistics ---
y = one_hot_encoding(y_train, n_class)
dT_y = np.dot(d.T, y)
dT_d = np.dot(d.T, d)

(1008, 1024)
(1008, 10)
(1008, 1034)
(1008, 1035)


In [4]:
# Bayesian linear model on the design matrix `d`, fitted variationally.
gamma_hyper = 10 ** (-5) # shape and rate of both Gamma hyper-priors

model = pm.Model()
with model:
    # Precision shared by every entry of the output weight matrix `beta`.
    prec = pm.Gamma('prec', alpha=gamma_hyper, beta=gamma_hyper)
    # NOTE: despite its name, `var` is passed as `tau` to the likelihood
    # below, so it acts as the noise PRECISION, not the variance. The name is
    # kept so that existing trace keys do not change.
    var = pm.Gamma('var', alpha=gamma_hyper, beta=gamma_hyper)
    beta = pm.Normal('beta', mu=0, tau=prec, shape=(n_feature + n_node + 1, n_class))
    y_obs = pm.Normal('y_obs', mu=T.dot(d, beta), tau=var, observed=y)

    # The MAP estimate is used as the starting point of the variational fit.
    start = pm.find_MAP()
    approx = pm.fit(n_iter, start=start, obj_optimizer=pm.adam())
    trace = pm.sample_approx(approx=approx, draws=5000)

    # Restrict diagnostics to the two scalar hyper-parameters: `beta` has
    # (n_feature + n_node + 1) * n_class entries, far too many to plot.
    az.plot_trace(trace, var_names=['prec', 'var'])
    summary = pm.summary(trace, var_names=['prec', 'var'])

# Last expression of the cell so the summary table is actually displayed.
summary




Finished [100%]: Average Loss = 1.446e+08



You can find the C code in this temporary file: C:\Users\yc000\AppData\Local\Temp\theano_compilation_error_5l48y93j


Exception: ('The following error happened while compiling the node', softplus(InplaceDimShuffle{x,0}.0), '\n', "Compilation failed (return status=1): C:\\Users\\yc000\\AppData\\Local\\Temp\\ccbrtr1T.o: In function `__struct_compiled_op_mac4891c23a08b318d15b3ee845c76812a262bec370140b542616c81ccec6dd4b_executor':\r. C:/Users/yc000/AppData/Local/Theano/compiledir_Windows-10-10.0.19043-SP0-Intel64_Family_6_Model_140_Stepping_1_GenuineIntel-3.9.7-64/tmpgbjcegqg/mod.cpp:577: undefined reference to `__gxx_personality_sj0'\r. C:/Users/yc000/AppData/Local/Theano/compiledir_Windows-10-10.0.19043-SP0-Intel64_Family_6_Model_140_Stepping_1_GenuineIntel-3.9.7-64/tmpgbjcegqg/mod.cpp:577: undefined reference to `_Unwind_SjLj_Register'\r. C:\\Users\\yc000\\AppData\\Local\\Temp\\ccbrtr1T.o: In function `run':\r. C:/Users/yc000/AppData/Local/Theano/compiledir_Windows-10-10.0.19043-SP0-Intel64_Family_6_Model_140_Stepping_1_GenuineIntel-3.9.7-64/tmpgbjcegqg/mod.cpp:549: undefined reference to `_Unwind_SjLj_Unregister'\r. C:\\Users\\yc000\\AppData\\Local\\Temp\\ccbrtr1T.o: In function `__gnu_cxx::new_allocator<std::pair<int, int> >::deallocate(std::pair<int, int>*, unsigned long long)':\r. C:/Users/yc000/miniconda3/envs/pm3env/Library/mingw-w64/include/c++/5.3.0/ext/new_allocator.h:110: undefined reference to `_Unwind_SjLj_Resume'\r. C:\\Users\\yc000\\AppData\\Local\\Temp\\ccbrtr1T.o:mod.cpp:(.data+0xc0): undefined reference to `__gxx_personality_sj0'\r. collect2.exe: error: ld returned 1 exit status\r. ", 'FunctionGraph(softplus(<TensorType(float64, row)>))')

In [None]:
# model = pm.Model()
# with model:
#     weights = pm.Normal('weights', mu=0, tau=1, shape=(n_feature, n_node))
#     bias = pm.Normal('bias', mu=0, tau=1)
#     gamma_0 = pm.Gamma('gamma_0', alpha=10**(-5), beta=10**(-5))
#     gamma_1 = pm.Gamma('gamma_1', alpha=10**(-5), beta=10**(-5))
#     beta = pm.Normal