### test implementing basic EMF iterations

In [16]:
import numpy as np
import h5py

In [17]:
# Open the saved parameter file read-only and print the names stored in it.
# The handle stays open so the following cells can read from it.
hf =  h5py.File('mnistexample_params.h5','r')
print('List of arrays in this file: \n', hf.keys())

('List of arrays in this file: \n', [u'mnistexample___bias', u'mnistexample___vbias', u'mnistexample___weight'])


In [18]:
# Copy the three stored arrays out of the HDF5 file into in-memory NumPy arrays.
# presumably hb / vb are the hidden / visible biases and W the weights -- the
# dataset names suggest it; the recorded shapes are (256,), (784,), (256, 784).
hb = np.array(hf.get('mnistexample___bias'))
vb = np.array(hf.get('mnistexample___vbias'))
W = np.array(hf.get('mnistexample___weight'))

print hb.shape, vb.shape, W.shape

(256,) (784,) (256, 784)


In [19]:
# The arrays were copied with np.array(...) above, so the file can be closed.
hf.close()

In [32]:
from sklearn.utils.fixes import expit    
from sklearn.utils.extmath import safe_sparse_dot
from sklearn import linear_model, datasets, metrics, preprocessing 
from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score

In [33]:
def sig_means(x, b, W):
    """Return the element-wise logistic sigmoid of ``x . W^T + b``.

    Parameters
    ----------
    x : array or sparse matrix
        Left operand of the product with ``W.T``.
    b : array
        Bias added to the product.
    W : array
        Weight matrix; its transpose is the right operand.

    Returns
    -------
    The activation buffer, overwritten in place with its sigmoid.
    """
    pre_activation = safe_sparse_dot(x, W.T)
    pre_activation = pre_activation + b
    # expit writes its result into the buffer it reads from, so no second
    # array is allocated for the output.
    return expit(pre_activation, out=pre_activation)

In [60]:
# W is a NumPy array, so `*` multiplies element by element (it is not a matrix
# product); the printed shapes confirm W2 keeps the shape of W.
W2 = W*W
print "is W2 an element wize multiply ?" 
print W.shape, W2.shape

is W2 an element wize multiply ?
(256, 784) (256, 784)


In [35]:
# Load the 'MNIST original' dataset through scikit-learn's mldata loader and
# split it into the data matrix X and the target vector Y.
mnist = datasets.fetch_mldata('MNIST original')
X, Y = mnist.data, mnist.target

In [36]:
# Pin the data to {0, 1}: first rescale every column of X into [0, 1), then
# threshold at 0.001. The 0.0001 added to the column maximum keeps the division
# finite for columns whose maximum is 0.
X = (X - np.min(X, 0)) / (np.max(X, 0) + 0.0001)  # 0-1 scaling
X = preprocessing.binarize(X,0.001)

In [38]:
# `mean_hiddens` is not defined anywhere in this notebook; `sig_means` is the
# helper with the matching (x, b, W) signature and yields the (70000, 256)
# array whose shape is printed in the next cell.
h = sig_means(X, hb, W)

In [53]:
# Element-wise square of h (same shape as h, as the printed shapes show).
h2 = h*h
print h.shape, h2.shape

(70000, 256) (70000, 256)


In [40]:
# v is another name for the binarized data X; no copy is made.
v = X
print v.shape

(70000, 784)


In [42]:
# Print the shapes of all operands side by side as a sanity check.
print h.shape, h2.shape, v.shape, W2.shape

(70000, 256) (70000, 256) (70000, 784) (256, 784)


### run an RBM right here

In [85]:
def equilibrate(W, v0, h0, iters=3, damp=0.5):
    """Run damped fixed-point iterations on the visible and hidden means.

    Every iteration blends the freshly computed means with the current ones,
    ``m <- damp * update(m) + (1 - damp) * m``. The visible means are refreshed
    first, and the hidden update then sees the new visible means.

    Parameters
    ----------
    W : array
        Weights; only their element-wise square is handed to the helpers.
    v0, h0 : array
        Starting visible / hidden means.
    iters : int
        Number of iterations to run.
    damp : float
        Weight of the new estimate in each blend (1.0 disables damping).

    Returns
    -------
    (mv, mh) : the visible and hidden means after the last iteration.
    """
    mv = v0
    mh = h0

    W2 = W * W
    for _ in range(iters):
        # mv_update / mh_update are declared as (W2, h, v): the hidden means
        # are the second argument and the visible means the third.
        mv = damp * mv_update(W2, mh, mv) + (1.0 - damp) * mv
        mh = damp * mh_update(W2, mh, mv) + (1.0 - damp) * mh

    return mv, mh

In [86]:
def mv_update(W2, h, v):
    """Sigmoid of the second-order correction for the visible means.

    Computes ``expit((0.5 - v) * ((h - h*h) . W2))``. The prefactor
    ``(0.5 - v)`` is applied element-wise, so the result has the same shape as
    ``v`` and can be blended with it by the caller.

    Parameters
    ----------
    W2 : array
        Element-wise squared weights.
        assumes shape (n_hidden, n_visible), as printed earlier in the
        notebook -- TODO confirm.
    h : array
        Hidden means, (n_samples, n_hidden) or (n_hidden,).
    v : array
        Visible means, (n_samples, n_visible) or (n_visible,).

    Returns
    -------
    Array shaped like ``v``.

    NOTE(review): only the second-order term enters the sigmoid here; no bias
    and no first-order ``h . W`` term are included because the function
    receives neither -- confirm whether that is intended.
    """
    h_fluct = h - h * h
    # Element-wise product: a second matrix product here would contract over
    # the sample axis and no longer yield one value per sample and unit.
    a = np.dot(h_fluct, W2) * (0.5 - v)
    return expit(a, out=a)

In [87]:
def mh_update(W2, h, v):
    """Sigmoid of the second-order correction for the hidden means.

    Computes ``expit((0.5 - h) * ((v - v*v) . W2^T))``. The prefactor
    ``(0.5 - h)`` is applied element-wise, so the result has the same shape as
    ``h`` and can be blended with it by the caller.

    Parameters
    ----------
    W2 : array
        Element-wise squared weights.
        assumes shape (n_hidden, n_visible), as printed earlier in the
        notebook -- TODO confirm.
    h : array
        Hidden means, (n_samples, n_hidden) or (n_hidden,).
    v : array
        Visible means, (n_samples, n_visible) or (n_visible,).

    Returns
    -------
    Array shaped like ``h``.

    NOTE(review): only the second-order term enters the sigmoid here; no bias
    and no first-order ``v . W^T`` term are included because the function
    receives neither -- confirm whether that is intended.
    """
    v_fluct = v - v * v
    # W2 is transposed so the visible fluctuations are mapped onto the hidden
    # units; the prefactor is then applied element by element.
    a = np.dot(v_fluct, W2.T) * (0.5 - h)
    return expit(a, out=a)

In [92]:
#
# whenever we use v_pos, we need safe_sparse_dot
#
# lr: learning_rate
# mom: momentum
#
def update_weights(W, dW, W_prev, lr, mom, v_pos, h_pos, v_neg, h_ne, decay, decay_norm=None):
    """Apply one gradient step with momentum and optional weight decay to W.

    The step is built from the positive-phase statistics minus the
    negative-phase statistics, minus a second-order term
    ``W * ((h_neg - h_neg**2)^T . (v_neg - v_neg**2))``.

    Parameters
    ----------
    W : array
        Weights; updated IN PLACE and also returned.
        assumes shape (n_hidden, n_visible) -- TODO confirm.
    dW : ignored
        Overwritten immediately, as in the original code; kept only so
        existing positional calls keep working.
    W_prev : array or scalar
        Used as the previous step for the momentum term.
        NOTE(review): the original body read an undefined ``dW_prev`` and this
        is the only argument that can supply it -- confirm. A scalar 0.0 works
        for the first step.
    lr : float
        Learning rate.
    mom : float
        Momentum coefficient.
    v_pos : array or sparse matrix
        Positive-phase visible data, (n_samples, n_visible).
    h_pos : array
        Positive-phase hidden means, (n_samples, n_hidden).
    v_neg, h_ne : array
        Negative-phase visible / hidden means.
    decay : float
        Weight-decay strength.
    decay_norm : {'L1', 'L2', None}
        Which penalty gradient to subtract; any other value disables decay.

    Returns
    -------
    (W, W2, dW) : the updated weights, their element-wise square, and the step
    that was applied (to be passed as ``W_prev`` on the next call).
    """
    # The signature spells this argument `h_ne`; the name is kept for callers
    # and the intended `h_neg` is used internally.
    h_neg = h_ne
    dW_prev = W_prev

    # Positive phase minus negative phase. v_pos may be sparse, hence
    # safe_sparse_dot for every product that involves it.
    dW = safe_sparse_dot(v_pos.T, h_pos, dense_output=True).T
    dW -= np.dot(h_neg.T, v_neg)

    # Second-order term: per-weight product of hidden and visible fluctuations
    # summed over samples, scaled element-wise by W.
    h_fluct = h_neg - h_neg * h_neg
    v_fluct = v_neg - v_neg * v_neg
    dW -= W * np.dot(h_fluct.T, v_fluct)

    # Momentum.
    dW += mom * dW_prev

    # Weight decay: subtract the L1 or L2 penalty gradient.
    if decay_norm == 'L1':
        dW -= decay * np.sign(W)
    elif decay_norm == 'L2':
        dW -= decay * W

    W += lr * dW

    # Refresh the squared weights for the next mean-field pass.
    W2 = W * W

    return W, W2, dW

In [82]:
# intercepts += v_update, h_update
def update_intercepts(lr, v_pos, h_pos, v_neg, h_neg):
    """Return the bias increments for the visible and hidden units.

    Each increment is the learning rate times the difference between the
    positive-phase and negative-phase column sums.

    Parameters
    ----------
    lr : float
        Learning rate.
    v_pos, h_pos : array
        Positive-phase visible / hidden values, one row per sample. ``v_pos``
        may be any object whose ``sum(axis=0)`` converts to an array.
    v_neg, h_neg : array
        Negative-phase visible / hidden values, one row per sample.

    Returns
    -------
    (v_update, h_update) : increments for the visible and hidden biases.
    """
    hidden_gap = h_pos.sum(axis=0) - h_neg.sum(axis=0)
    # Flatten the positive visible total to 1-D before subtracting.
    visible_pos_total = np.asarray(v_pos.sum(axis=0)).squeeze()
    visible_gap = visible_pos_total - v_neg.sum(axis=0)
    return lr * visible_gap, lr * hidden_gap
    

In [84]:
def h_binomial_samples(h_neg, rng=None):
    """Turn the activation probabilities in ``h_neg`` into 0/1 samples, in place.

    Parameters
    ----------
    h_neg : float array
        Probabilities, assumed to lie in [0, 1]. The array is OVERWRITTEN
        with the samples.
    rng : object with a ``uniform(size=...)`` method, optional
        Random source, e.g. ``numpy.random.RandomState(seed)``. Defaults to
        NumPy's global generator; the original body read a global ``rng``
        that is not defined anywhere in this file.

    Returns
    -------
    ``h_neg`` itself, now holding only 0.0 and 1.0.
    """
    if rng is None:
        rng = np.random
    # An entry becomes 1.0 with probability equal to its value.
    h_neg[rng.uniform(size=h_neg.shape) < h_neg] = 1.0
    # Entries that were not promoted are below 1 and floor to 0.0; the second
    # argument makes floor write back into h_neg.
    return np.floor(h_neg, h_neg)

In [97]:
def calc_weight_gradient():
    """Placeholder: binds a local ``dW`` to 0.0 and returns ``None``."""
    dW = 0.0  # local only; discarded when the function returns
    return None

In [96]:
def regularize_weight_gradient():
    """Placeholder: binds a local ``dW`` to 0.0 and returns ``None``."""
    dW = 0.0  # local only; discarded when the function returns
    return None