# Diary for CRBM implementation



This notebook walks through the parts of `crbm.py` in some detail.

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import numpy as np 
import pandas as pd
import numexpr as ne
import sklearn
from sklearn import preprocessing



##### read data from  `../Datasets/motion.mat`

More data from human motion captures can be found here:

http://people.csail.mit.edu/ehsu/work/sig05stf/

In [2]:
from scipy.io import loadmat  # this is the SciPy module that loads mat-files
data = loadmat('../Datasets/motion.mat')

In [3]:
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'skel', 'Motion'])

In [4]:
X1 = data["Motion"][0][0]
X2 = data["Motion"][0][1]
X3 = data["Motion"][0][2]

In [68]:
# NOTE(review): X2.shape is listed twice below; the third entry was presumably meant to be X3.shape -- confirm.
X1.shape, X2.shape, X2.shape

((1750, 108), (1040, 108), (1040, 108))

Several features are 0

In [6]:
#(X1 - np.min(X1,0)) / (np.max(X1,0) - np.min(X1,0))* (np.min(X1,0) != 0)

In [7]:
X1[:,3].min(), X1[:,3].max(), X1.shape

(-1049.559326171875, 490.09881591796881, (1750, 108))

In [8]:
# Min-max scale every feature (column) of X1 in place.
n_features = X1.shape[1]
for f in range(n_features):
    column = X1[:, f]
    lowest, highest = column.min(), column.max()
    spread = highest - lowest
    shifted = column - lowest
    # A constant feature has zero spread: only shift it, so we never divide by zero.
    X1[:, f] = shifted / spread if spread != 0 else shifted


In [9]:
X1.min(), X1.max()

(0.0, 1.0)

### CRBM class

In [33]:
a = 10
b=2
np.zeros([a,b])gm

SyntaxError: invalid syntax (<ipython-input-33-814e5abe2947>, line 3)

In [34]:
class CRBM:
    """
    Parameter container for a Conditional Restricted Boltzmann Machine (CRBM).

    Attributes set by the constructor:
        W: float32 array of shape (n_hid, n_vis), visible-to-hidden weights.
        A: float32 array of shape (n_vis, n_vis * n_cond), history-to-visible weights.
        B: float32 array of shape (n_hid, n_vis * n_cond), history-to-hidden weights.
        v_bias, h_bias: zero-filled column vectors of shape (n_vis, 1) and (n_hid, 1).
        dy_v_bias, dy_h_bias: zero-filled column vectors with the same shapes,
            kept separately from the static biases.
        n_vis, n_hid: layer sizes as given.
        n_his: the value passed as `n_cond`.
        previous_xneg (None), num_epochs_trained (0), lr (0), monitor_time (as given).
    """

    def __init__(self, n_vis, n_hid, n_cond, seed=42, sigma=0.3, monitor_time=True):
        """
        Draw the weight matrices from N(0, sigma) and zero the biases.

        Note that this seeds NumPy's *global* random generator with `seed`.
        """
        self.previous_xneg = None
        np.random.seed(seed)

        def gaussian_weights(n_rows, n_cols):
            # Sampled in double precision, then stored as float32.
            return np.random.normal(0, sigma, [n_rows, n_cols]).astype('float32')

        # The draw order (W, A, B) determines the values obtained for a given seed.
        n_history_units = n_vis * n_cond
        self.W = gaussian_weights(n_hid, n_vis)            # vis to hid
        self.A = gaussian_weights(n_vis, n_history_units)  # cond to vis
        self.B = gaussian_weights(n_hid, n_history_units)  # cond to hid

        # Static biases and dynamic biases are four independent arrays.
        self.v_bias, self.h_bias = np.zeros([n_vis, 1]), np.zeros([n_hid, 1])
        self.dy_v_bias, self.dy_h_bias = np.zeros([n_vis, 1]), np.zeros([n_hid, 1])

        self.n_vis, self.n_hid, self.n_his = n_vis, n_hid, n_cond

        self.num_epochs_trained = 0
        self.lr = 0
        self.monitor_time = monitor_time

In [35]:
crbm = CRBM(n_vis=108, n_hid=256, n_cond=20, seed=123, sigma = 0.3)

In [36]:
crbm.W.shape, crbm.A.shape, crbm.B.shape

((256, 108), (108, 2160), (256, 2160))

### Auxiliary functions

In [37]:

def sig(v):
    """Element-wise logistic sigmoid ``1 / (1 + exp(-v))``, evaluated with numexpr."""
    logistic = "1/(1 + exp(-v))"
    # Bind the expression's only variable explicitly rather than via frame lookup.
    return ne.evaluate(logistic, local_dict={"v": v})


def split_vis(crbm: CRBM, vis: np.ndarray):
    """
    Split a 2-D window into its last row and all the rows before it.

    Both parts are returned transposed, so the window's columns run along
    axis 0 of the results.

    Returns:
        (x, cond): `x` is the last row of `vis` as a column of shape
        (vis.shape[1], 1); `cond` holds the preceding rows, with shape
        (vis.shape[1], vis.shape[0] - 1).

    Raises:
        AssertionError: if the number of columns of `vis` differs from `crbm.n_vis`.
    """
    last_row = vis.shape[0] - 1
    x = vis[[last_row], :].T
    cond = vis[:last_row, :].T

    sizes_match = (crbm.n_vis == x.shape[0]) and (crbm.n_vis == cond.shape[0])
    assert sizes_match, \
        "crbm.n_vis = {}, is different from x.shape[0] = {} or cond.shape[0] = {}".format(
            crbm.n_vis, x.shape[0], cond.shape[0])
    return x, cond


def dynamic_biases_up(crbm: CRBM, cond: np.ndarray):
    """
    Recompute the dynamic biases from `cond` and store them on `crbm`.

    Sets `crbm.dy_v_bias = A @ cond + v_bias` and `crbm.dy_h_bias = B @ cond + h_bias`.
    Returns None.
    """
    history_to_vis = np.dot(crbm.A, cond)
    crbm.dy_v_bias = history_to_vis + crbm.v_bias

    history_to_hid = np.dot(crbm.B, cond)
    crbm.dy_h_bias = history_to_hid + crbm.h_bias
        
        
def hid_means(crbm: CRBM, vis: np.ndarray):
    """Return `sig(W @ vis + dy_h_bias)`, using the dynamic hidden bias currently stored on `crbm`."""
    weighted_input = np.dot(crbm.W, vis)
    return sig(weighted_input + crbm.dy_h_bias)
    
    
def vis_means(crbm: CRBM, hid: np.ndarray):
    """Return `sig(W.T @ hid + dy_v_bias)`, using the dynamic visible bias currently stored on `crbm`."""
    weighted_input = np.dot(crbm.W.T, hid)
    return sig(weighted_input + crbm.dy_v_bias)


In [38]:
X = X1[0:21, :]
X.shape, crbm.n_his

((21, 108), 20)

In [39]:
vis, cond = split_vis(crbm, X)
vis.shape, cond.shape

((108, 1), (108, 20))

### Compute gradients

```
function gibbs(rbm::AbstractRBM, vis::Mat; n_times=1)
    v_pos = vis
    h_pos = sample_hiddens(rbm, v_pos)
    v_neg = sample_visibles(rbm, h_pos)
    h_neg = sample_hiddens(rbm, v_neg)
    for i=1:n_times-1
        v_neg = sample_visibles(rbm, h_neg)
        h_neg = sample_hiddens(rbm, v_neg)
    end
    return v_pos, h_pos, v_neg, h_neg
end
```

In [40]:

def sample_hiddens(crbm: CRBM, v: np.ndarray, cond: np.ndarray):
    """
    Sample the hidden units given the visible vector `v` and the history `cond`.

    Returns:
        (h_sample, h_mean): `h_mean` is `sig(W @ v + B @ cond + h_bias)`;
        `h_sample` is a boolean array of the same shape, True wherever
        `h_mean` exceeds a fresh uniform draw from NumPy's global RNG.
    """
    activation = np.dot(crbm.W, v) + np.dot(crbm.B, cond) + crbm.h_bias
    h_mean = sig(activation)
    thresholds = np.random.random(h_mean.shape).astype(np.float32)
    return h_mean > thresholds, h_mean


def sample_visibles(crbm: CRBM, h: np.ndarray, cond: np.ndarray):
    """
    Return the visible means `W.T @ h + A @ cond + v_bias`.

    The visible units are Gaussian, so no sigmoid is applied and nothing is
    sampled: the mean itself is returned.
    """
    from_hidden = np.dot(crbm.W.T, h)
    from_history = np.dot(crbm.A, cond)
    return from_hidden + from_history + crbm.v_bias


In [41]:
def CDK(crbm, vis, cond, K=1):
    """
    Run a Gibbs chain for contrastive divergence, starting from `vis`.

    One reconstruction step is always performed; `K - 1` further steps follow,
    so any K <= 1 behaves like K = 1.

    Returns:
        (v_pos, h_pos_p, v_neg, h_neg_p): the input `vis`, the hidden
        probabilities given `vis`, the final visible reconstruction, and the
        hidden probabilities given that reconstruction.
    """
    # Positive phase: hidden units driven by the data.
    h_state, h_data_p = sample_hiddens(crbm, vis, cond)

    # First reconstruction step (always executed).
    v_model = sample_visibles(crbm, h_state, cond)
    h_state, h_model_p = sample_hiddens(crbm, v_model, cond)

    # Remaining steps continue the chain from the latest hidden sample.
    for _ in range(K - 1):
        v_model = sample_visibles(crbm, h_state, cond)
        h_state, h_model_p = sample_hiddens(crbm, v_model, cond)

    return vis, h_data_p, v_model, h_model_p

In [42]:
def compute_gradient(crbm, X):
    """
    Approximate the likelihood gradient for one window `X` with contrastive divergence.

    `X` is split with `split_vis` into the current frame and its history; the
    history is flattened into a single column vector before running `CDK`.

    Returns:
        (dW, dA, dB, dv_bias, dh_bias, rec_error), where `rec_error` is the
        norm of the difference between the frame and its reconstruction.
    """
    frame, history = split_vis(crbm, X)
    history = history.flatten()[:, np.newaxis]  # one long column vector

    v_data, h_data, v_model, h_model = CDK(crbm, frame, history)
    inv_n_obs = 1. / frame.shape[1]
    history_t = history.T

    # For a single observation: dW = h * v^T - h_hat * v_hat^T (likewise for A and B).
    dW = (np.dot(h_data, v_data.T) - np.dot(h_model, v_model.T)) * inv_n_obs
    dA = (np.dot(v_data, history_t) - np.dot(v_model, history_t)) * inv_n_obs
    dB = (np.dot(h_data, history_t) - np.dot(h_model, history_t)) * inv_n_obs

    visible_residual = v_data - v_model
    dv_bias = np.mean(visible_residual, axis=1, keepdims=True)
    dh_bias = np.mean(h_data - h_model, axis=1, keepdims=True)

    rec_error = np.linalg.norm(visible_residual)

    return dW, dA, dB, dv_bias, dh_bias, rec_error

In [43]:
X = X1[0:21,:]

In [44]:
X.shape, crbm.n_his

((21, 108), 20)

In [45]:
# Notice that the history is converted to a "long column vector" concatenating
# all the rows of the n_his vectors into a single vector of `n_vis * n_his` elements.
# This is done by `cond = np.array([cond.flatten()]).T`

dW, dA, dB, dv_bias, dh_bias, rec_error = compute_gradient(crbm, X)

In [46]:
X.shape, rec_error

((21, 108), 63.358869724309947)

### SGD 

In [64]:
def update_weights_sgd(crbm, grads, learning_rate):
    """
    Apply one gradient step to the model parameters, in place.

    Args:
        crbm: object exposing `W`, `A`, `B`, `v_bias` and `h_bias`.
        grads: iterable of exactly five gradients, ordered
            (dW, dA, dB, dv_bias, dh_bias).
        learning_rate: scalar step size; each gradient is multiplied by it
            and *added* to the matching parameter.
    """
    # Unpack first so a wrongly sized `grads` fails before anything is modified.
    dW, dA, dB, dv_bias, dh_bias = grads
    steps = (("W", dW), ("A", dA), ("B", dB), ("v_bias", dv_bias), ("h_bias", dh_bias))

    for attr, grad in steps:
        param = getattr(crbm, attr)
        param += grad * learning_rate
        setattr(crbm, attr, param)

In [65]:
dW, dA, dB, dv_bias, dh_bias, err = compute_gradient(crbm, X)
grads  = (dW, dA, dB, dv_bias, dh_bias)

In [66]:
update_weights_sgd(crbm, grads,  0.0001)

In [67]:
err

19.838718360558655

### Apply momentum: (TODO)

### Get slice of data

Given a time series stored with one column per time step (column `k` holds the feature vector measured at time `k`), we want to take a slice of `n_his + 1` consecutive columns to feed the CRBM: `n_his` columns of history followed by the visible vector.

In [59]:
X.shape

(21, 108)

In [60]:
def get_slice_at_position_k(X, k, n_his):
    """
    Return the window of `n_his + 1` consecutive columns of `X` ending at position `k`.

    The result is `X[:, k - (n_his + 1):k]`: its first `n_his` columns are the
    history and its last column, `X[:, k - 1]`, is the visible vector. `k` is
    therefore a 1-based position along axis 1.

    Args:
        X: 2-D array indexed as [feature, timestep].
        k: 1-based end position of the window; must satisfy
            `n_his < k <= X.shape[1]`.
        n_his: number of history columns preceding the visible vector.

    Raises:
        AssertionError: if `k` is outside the valid range.
    """
    assert k > n_his, "Position k = {} is not greater than n_his = {}".format(k, n_his)
    # Report the size of the axis that is actually checked (axis 1, the timesteps).
    assert k <= X.shape[1], "Position k = {} is bigger than number of timesteps of X.shape[1] = {}".format(k, X.shape[1])
    return X[:, (k-(n_his+1)):k]

In [61]:
X_tr = X1.T
print("X_tr shape: ", X_tr.shape, "\nslice shape:", get_slice_at_position_k(X_tr, 520, crbm.n_his).shape)

X_tr shape:  (108, 1750) 
slice shape: (108, 21)


### Train a single epoch 

In [62]:
X_tr = X1.T
X_tr.shape, X_tr.shape[1],  crbm.n_vis, crbm.n_hid, crbm.n_his

((108, 1750), 1750, 108, 256, 20)

In [63]:
# One SGD update per window, sliding the end position k along the time axis.
learning_rate = 0.0001
first_k, last_k = crbm.n_his + 1, X_tr.shape[1]

for k in range(first_k, last_k + 1):
    X_curr = get_slice_at_position_k(X_tr, k, crbm.n_his)

    # compute_gradient returns the five gradients followed by the reconstruction error.
    *grads, rec_error = compute_gradient(crbm, X_curr.T)
    update_weights_sgd(crbm, grads, learning_rate)

    print("rec error: ", rec_error)

    # Stop early, right after the window ending at position 30 has been processed.
    if k == 30:
        break


rec error:  58.1938806085
rec error:  56.176794207
rec error:  53.3484168861
rec error:  52.1085234857
rec error:  50.8423648218
rec error:  50.6867620426
rec error:  47.2086629978
rec error:  43.4155435622
rec error:  42.2857903221
rec error:  41.4349546942
rec error:  39.0550614771
rec error:  38.4127170302
rec error:  36.6722492673
rec error:  35.4783744422
rec error:  34.7354175752
rec error:  35.7179314207
rec error:  35.5814284773
rec error:  31.995936162
rec error:  31.2094554516
rec error:  31.7308243821
rec error:  30.430519738
rec error:  29.9647552151
rec error:  26.8541889449
rec error:  27.1305324433
rec error:  29.7718521615
rec error:  25.6087951186
rec error:  24.2014800009
rec error:  27.5110993386
rec error:  25.4905713317
rec error:  24.6490244872
rec error:  22.2867894529
rec error:  26.3110627266
rec error:  22.7470235267
rec error:  26.2297175851
rec error:  20.9076777803
rec error:  22.1517465508
rec error:  19.4673657166
rec error:  23.0385606492
rec error:  21.

rec error:  16.6071581694
rec error:  15.6878744585
rec error:  16.8888069671
rec error:  16.0990765587
rec error:  17.402140781
rec error:  15.0639849447
rec error:  15.7448906042
rec error:  18.1508006656
rec error:  16.0490696484
rec error:  17.3901832056
rec error:  16.4413250309
rec error:  18.1138766605
rec error:  17.1759823269
rec error:  19.5830956892
rec error:  16.0365907591
rec error:  17.0943004796
rec error:  16.8768335057
rec error:  15.3962777997
rec error:  18.4725768582
rec error:  18.4590405376
rec error:  15.8086069892
rec error:  17.9424255572
rec error:  14.837412541
rec error:  16.5451157971
rec error:  16.386450579
rec error:  15.4097238498
rec error:  16.4393211976
rec error:  15.6540847207
rec error:  17.2361752764
rec error:  16.0419714069
rec error:  16.222196343
rec error:  13.8485146218
rec error:  14.3783860256
rec error:  15.4942368773
rec error:  13.6261954168
rec error:  13.6424365655
rec error:  14.018186673
rec error:  13.2835961464
rec error:  14.29

rec error:  14.6989275442
rec error:  15.2731723806
rec error:  15.8371496893
rec error:  16.9183212946
rec error:  15.8184704597
rec error:  15.4769092109
rec error:  15.1381736214
rec error:  15.7359301298
rec error:  16.0841289459
rec error:  15.5636985619
rec error:  17.1387486484
rec error:  15.8885304676
rec error:  15.6641886392
rec error:  17.4114290317
rec error:  16.7688589078
rec error:  18.0579945977
rec error:  18.6366348334
rec error:  19.3212787183
rec error:  18.3679442057
rec error:  19.9347475154
rec error:  19.0818230724
rec error:  18.047131164
rec error:  17.4467480834
rec error:  17.6983946625
rec error:  15.8422984916
rec error:  19.0124504103
rec error:  15.938675521
rec error:  17.778599359
rec error:  17.348824423
rec error:  16.0210964887
rec error:  18.7119319261
rec error:  18.2271134258
rec error:  17.3128402204
rec error:  16.7512224887
rec error:  17.8365286861
rec error:  20.226153393
rec error:  16.3221237846
rec error:  17.4834436758
rec error:  17.80

rec error:  10.8004855573
rec error:  11.5750736106
rec error:  14.0665048992
rec error:  11.9363771155
rec error:  11.5922277189
rec error:  12.4328061944
rec error:  12.5673057143
rec error:  12.4417331463
rec error:  13.3372389012
rec error:  12.121271101
rec error:  12.7919744001
rec error:  13.1136625438
rec error:  13.494681147
rec error:  12.785785565
rec error:  10.9854947505
rec error:  12.2385457817
rec error:  11.8008619691
rec error:  12.1421361768
rec error:  12.3450668107
rec error:  11.3810674421
rec error:  12.6071749769
rec error:  11.8060871495
rec error:  10.8589961572
rec error:  13.0643060808
rec error:  13.6580159829
rec error:  10.9520450594
rec error:  11.5998097082
rec error:  11.1124258359
rec error:  11.0924755805
rec error:  13.6943002809
rec error:  11.2162036461
rec error:  12.9274377138
rec error:  14.5057071241
rec error:  12.637246815
rec error:  12.4624317727
rec error:  12.1279192029
rec error:  11.6111147264
rec error:  11.5094106897
rec error:  11.5

rec error:  10.0816231756
rec error:  10.8866436903
rec error:  11.5413003238
rec error:  10.2187734894
rec error:  10.5772122434
rec error:  10.9625955378
rec error:  11.2106969617
rec error:  10.5545522541
rec error:  12.8730258082
rec error:  11.5299574844
rec error:  10.7870766478
rec error:  11.5917517131
rec error:  12.3946339089
rec error:  11.6590686194
rec error:  12.1895205141
rec error:  11.0777156979
rec error:  12.6968706767
rec error:  11.7653237192
rec error:  11.156814591
rec error:  11.7287203695
rec error:  11.7260676705
rec error:  12.0353868282
rec error:  12.2832584427
rec error:  13.023109739
rec error:  12.5906415003
rec error:  11.7801650597
rec error:  12.2320029042
rec error:  12.5177408169
rec error:  11.1907570098
rec error:  13.3152390317
rec error:  11.1498887252
rec error:  12.7698659947
rec error:  13.0340481129
rec error:  10.7202856115
rec error:  13.1929518553
rec error:  12.4424999894
rec error:  11.4431156767
rec error:  10.3056802176
rec error:  12

rec error:  9.19012970743
rec error:  9.11179201223
rec error:  10.3730053623
rec error:  9.61384070218
rec error:  9.71027143709
rec error:  9.53941884607
rec error:  9.86532684158
rec error:  9.2729462048
rec error:  9.23587023431
rec error:  9.62705627501
rec error:  10.4147799781
rec error:  9.67951118334
rec error:  9.95939275384
rec error:  9.64794481465
rec error:  9.36258074928
rec error:  9.31175508337
rec error:  9.7859618342
rec error:  9.27163825318
rec error:  9.80367975573
rec error:  9.45329284334
rec error:  9.17161140103
rec error:  9.53774427964
rec error:  9.82943335684
rec error:  9.73952677325
rec error:  9.47327644754
rec error:  9.60750265624
rec error:  9.02809827948
rec error:  8.92275312846
rec error:  9.22156295107
rec error:  10.2026576879
rec error:  8.67382982121
rec error:  8.93648507522
rec error:  8.83748789321
rec error:  8.92598300898
rec error:  9.93667752929
rec error:  11.2506064217
rec error:  11.055038048
rec error:  9.15445823297
rec error:  9.2

### Make predictions with the model

Prepare an example that trains on several data samples and predicts feature values.

```
forecast_crbm <- forecast.crbm <- function(crbm, orig_data, orig_history = NULL, n_samples = 10, n_gibbs = 30)
{
	if (is.null(orig_history))
	{
		l <- nrow(orig_data);
		orig_history <- orig_data[l - 1:crbm$delay,, drop=FALSE];
		orig_history <- array(t(orig_history), c(1, crbm$n_visible * crbm$delay));
		orig_data <- orig_data[l,, drop = FALSE];
		n_seq <- 1;
	} else {
		n_seq <- nrow(orig_data);
	}
	
	persistent_vis_chain <<- orig_data;
	persistent_history <<- orig_history;

        # construct the function that implements our persistent chain.
	sample_fn <- function(crbm, n_gibbs)
	{
		vis_sample <- persistent_vis_chain;
		v_history <- persistent_history;

		vis_mf <- NULL;
		for (k in 1:n_gibbs)
		{
			hid <- sample_h_given_v_crbm(crbm, vis_sample, v_history);
			vis <- sample_v_given_h_crbm(crbm, hid[["sample"]], v_history);

			vis_mf <- vis[["mean"]];
			vis_sample <- vis[["sample"]];
		}

		# add to updates the shared variable that takes care of our persistent chain
		persistent_vis_chain <<- vis_sample;
		persistent_history <<- cbind(vis_sample, persistent_history[,1:((crbm$delay - 1) * crbm$n_visible), drop = FALSE]);

		vis_mf;
	}

	generated_series <- array(0,c(n_seq, n_samples, crbm$n_visible));
	for (t in 1:n_samples)
	{
		#if (t %% 10 == 1) print(paste("Generating frame ", t, " to ", min(t+9, n_samples), sep = ""));
		generated_series[,t,] <- sample_fn(crbm, n_gibbs);
	}
	generated_series;
}
```

### Plot predictions