In [1]:
%matplotlib inline

from __future__ import absolute_import, print_function, unicode_literals, division
from sklearn.datasets import fetch_mldata
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import itertools
import random as rand
import copy

In [2]:
alice = {"Interstellar":1, "Whiplash":1, "Selma":0, "Lego Movie":0, "Birdman":0}
eric = {"Interstellar":0, "Whiplash":0, "Selma":0, "Lego Movie":1, "Birdman":0}
nancy = {"Interstellar":1, "Whiplash":1, "Selma":1, "Lego Movie":0, "Birdman":1}
sarah = {"Interstellar":0, "Whiplash":1, "Selma":0, "Lego Movie":0, "Birdman":1}
#mike = {"Interstellar":1, "Whiplash":1, "Selma":1, "Lego Movie":1, "Birdman":1}
#bob = {"Interstellar":0, "Whiplash":1, "Selma":1, "Lego Movie":0, "Birdman":1}

data = {"alice":alice, "eric":eric, "nancy":nancy, "sarah":sarah} #"mike":mike, "bob":bob}

In [66]:
data = pd.DataFrame.from_dict(data)
data.head()

Unnamed: 0,alice,eric,nancy,sarah
Birdman,0,0,1,1
Interstellar,1,0,1,0
Lego Movie,0,1,0,0
Selma,0,0,1,0
Whiplash,1,0,1,1


In [71]:
data_array = np.array(data)
print(data_array)
print(data_array.shape)

[[0 0 1 1]
 [1 0 1 0]
 [0 1 0 0]
 [0 0 1 0]
 [1 0 1 1]]
(5, 4)


In [81]:
class RBM(object):
    """
    Implementation of RBM
    num_vis and num_hidden do not include bias 
    """

    def __init__(self, data, num_hidden, num_visible, learning_rate):
        # set variables based on input
        self.data = data
        self.data_wbias = copy.deepcopy(self.data)
        self.num_hidden = num_hidden
        self.num_vis = num_visible
        self.learning_rate = learning_rate

        # set weight matrix first row and first column to be RBM bias
        self.hidden_states = np.random.rand(self.num_hidden + 1)
        self.num_data = data.shape[0]
        self.visible_states = np.concatenate((np.ones((self.num_data,1)), self.data),axis=1)
        self.weights = np.random.rand(self.visible_states.shape[1], self.num_hidden + 1)

    # Logistic Sigmoid Function
    def _sigmoid(self,x):
        return 1.0/(1.0+np.exp(-x))
    
    # Binomial Sampling Function
    def _binom_sample(self, a, b):
        prob = self._sigmoid(np.dot(a,b))
        sample = [np.random.binomial(n=1, p=i) for i in prob]
        return np.array(sample)
    
    # Predict hidden given visible
    def _sample_h_given_v(self, v):
        return self._binom_sample(self.weights.T, v.T)

    # Predict visible given hidden
    def _sample_v_given_h(self, h):
        return self._binom_sample(self.weights, h)
    
    def propagate_up(self, vis):
        """
        Propagates visible layer activation to hidden layer
        """
        # add in bias
        vis = np.insert(vis, 0, 1, axis=0)
        vis = np.insert(vis, 0, 1, axis=1)
        
        print("VIS")
        print(vis), vis.shape
        
        vis_activation = self._sigmoid(np.dot(vis, self.weights))
        sample = [np.random.binomial(n=1, p=i) for i in vis_activation]
        
        print("VIS ACTIVATION")
        print(vis_activation)
        
        return [vis_activation, sample]

    def propagate_down(self, hid):
        """
        Propagates hidden layer activation to visible layer
        """
        print(hid)
        hidden_activation = self._sigmoid(np.dot(hid, self.weights.T))
        return hidden_activation

    
    def CDk(self, max_epochs=1000):
        """
        Trains the RBM
        """
        
        for epoch in xrange(0,max_epochs):

            data = np.insert(self.data, 0, 1, axis=1)
            data = np.insert(data, 0, 1, axis=0)
            
            # CDk positive phase
            up_data = self.propagate_up(self.data)
            up_associations = np.dot(data.T, up_data[1])
            
            # CDk negative phase
            down_vis_probs = self.propagate_down(up_data[0])
            
            print("DOWN VIS PROBS")
            print(down_vis_probs)
            
            down_vis_probs = down_vis_probs[1:,:]#remove the bias layer
            
            print("DOWN VIS PROBS REMOVE BIAS")
            print(down_vis_probs)
            
            down_associations = self.propagate_down(down_vis_probs.T) 
            down_associations = np.dot(down_vis_probs.T, down_associations)
            
            self.weights += self.learning_rate * \
                ((up_associations - down_associations)/self.num_data)
                
            error = np.sum((data - down_vis_probs) ** 2)
            if epoch == 1000:
                return down_vis_probs
            #print("Epoch: ", epoch, ", Error: ", error)
    
    
    def Gibbs_alternating(num_gen_samples):
        samples = np.ones((num_gen_samples, self.num_vis + 1))
        samples[0,1:] = np.random.rand(self.num_vis)
        for i in xrange(0, self.num_data-1):
            # calculate hidden from visible
            v = samples [i,:]
            h = _sample_h_given_v(v)
            h[0] = 1
            # calculate visible
            v = _sample_v_given_h(h)
            samples[i+1,:] = v      
        return samples[:,1:]
    

In [82]:
r = RBM(data_array, data_array.shape[0], 3, .1)
sample = r.CDk(max_epochs=100)
print(r.weights)
user = np.array([[1, 0, 0, 0, 1, 0]])
print(r._sample_h_given_v(user))

VIS
[[1 1 1 1 1]
 [1 0 0 1 1]
 [1 1 0 1 0]
 [1 0 1 0 0]
 [1 0 0 1 0]
 [1 1 0 1 1]]
VIS ACTIVATION
[[ 0.85059376  0.85237934  0.90854644  0.93278191  0.90128655  0.96741815]
 [ 0.81047483  0.80124824  0.87300753  0.84644308  0.64791178  0.82882799]
 [ 0.77022768  0.79788111  0.84500552  0.79045187  0.78444256  0.86630338]
 [ 0.57523079  0.701423    0.75856157  0.58647006  0.77118222  0.81297976]
 [ 0.72429806  0.74883416  0.82555145  0.67209766  0.62181487  0.72908781]
 [ 0.84511804  0.84221655  0.8878881   0.91026992  0.80287486  0.92099956]]
[[ 0.85059376  0.85237934  0.90854644  0.93278191  0.90128655  0.96741815]
 [ 0.81047483  0.80124824  0.87300753  0.84644308  0.64791178  0.82882799]
 [ 0.77022768  0.79788111  0.84500552  0.79045187  0.78444256  0.86630338]
 [ 0.57523079  0.701423    0.75856157  0.58647006  0.77118222  0.81297976]
 [ 0.72429806  0.74883416  0.82555145  0.67209766  0.62181487  0.72908781]
 [ 0.84511804  0.84221655  0.8878881   0.91026992  0.80287486  0.92099956]]


ValueError: shapes (5,5) and (6,5) not aligned: 5 (dim 1) != 6 (dim 0)