In [14]:

import tensorflow as tf
import numpy as np
import tensorflow.compat.v1 as tf1
import tensorflow.keras as keras
import math
tf1.disable_eager_execution()
class RBM(object):
    def __init__(self,input_size,output_size,learning_rate=1.0):
        self._input_size=input_size
        self._output_size=output_size
        self.w=np.zeros([input_size,output_size],np.float32)
        self.hb=np.zeros([output_size],np.float32)
        self.vb=np.zeros([input_size],np.float32)

        self.learning_rate=learning_rate
    # Fits the result from the weighted visible layer plus the bias into a sigmoid curve
    def prob_h_given_v(self, visible, w, hb):
        # Sigmoid
        return tf.math.sigmoid(tf.linalg.matmul(visible, w) + hb)
     # Fits the result from the weighted hidden layer plus the bias into a sigmoid curve
    def prob_v_given_h(self, hidden, w, vb):
        return tf.math.sigmoid(tf.linalg.matmul(hidden, tf.transpose(w)) + vb)
    def sample_prob(self,probs):
        return tf.nn.relu(tf.sign(probs-tf.random.uniform(tf.shape(probs))))
    def train(self,X,epochs=2,batchsize=128):
        _w=tf1.placeholder("float",[self._input_size,self._output_size])
        _hb=tf1.placeholder("float",[self._output_size])
        _vb=tf1.placeholder("float",[self._input_size])

        prv_w=np.zeros([self._input_size,self._output_size],np.float32)
        prv_hb=np.zeros([self._output_size],np.float32)
        prv_vb=np.zeros([self._input_size],np.float32)

        cur_w=np.zeros([self._input_size,self._output_size],np.float32)
        cur_hb=np.zeros([self._output_size],np.float32)
        cur_vb=np.zeros([self._input_size],np.float32)
        v0=tf1.placeholder("float",[None,self._input_size])

        h0=self.sample_prob(self.prob_h_given_v(v0,_w,_hb))
        v1=self.sample_prob(self.prob_v_given_h(h0,_w,_vb))
        h1=self.prob_h_given_v(v1,_w,_hb)

        positive_grad=tf.linalg.matmul(tf.transpose(v0),h0)
        negative_grad=tf.linalg.matmul(tf.transpose(v1),h1)

        update_w=_w+self.learning_rate*(positive_grad-negative_grad)/tf.cast(tf.shape(v0)[0],dtype=tf.float32)
        update_vb=_vb+self.learning_rate*tf.reduce_mean(v0-v1,0)
        update_hb=_hb+self.learning_rate*tf.reduce_mean(h0-h1,0)

        err=tf.reduce_mean(tf.square(v0-v1))

        with tf1.Session() as sess:
            sess.run(tf1.global_variables_initializer())
            for epoch in range(epochs):
                for start,end in zip(range(0,len(X),batchsize),range(batchsize,len(X),batchsize)):
                    batch=X[start:end]
                    cur_w  = sess.run(update_w,  feed_dict={v0: batch, _w: prv_w, _hb: prv_hb, _vb: prv_vb})
                    cur_hb = sess.run(update_hb, feed_dict={v0: batch, _w: prv_w, _hb: prv_hb, _vb: prv_vb})
                    cur_vb = sess.run(update_vb, feed_dict={v0: batch, _w: prv_w, _hb: prv_hb, _vb: prv_vb})
                    prv_w=cur_w
                    prv_hb=cur_hb
                    prv_vb=cur_vb
                error=sess.run(err,feed_dict={v0:X,_w: cur_w, _vb: cur_vb, _hb: cur_hb})


                print('Epoch: %d' % epoch, 'reconstruction error: %f' % error)

            self.w=prv_w
            self.hb=prv_hb
            self.vb=prv_vb
    
    def rbm_outpt(self, X):
        input_X = tf.constant(X,dtype=tf.float32)
        _w = tf.constant(self.w)
        _hb = tf.constant(self.hb)
        out = tf.math.sigmoid(tf.linalg.matmul(input_X, _w) + _hb)
        with tf1.Session() as sess:
            sess.run(tf1.global_variables_initializer())
            return sess.run(out)


mnist = keras.datasets.mnist

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
#rbm=RBM(784,500)
#print(train_images[0])
train_images=train_images.reshape(-1,784)
train_images=train_images/255.0
#print("new:",train_images[0])
#rbm.train(train_images)
#print(rbm.rbm_outpt(train_images[:10]))
print("success")

success


In [15]:

'''训练每一个RBM'''
RBM_hidden_sizes = [500, 200 , 50 ] #create 4 layers of RBM with size 785-500-200-50

#Since we are training, set input as training data
inpX = train_images

#Create list to hold our RBMs
rbm_list = []

#Size of inputs is the number of inputs in the training set
input_size = inpX.shape[1]
#with tf.device('/gpu:0'):
#For each RBM we want to generate
for i, size in enumerate(RBM_hidden_sizes):
    print('RBM: ',i,' ',input_size,'->', size)
    rbm_list.append(RBM(input_size, size))
    input_size = size
#For each RBM in our list
for rbm in rbm_list:
    print('New RBM:')
    #Train a new one
    rbm.train(inpX) 
    #Return the output layer
    inpX = rbm.rbm_outpt(inpX)

RBM:  0   784 -> 500
RBM:  1   500 -> 200
RBM:  2   200 -> 50
New RBM:
Epoch: 0 reconstruction error: 0.064268
Epoch: 1 reconstruction error: 0.054660
New RBM:
Epoch: 0 reconstruction error: 0.031731
Epoch: 1 reconstruction error: 0.028690
New RBM:
Epoch: 0 reconstruction error: 0.065925
Epoch: 1 reconstruction error: 0.062902


In [16]:
class NN(object):

    def __init__(self, sizes, X, Y):
        # Initialize hyperparameters
        self._sizes = sizes
        self._X = X
        self._Y = Y
        self.w_list = []
        self.b_list = []
        self._learning_rate = 1.0
        self._momentum = 0.0
        self._epoches = 10
        self._batchsize = 100
        input_size = X.shape[1]

        # initialization loop
        for size in self._sizes + [Y.shape[1]]:
            # Define upper limit for the uniform distribution range
            max_range = 4 * math.sqrt(6. / (input_size + size))

            # Initialize weights through a random uniform distribution
            self.w_list.append(
                np.random.uniform(-max_range, max_range, [input_size, size]).astype(np.float32))

            # Initialize bias as zeroes
            self.b_list.append(np.zeros([size], np.float32))
            input_size = size

    # load data from rbm
    def load_from_rbms(self, dbn_sizes, rbm_list):
        # Check if expected sizes are correct
        assert len(dbn_sizes) == len(self._sizes)

        for i in range(len(self._sizes)):
            # Check if for each RBN the expected sizes are correct
            assert dbn_sizes[i] == self._sizes[i]

        # If everything is correct, bring over the weights and biases
        for i in range(len(self._sizes)):
            self.w_list[i] = rbm_list[i].w
            self.b_list[i] = rbm_list[i].hb

    # Training method
    def train(self):
        # Create placeholders for input, weights, biases, output
        _a = [None] * (len(self._sizes) + 2)
        _w = [None] * (len(self._sizes) + 1)
        _b = [None] * (len(self._sizes) + 1)
        _a[0] = tf1.placeholder("float", [None, self._X.shape[1]])
        y = tf1.placeholder("float", [None, self._Y.shape[1]])

        # Define variables and activation functoin
        for i in range(len(self._sizes) + 1):
            _w[i] = tf1.Variable(self.w_list[i])
            _b[i] = tf1.Variable(self.b_list[i])
        for i in range(1, len(self._sizes) + 2):
            _a[i] = tf.math.sigmoid(tf.linalg.matmul(_a[i - 1], _w[i - 1]) + _b[i - 1])

        # Define the cost function
        cost = tf.reduce_mean(tf.square(_a[-1] - y))

        # Define the training operation (Momentum Optimizer minimizing the Cost function)
        train_op = tf1.train.MomentumOptimizer(
            self._learning_rate, self._momentum).minimize(cost)

        # Prediction operation
        predict_op = tf.argmax(_a[-1], 1)

        # Training Loop
        with tf1.Session() as sess:
            # Initialize Variables
            sess.run(tf1.global_variables_initializer())

            # For each epoch
            for i in range(self._epoches):

                # For each step
                for start, end in zip(
                        range(0, len(self._X), self._batchsize), range(self._batchsize, len(self._X), self._batchsize)):
                    # Run the training operation on the input data
                    sess.run(train_op, feed_dict={
                        _a[0]: self._X[start:end], y: self._Y[start:end]})
                    t=sess.run(_a[-1])
                    print(t.shape)

                for j in range(len(self._sizes) + 1):
                    # Retrieve weights and biases
                    self.w_list[j] = sess.run(_w[j])
                    self.b_list[j] = sess.run(_b[j])

                print("Accuracy rating for epoch " + str(i) + ": " + str(np.mean(np.argmax(self._Y, axis=1) == \
                                                                                 sess.run(predict_op, feed_dict={_a[0]: self._X, y: self._Y}))))

In [17]:
trX,trY=train_images, train_labels
trY=np.eye(10)[trY]
print(trX.shape)
print(trY.shape)

(60000, 784)
(60000, 10)


In [23]:
with tf.device('/cpu:0'):
    nNet = NN(RBM_hidden_sizes, trX, trY)
    nNet.load_from_rbms(RBM_hidden_sizes,rbm_list)
    nNet.train()

InvalidArgumentError: Cannot assign a device for operation Variable_16/IsInitialized/VarIsInitializedOp: node Variable_16/IsInitialized/VarIsInitializedOp (defined at <ipython-input-16-e85014418ee1>:54)  was explicitly assigned to /device:GPU:0 but available devices are [ /job:localhost/replica:0/task:0/device:CPU:0, /job:localhost/replica:0/task:0/device:XLA_CPU:0 ]. Make sure the device specification refers to a valid device. The requested device appears to be a GPU, but CUDA is not enabled.
	 [[Variable_16/IsInitialized/VarIsInitializedOp]]