In [2]:
import numpy as np
import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from tensorflow.keras import layers, models

# Load and preprocess data
digits = load_digits()
X = digits.data
y = digits.target
X = StandardScaler().fit_transform(X).astype(np.float32)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define RBM class (basic implementation)
class RBM(tf.keras.Model):
    def __init__(self, n_visible, n_hidden):
        super(RBM, self).__init__()
        self.n_visible = n_visible
        self.n_hidden = n_hidden
        self.W = tf.Variable(tf.random.normal([n_visible, n_hidden], stddev=0.01))
        self.h_bias = tf.Variable(tf.zeros([n_hidden]))
        self.v_bias = tf.Variable(tf.zeros([n_visible]))

    def sample_prob(self, probs):
        return tf.nn.relu(tf.sign(probs - tf.random.uniform(tf.shape(probs))))

    def call(self, v):
        h_prob = tf.nn.sigmoid(tf.matmul(v, self.W) + self.h_bias)
        h_sample = self.sample_prob(h_prob)
        v_prob = tf.nn.sigmoid(tf.matmul(h_sample, tf.transpose(self.W)) + self.v_bias)
        return v_prob, h_sample

    def train_step(self, v0, lr=0.01):
        with tf.GradientTape() as tape:
            h_prob0 = tf.nn.sigmoid(tf.matmul(v0, self.W) + self.h_bias)
            h_sample0 = self.sample_prob(h_prob0)
            v_prob, h_sample = self.call(v0)

            positive_grad = tf.matmul(tf.transpose(v0), h_prob0)
            negative_grad = tf.matmul(tf.transpose(v_prob), tf.nn.sigmoid(tf.matmul(v_prob, self.W) + self.h_bias))

            dW = (positive_grad - negative_grad) / tf.cast(tf.shape(v0)[0], tf.float32)
            dvb = tf.reduce_mean(v0 - v_prob, 0)
            dhb = tf.reduce_mean(h_prob0 - tf.nn.sigmoid(tf.matmul(v_prob, self.W) + self.h_bias), 0)

        self.W.assign_add(lr * dW)
        self.v_bias.assign_add(lr * dvb)
        self.h_bias.assign_add(lr * dhb)

# Layer-wise training
rbm1 = RBM(64, 256)
rbm2 = RBM(256, 128)

# Train RBM1
for epoch in range(5):
    for i in range(0, X_train.shape[0], 32):
        batch = X_train[i:i+32]
        rbm1.train_step(batch)

# Get output from RBM1 to feed into RBM2
_, h1_train = rbm1.call(X_train)
_, h1_test = rbm1.call(X_test)

# Train RBM2
for epoch in range(5):
    for i in range(0, h1_train.shape[0], 32):
        batch = h1_train[i:i+32]
        rbm2.train_step(batch)

# Get output from RBM2 for final layer
_, h2_train = rbm2.call(h1_train)
_, h2_test = rbm2.call(h1_test)

# Fine-tune with backprop (supervised)
model = models.Sequential([
    layers.Input(shape=(128,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(h2_train, y_train, epochs=10, validation_data=(h2_test, y_test), verbose=0)

# Evaluate DBN
dbn_preds = model.predict(h2_test)
dbn_preds = np.argmax(dbn_preds, axis=1)
dbn_acc = accuracy_score(y_test, dbn_preds)
print(f"Accuracy of DBN (Stacked RBMs + Fine-tuning): {dbn_acc:.4f}")

# Baseline: Regular deep network (no RBM pretraining)
baseline = models.Sequential([
    layers.Input(shape=(64,)),
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(10, activation='softmax')
])
baseline.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
baseline.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), verbose=0)

# Evaluate baseline
baseline_preds = baseline.predict(X_test)
baseline_preds = np.argmax(baseline_preds, axis=1)
baseline_acc = accuracy_score(y_test, baseline_preds)
print(f"Accuracy of baseline deep network: {baseline_acc:.4f}")


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Accuracy of DBN (Stacked RBMs + Fine-tuning): 0.2593
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
Accuracy of baseline deep network: 0.9796
