In [18]:
###############################################
## Import necessary libraries and files
###############################################
import numpy as np

# Optional GPU backend: when CuPy imports and reports a usable CUDA device,
# the name `np` is rebound to CuPy so the rest of the notebook runs on the GPU
# through the same `np.` calls. Any failure falls back to NumPy.
try:
    import cupy
    if cupy.cuda.is_available():
        np = cupy
except Exception:
    # Best-effort fallback to NumPy. `Exception` (rather than a bare `except`)
    # keeps KeyboardInterrupt / SystemExit from being silently swallowed.
    pass
# NOTE: there is deliberately no `cp` alias; use `np`, which is CuPy when a GPU
# is available and NumPy otherwise.
import matplotlib.pyplot as plt
from skimage.metrics import structural_similarity as ssim
import time
import sys
from rbm import RBM
from aux import binarize_data

In [22]:
###############################################
## Control Panel
###############################################

# Tunable hyperparameters of the RBM, in this order:
#   dataset   {'mnist','kmnist'}
#   KH        {None, 'hKH', 'vKH', 'hvKH'}  -- KH modulation approach
#   W_init    {'lecun', 'std'}              -- weight initialization
#   eps0      KH learning rate
#   delta     KH penalty term
#   M         number of hidden units
# (The commented-out script variant of this cell read the same six values
#  from sys.argv[1] .. sys.argv[6], casting eps0/delta to float and M to int.)
hyper = ['mnist', 'hKH', 'lecun', 2e-3, 0.7, 100]
dataset, KH, W_init, eps0, delta, M = hyper

# Output location and run label
addrss = 'out/'
label = 'p1.5decay'

# RBM training setup
training = 'CD'       # {'CD', 'PCD'} -- training algorithm of the RBM
k = 1                 # number of Gibbs sampling steps
epochs = 1            # number of epochs (S)
batch_size = 100
eta = 1e-1            # learning rate of the RBM
N = 784               # number of visible units
seed = 1234           # random seed

# KH settings
eps_d = True          # {False, True} -- decays with a polynomial eps0*(1- t/epochs)^(3/2)
p = 2.0               # KH Lebesgue power
l = 2                 # KH ranking parameter l >= 2
R = 1.0 if W_init == 'std' else 0.1   # Lebesgue p norm radius

In [23]:
import tensorflow

# Load MNIST through Keras and convert everything to arrays of the active
# backend (`np` is NumPy, or CuPy when the import cell found a usable GPU).
(train_data, train_labels), (test_data, test_labels) = tensorflow.keras.datasets.mnist.load_data()

train_data = np.array(train_data).astype(np.float32)
test_data = np.array(test_data).astype(np.float32)

train_labels = np.array(train_labels)
test_labels = np.array(test_labels)

# Seed the RNG *before* shuffling so the train/validation split (and therefore
# the row order seen by later cells) is the same on every Restart & Run All.
np.random.seed(seed)

# Shuffled row indices: the first `split_index` rows become the training set,
# the remainder the validation set.
indices = np.arange(len(train_data))
np.random.shuffle(indices)

split_index = 50000
train_indices, val_indices = indices[:split_index], indices[split_index:]

# Split images and labels with the same indices so they stay aligned
train_data, valid_data = train_data[train_indices], train_data[val_indices]
train_labels, valid_labels = train_labels[train_indices], train_labels[val_indices]

# Flatten every image from (rows, cols) to a single vector of rows*cols pixels
train_data = train_data.reshape(len(train_data), train_data.shape[1] * train_data.shape[2])
valid_data = valid_data.reshape(len(valid_data), valid_data.shape[1] * valid_data.shape[2])
test_data = test_data.reshape(len(test_data), test_data.shape[1] * test_data.shape[2])

print("Training Images Shape:", train_data.shape)
print("Training Labels Shape:", train_labels.shape)
print("Validation Images Shape:", valid_data.shape)
print("Validation Labels Shape:", valid_labels.shape)
print("Test Images Shape:", test_data.shape)
print("Test Labels Shape:", test_labels.shape)

Training Images Shape: (50000, 784)
Training Labels Shape: (50000,)
Validation Images Shape: (10000, 784)
Validation Labels Shape: (10000,)
Test Images Shape: (10000, 784)
Test Labels Shape: (10000,)


In [24]:
###############################################
## Loading and preprocessing dataset
###############################################

# Alternative source (disabled): load a pre-split archive instead of Keras.
#   load_data  = np.load('./' + dataset + '.npz')
#   train_data = load_data['tr_data']
#   valid_data = load_data['vl_data']
#   test_data  = load_data['t_data']

# Merge the train and validation rows into one training array.
# NOTE(review): this rebinds `train_data` to itself + `valid_data`, so running
# this cell a second time without re-running the loading cell appends the
# validation rows again. `train_labels` is not extended here.
train_data = np.concatenate((train_data, valid_data))

# Binarize, then make sure the results are arrays of the active backend.
# The set passed to the RBM as "validation" is built from `test_data`.
X_train = np.array(binarize_data(train_data))
X_valid = np.array(binarize_data(test_data))

In [25]:
# Train the RBM and report the wall-clock time of the whole run.
start_time = time.time()

# Seed through the `np` alias: the import cell binds `np` to CuPy when a GPU is
# available and to NumPy otherwise. The name `cp` used here previously is never
# defined in this notebook, so the cell raised NameError on a fresh kernel.
np.random.seed(seed)

rbm = RBM(N=N, M=M, eta=eta, batch_size=batch_size, W_init=W_init)
rbm.train(
    X_train, X_valid,
    epochs=epochs, k=k, training=training,
    KH=KH, R=R, l=l, delta=delta, p=p, eps0=eps0,
    label=label, addrss=addrss,
    save_checkpoints=False, track_learning=True,
    save_learn_funcs=True, save_params=True,
    plot_weights=False, eps_d=eps_d, dataset=dataset, seed=seed,
)

end_time = time.time()  # Record the end time for the entire training process
total_training_time = end_time - start_time
print('Total training time: %.2f seconds' % total_training_time)

Total training time: 37.47 seconds
