In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt  
from sklearn.utils import shuffle 

# Load data from Excel file (no header row). The last column is used as the
# class label below; every other column is treated as an input feature.
# The array is cast to float so that the in-place normalisation further down
# is not silently truncated to integers when the sheet contains only
# integer-valued columns.
data = pd.read_excel('classification-seeds.xlsx', header=None).values.astype(float)
# data = pd.read_excel('classification-iris.xlsx', header=None).values.astype(float)
# data = pd.read_excel('dataset_diabetic.xlsx', header=None).values.astype(float)

# Shuffle the rows with a fixed seed so the later train/test split is reproducible
data = shuffle(data, random_state=42)

num_data = data.shape[0]
num_col = data.shape[1]
num_class = len(np.unique(data[:,num_col-1]))

# Normalize every input feature by its column maximum.
# NOTE(review): the maximum is taken over all rows, i.e. before the
# train/test split that a later cell performs.
for ii in range(num_col-1):
    col_max = np.max(data[:, ii])
    # A zero maximum would turn the whole column into nan/inf; leave it as is.
    if col_max != 0:
        data[:, ii] = data[:, ii] / col_max

In [None]:
# ---- Train / test split sizes -------------------------------------------
percent_train = 0.7
num_train = round(num_data * percent_train)
num_test = num_data - num_train

# ---- One-hot targets ------------------------------------------------------
# Label k (read from the last column) selects row k-1 of the identity matrix,
# which is the one-hot vector with a 1 in column k-1.
y_one_hot_train = np.eye(num_class)[data[:num_train, num_col-1].astype(int) - 1]
y_one_hot_test = np.eye(num_class)[data[num_train:, num_col-1].astype(int) - 1]

# ---- Network architecture: n0 inputs -> n1 -> n2 -> n3 -> n4 outputs -------
# Values below are the "seed" dataset settings.
# (iris alternative: n1 = 15, n2 = 10, n3 = 5)
n0 = data.shape[1] - 1
n1, n2, n3 = 15, 8, 5
n4 = num_class
layer_sizes = [n0, n1, n2, n3, n4]

# ---- Per-layer learning rates ----------------------------------------------
learning_rate1 = learning_rate2 = learning_rate3 = 0.02
learning_rate4 = 0.0002  # seed (iris alternative: 0.002)

# ---- Number of passes over the training data -------------------------------
epoch = 45  # seed (iris alternative: 35)

# ---- Per-epoch loss history and prediction buffers --------------------------
mse_train = np.zeros((epoch, num_class))
mse_test = np.zeros((epoch, num_class))

output_data_train = np.zeros(epoch)
output_data_test = np.zeros(epoch)

# ---- Weight / bias initialisation: uniform in [a, b), fixed seed -----------
a, b = -1, 1
np.random.seed(1)

# Draw order (w1..w4, then b1..b4) matters for reproducibility of the seed.
w1, w2, w3, w4 = [
    np.random.uniform(a, b, size=(fan_out, fan_in))
    for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:])
]
b1, b2, b3, b4 = [
    np.random.uniform(a, b, size=(fan_out, 1))
    for fan_out in layer_sizes[1:]
]

# ---- Momentum state: one zero-initialised buffer per parameter -------------
beta = 0.9
vw1, vw2, vw3, vw4 = [np.zeros((n1, n0)), np.zeros((n2, n1)),
                      np.zeros((n3, n2)), np.zeros((n4, n3))]
vb1, vb2, vb3, vb4 = [np.zeros((n1, 1)), np.zeros((n2, 1)),
                      np.zeros((n3, 1)), np.zeros((n4, 1))]

# ---- Weight-decay coefficient used in the parameter updates ----------------
lambda_reg = 0.0001

In [None]:
# Small constant that used to guard the softmax denominator against zero.
# The max-shift below guarantees a denominator >= 1, so it is no longer needed
# there; the name is kept in case another cell refers to it.
epsilon = 0.000001
def softmax(z):
    """Return the softmax of a vector of logits, rounded to 3 decimals.

    The normalisation runs over *all* elements of ``z`` (it is meant for a
    single vector, e.g. an ``(n, 1)`` column), and the result has the same
    shape as ``z``.

    The logits are shifted by their maximum before exponentiation. This is
    mathematically a no-op for softmax but prevents ``exp`` from overflowing
    to ``inf`` (which produced ``nan``) for large logits, and prevents the
    whole output from collapsing to zero for strongly negative logits.
    """
    z = np.asarray(z, dtype=float)
    if z.size == 0:
        # Nothing to normalise; mirror the input shape.
        return z
    exp_z = np.exp(z - np.max(z))
    total = exp_z.sum()  # >= 1 because the largest shifted logit is exactly 0
    softmax_z = np.round(exp_z / total, 3)
    return softmax_z

In [None]:
# Lower bound applied to the softmax output before taking the logarithm.
# softmax() rounds to 3 decimals, so exact zeros do occur.
log_floor = 1e-12


def _forward(x):
    """Run one column vector ``x`` through the network with the current weights.

    Hidden layers use tanh (written in the original as a bipolar sigmoid,
    2 / (1 + exp(-2 * net)) - 1), which is the activation whose derivative is
    the ``1 - o ** 2`` used during backpropagation. The output layer is
    linear followed by softmax.

    Returns the three hidden activations and the softmax output.
    """
    o1 = np.tanh(w1 @ x + b1)
    o2 = np.tanh(w2 @ o1 + b2)
    o3 = np.tanh(w3 @ o2 + b3)
    z = softmax(w4 @ o3 + b4)
    return o1, o2, o3, z


def _cross_entropy(target_row, z):
    """Cross-entropy (base-10 log) between a one-hot row and a softmax column.

    ``z`` is clipped away from zero first so the logarithm is always defined.
    """
    return -(target_row @ np.log10(np.clip(z, log_floor, 1.0))).item()


for t in range(epoch):
    error_data_train = np.zeros(num_train)
    output_data_train = np.zeros(num_train)

    # ---- One pass of per-sample (online) gradient descent with momentum ----
    for i in range(num_train):
        # feed-forward
        input_data = data[i, :num_col-1].reshape(-1,1)
        target = y_one_hot_train[i:i+1].reshape(-1,1)
        o1, o2, o3, z = _forward(input_data)
        output_data_train[i] = np.argmax(z)+1  # predicted label, 1-based
        error_data_train[i] = _cross_entropy(y_one_hot_train[i:i+1], z)

        # Backpropagation for a single sample.
        # All deltas are computed from the weights used in the forward pass,
        # before any of them is updated.
        # delta_k = dE/dnet_k; for softmax + cross-entropy delta4 = z - target.
        delta4 = z - target
        delta3 = (1 - o3 ** 2) * (w4.T @ delta4)
        delta2 = (1 - o2 ** 2) * (w3.T @ delta3)
        delta1 = (1 - o1 ** 2) * (w2.T @ delta2)

        # Layer 4: momentum update followed by weight decay
        vw4 = beta * vw4 + (1 - beta) * (delta4 @ o3.T)
        w4 = w4 - learning_rate4 * vw4 - lambda_reg * w4
        vb4 = beta * vb4 + (1 - beta) * delta4
        b4 = b4 - learning_rate4 * vb4 - lambda_reg * b4

        # Layer 3
        vw3 = beta * vw3 + (1 - beta) * (delta3 @ o2.T)
        w3 = w3 - learning_rate3 * vw3 - lambda_reg * w3
        vb3 = beta * vb3 + (1 - beta) * delta3
        b3 = b3 - learning_rate3 * vb3 - lambda_reg * b3

        # Layer 2
        vw2 = beta * vw2 + (1 - beta) * (delta2 @ o1.T)
        w2 = w2 - learning_rate2 * vw2 - lambda_reg * w2
        vb2 = beta * vb2 + (1 - beta) * delta2
        b2 = b2 - learning_rate2 * vb2 - lambda_reg * b2

        # Layer 1
        vw1 = beta * vw1 + (1 - beta) * (delta1 @ input_data.T)
        w1 = w1 - learning_rate1 * vw1 - lambda_reg * w1
        vb1 = beta * vb1 + (1 - beta) * delta1
        b1 = b1 - learning_rate1 * vb1 - lambda_reg * b1

    # The value stored as "MSE" is the mean of the squared per-sample
    # cross-entropy; the scalar is broadcast across the row of mse_train.
    mse_train[t] = np.mean(error_data_train ** 2,axis=0)

    # ---- Evaluate on the held-out rows (no weight updates) ----------------
    error_data_test = np.zeros(num_test)
    output_data_test = np.zeros(num_test)
    for i in range(num_test):
        input_data = data[num_train + i, :num_col-1].reshape(-1,1)
        _, _, _, z = _forward(input_data)
        output_data_test[i] = np.argmax(z)+1
        error_data_test[i] = _cross_entropy(y_one_hot_test[i:i+1], z)

    mse_test[t] = np.mean(error_data_test ** 2,axis=0)

    # ---- Progress plots for this epoch ------------------------------------
    # Epochs 1..t+1 have been recorded so far, i.e. rows 0..t of the history.
    epochs_so_far = np.arange(1, t + 2)

    plt.figure(1)
    plt.subplot(2, 2, 1)
    plt.plot(data[:num_train, num_col-1], '-sr')
    plt.plot(output_data_train, '-*b')
    plt.xlabel('Train Data')
    plt.ylabel('Output')

    # Plotting the training MSE
    plt.subplot(2, 2, 2)
    plt.semilogy(epochs_so_far, mse_train[:t + 1])
    plt.xlabel('Epoch')
    plt.ylabel('MSE Train')

    plt.subplot(2, 2, 3)
    plt.plot(data[num_train:, num_col-1], '-sr')
    plt.plot(output_data_test, '-*b')
    plt.xlabel('Test Data')
    plt.ylabel('Output')

    # Plotting the test MSE
    plt.subplot(2, 2, 4)
    plt.semilogy(epochs_so_far, mse_test[:t + 1])
    plt.xlabel('Epoch')
    plt.ylabel('MSE Test')

    print('Epoch: {} \t'.format(t+1))
    print('MSE_train: ',mse_train[t],' MSE_Test: ' ,mse_test[t])

    plt.tight_layout()
    plt.show()

    print("\n\033[1;m" + "*" * 125)
    
    
# Predicted label vs. actual label for the training rows, with a
# first-degree least-squares fit drawn through the points.
train_labels = data[:num_train, num_col - 1]
plt.figure(2)
m_train, b_train = np.polyfit(train_labels, output_data_train, 1)
plt.scatter(train_labels, output_data_train, facecolors='none', edgecolors='#104E8B')
plt.plot(train_labels, m_train * train_labels + b_train, 'r')
plt.title('Regression Train')

# Same view for the test rows.
test_labels = data[num_train:, num_col - 1]
plt.figure(3)
m_test, b_test = np.polyfit(test_labels, output_data_test, 1)
plt.scatter(test_labels, output_data_test, facecolors='none', edgecolors='#104E8B')
plt.plot(test_labels, m_test * test_labels + b_test, 'r')
plt.title('Regression Test')

# Loss values recorded for the last epoch.
mse_train_result, mse_test_result = mse_train[-1], mse_test[-1]

print("Final MSE on Train Data:", mse_train_result)
print("Final MSE on Test Data:", mse_test_result)

In [None]:
# Predicted and actual class labels for the training rows
predicted_classes = output_data_train
actual_classes = data[:num_train,num_col-1]

# Count matrix: rows index the actual class, columns the predicted class.
# Label k is mapped to index k-1.
confusion_matrix = np.zeros((num_class, num_class))
np.add.at(
    confusion_matrix,
    (actual_classes.astype(int) - 1, predicted_classes.astype(int) - 1),
    1,
)

# Draw the matrix as a heat map
plt.imshow(confusion_matrix, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Train Confusion Matrix ')
plt.colorbar()

# Write each count into its cell (x is the column, y is the row)
for (i, j), count in np.ndenumerate(confusion_matrix):
    plt.text(j, i, str(int(count)), fontsize=12, ha='center', va='center')

plt.xlabel('Predicted')
plt.ylabel('Actual')
# Tick labels show the class labels 1..num_class instead of 0-based indices
tick_positions = np.arange(num_class)
tick_labels = np.arange(1, num_class + 1)
plt.xticks(tick_positions, tick_labels)
plt.yticks(tick_positions, tick_labels)

plt.show()


In [None]:
# Predicted and actual class labels for the test rows
predicted_classes = output_data_test
actual_classes = data[num_train:,num_col-1]

# Count matrix: rows index the actual class, columns the predicted class.
# Label k is mapped to index k-1.
confusion_matrix = np.zeros((num_class, num_class))
np.add.at(
    confusion_matrix,
    (actual_classes.astype(int) - 1, predicted_classes.astype(int) - 1),
    1,
)

# Draw the matrix as a heat map
plt.imshow(confusion_matrix, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Test Confusion Matrix ')
plt.colorbar()

# Write each count into its cell (x is the column, y is the row)
for (i, j), count in np.ndenumerate(confusion_matrix):
    plt.text(j, i, str(int(count)), fontsize=12, ha='center', va='center')

plt.xlabel('Predicted')
plt.ylabel('Actual')
# Tick labels show the class labels 1..num_class instead of 0-based indices
tick_positions = np.arange(num_class)
tick_labels = np.arange(1, num_class + 1)
plt.xticks(tick_positions, tick_labels)
plt.yticks(tick_positions, tick_labels)

plt.show()



In [None]:
# Fraction of rows whose predicted label equals the label in the last column
train_hits = output_data_train == data[:num_train, num_col-1]
train_accuracy = train_hits.mean()
print(f"Accuracy on the train set: {train_accuracy * 100}%")

test_hits = output_data_test == data[num_train:, num_col-1]
test_accuracy = test_hits.mean()
print(f"Accuracy on the test set: {test_accuracy * 100}%")