In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# One-hot encoding
encoder = OneHotEncoder(sparse=False)
y = encoder.fit_transform(y.reshape(-1, 1))

# train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1017)

In [2]:
def zlst(m):
    zlst=np.zeros((2**m,m))
    for i in range(2**m):
        z=format(i, 'b').zfill(m)
        z=np.array(list(z))
        zlst[i,:]=z
    return zlst

In [3]:
import numpy as np

def sigmoid(z):
        return 1 / (1 + np.exp(-z))

def softmax(z):
    exp_z = np.exp(z)
    return exp_z / np.sum(exp_z)

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size, lr): #p,m,g
        self.p = input_size #p
        self.m = hidden_size #m
        self.g = output_size #g
        self.lr = lr
        self.zlst=zlst(self.m)

        # weight initialize & shape construction
        np.random.seed(1183)
        self.W = np.random.randn(self.p, self.m) #p*m
        np.random.seed(202)
        self.V = np.random.randn(self.m, self.g) #m*g

    def forward(self, X): #forward propagation

        self.A1 = self.W.T @ X #m*1
        self.U = sigmoid(self.A1) #m*1

        self.A2 = self.V.T @ self.U #g*1
        self.O = softmax(self.A2) #g*1
        return self.O

########################################################################

    def M_step(self, X, y): #EM algorithm

        grad_W = np.zeros((self.p, self.m)) #p*m
        grad_V = np.zeros((self.m, self.g)) #m*g

        ####################

        for h in range(self.m):
            grad_Wh=0
            self.zlsth=self.zlst[self.zlst[:,h]==1]
            for j in range(len(X)):
                sumz, sumzh=self.E_step_W(j,X,y)
                grad_Wh += (sumzh/sumz-self.U[h])*X[j,:]

            grad_W[:,h]=grad_Wh

        ####################

        for h in range(self.m):
            self.zlsth=self.zlst[self.zlst[:,h]==1]
            for i in range(self.g):
                for j in range(len(X)):
                    sumz, sumzy = self.E_step_V(j, X, y, i)
                    grad_V[h,i] += sumzy/sumz

        ####################

        # update weight & bias
        self.W += grad_W * self.lr
        self.V += grad_V * self.lr

########################################################################

    def E_step_W(self, j, X, y):
        self.forward(X[j,:])

        sumz=0
        for z in self.zlst:
            pr_xyz=1
            for h in range(self.m):
                pr_xyz*=self.U[h]**z[h]*(1-self.U[h])**(1-z[h])
            for i in range(self.g):
                pr_xyz*=softmax(self.V.T @ z)[i]**y[j,i]
            sumz+=pr_xyz

        sumzh=0
        for z in self.zlsth:
            pr_xyz=1
            for h in range(self.m):
                pr_xyz*=self.U[h]**z[h]*(1-self.U[h])**(1-z[h])
            for i in range(self.g):
                pr_xyz*=softmax(self.V.T @ z)[i]**y[j,i]
            sumzh+=pr_xyz

        return sumz, sumzh

    def E_step_V(self, j, X, y, i):
        self.forward(X[j,:])

        sumz=0
        for z in self.zlst:
            pr_xyz=1
            for h in range(self.m):
                pr_xyz*=self.U[h]**z[h]*(1-self.U[h])**(1-z[h])
            for i in range(self.g):
                pr_xyz*=softmax(self.V.T @ z)[i]**y[j,i]
            sumz+=pr_xyz

        sumzy=0
        for z in self.zlsth:
            pr_xyz=1
            for h_ in range(self.m):
                pr_xyz*=self.U[h_]**z[h_]*(1-self.U[h_])**(1-z[h_])
            for i_ in range(self.g):
                pr_xyz*=softmax(self.V.T @ z)[i_]**y[j,i_]
            pr_xyz*=y[j,i]-softmax(self.V.T @ z)[i]
            sumzy+=pr_xyz

        return sumz, sumzy



    def Train(self, X, y, epochs):
        loss_set=list()
        for n in range(len(X)):
            y_pred = self.forward(X[n,:])
            loss = -np.sum(y[n,:]*np.log(y_pred))
            loss_set.append(loss)
        avgloss = np.mean(loss_set)
        print(f'Epoch {0}, Loss: {avgloss}')
        for epoch in range(epochs):

            self.M_step(X, y)

            losses=list()
            for n in range(len(X)):
                y_pred = self.forward(X[n,:])
                loss = -np.sum(y[n,:]*np.log(y_pred)) #Cross Entropy Loss
                losses.append(loss)
            avgloss=np.mean(losses)

            if (epoch+1) % 5 == 0: print(f'Epoch {epoch+1}, Loss: {avgloss}')

    def Test(self, X):
        testoutput=[]
        for n in range(len(X)):
            y_pred = self.forward(X[n,:])
            testoutput.append(np.argmax(y_pred))
        return testoutput

In [4]:
#setting hyperparameters
epochs=50
lr=0.005

NN=NeuralNetwork(4,7,3,lr)
NN.Train(X_train,y_train,epochs)

Epoch 0, Loss: 1.1427729846642725
Epoch 5, Loss: 0.8477192072820486
Epoch 10, Loss: 0.8088932560503146
Epoch 15, Loss: 0.8013409928099052
Epoch 20, Loss: 0.7953260351338284
Epoch 25, Loss: 0.7862787851641184
Epoch 30, Loss: 0.6765491949913677
Epoch 35, Loss: 0.6733092653604303
Epoch 40, Loss: 0.670328548725725
Epoch 45, Loss: 0.6676518890360496
Epoch 50, Loss: 0.6652140857718162


In [5]:
testoutput = NN.Test(X_test)
y_test_labels = np.argmax(y_test, axis=1)

#accuracy
accuracy = np.mean(testoutput == y_test_labels)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 93.33%


In [6]:
for i in range(10):
    pred=NN.forward(X_test[i,:])
    target=y_test[i,:]
    print(pred,target,np.argmax(pred)==np.argmax(target))

[0.936249   0.04692205 0.01682896] [1. 0. 0.] True
[0.27988311 0.32700521 0.39311167] [0. 0. 1.] True
[0.28207146 0.33332921 0.38459933] [0. 0. 1.] True
[0.95591958 0.03276875 0.01131167] [1. 0. 0.] True
[0.27057917 0.31213924 0.4172816 ] [0. 0. 1.] True
[0.33065804 0.38445328 0.28488868] [0. 1. 0.] True
[0.95479909 0.03355867 0.01164225] [1. 0. 0.] True
[0.27467353 0.31926968 0.4060568 ] [0. 0. 1.] True
[0.94697415 0.03924066 0.01378519] [1. 0. 0.] True
[0.31662273 0.37827904 0.30509823] [0. 1. 0.] True
