# A very basic neural network: formulation and implementation

Note: the implementation uses NumPy only.

In [2]:
import numpy as np 

In [None]:
class MLP:
    """Two-layer perceptron: ReLU hidden layer followed by a softmax output.

    Parameters are stored as ``W1`` (dims[0] x dims[1]), ``b1`` (1 x dims[1]),
    ``W2`` (dims[1] x dims[2]) and ``b2`` (1 x dims[2]). Training is plain
    gradient descent on the mean cross-entropy of the whole batch.
    """

    def __init__(self, dims, lr):
        """Create the network.

        Args:
            dims: sequence ``[input dim, hidden dim, output dim]``.
            lr: learning rate used by ``update``.
        """
        self.dims = dims
        self.lr = lr

        # dims = [input dim, hidden dim, output dim]
        self.W1 = np.random.randn(dims[0], dims[1])
        self.b1 = np.zeros((1, dims[1]))
        self.W2 = np.random.randn(dims[1], dims[2])
        self.b2 = np.zeros((1, dims[2]))

    def activation(self, x):
        """Apply ReLU element-wise."""
        return np.maximum(0, x)

    def softmax(self, x):
        """Return the row-wise softmax of a 2-D array of logits."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large logits.
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def forward(self, X):
        """Run a forward pass and return the class probabilities.

        The input batch and all intermediate values (``z1``, ``a1``, ``z2``,
        ``a2``) are cached on the instance because ``backward`` needs them.

        Args:
            X: array of shape (m, dims[0]).

        Returns:
            Array of shape (m, dims[2]) whose rows sum to 1.
        """
        self.X = X  # kept so backward() can form the gradient w.r.t. W1
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.activation(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.softmax(self.z2)
        return self.a2

    def loss(self, y_true, y_pred):
        """Return the mean cross-entropy.

        Args:
            y_true: integer class indices, shape (m,).
            y_pred: predicted probabilities, shape (m, number of classes).
        """
        m = y_true.shape[0]
        picked = y_pred[range(m), y_true]
        # Floor the probability so an underflowed softmax output gives a
        # large finite loss instead of inf.
        log_likelihood = -np.log(np.maximum(picked, 1e-12))
        return np.sum(log_likelihood) / m

    def backward(self, x):
        """Back-propagate the cross-entropy loss of the last forward pass.

        Must be called after ``forward``; it uses the batch and activations
        cached there.

        Args:
            x: integer class labels of shape (m,) for the batch last passed
                to ``forward``. (The parameter keeps its original name; it
                holds the labels, not the input features.)

        Returns:
            Tuple ``(dW1, db1, dW2, db2)``, each shaped like the parameter it
            belongs to.
        """
        m = x.shape[0]

        # Gradient w.r.t. the logits for softmax + cross-entropy:
        # (probabilities - one_hot(labels)) / m.
        delta2 = self.a2.copy()
        delta2[range(m), x] -= 1
        delta2 /= m

        dW2 = np.dot(self.a1.T, delta2)
        db2 = np.sum(delta2, axis=0, keepdims=True)

        # ReLU passes gradient only where its pre-activation was positive.
        delta1 = np.dot(delta2, self.W2.T) * (self.z1 > 0)
        # Use the cached input batch here. Using the labels instead produced
        # a (hidden,) vector that broadcast into W1 on update without error.
        dW1 = np.dot(self.X.T, delta1)
        db1 = np.sum(delta1, axis=0, keepdims=True)

        return dW1, db1, dW2, db2

    def update(self, dW1, db1, dW2, db2):
        """Take one gradient-descent step in place using ``self.lr``."""
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2

    def train(self, X, y, epochs):
        """Train on the full batch for ``epochs`` iterations.

        Prints the loss every 100 epochs, starting with epoch 0.

        Args:
            X: array of shape (m, dims[0]).
            y: integer class labels of shape (m,).
            epochs: number of gradient-descent steps.
        """
        for epoch in range(epochs):
            y_pred = self.forward(X)
            loss = self.loss(y, y_pred)
            dW1, db1, dW2, db2 = self.backward(y)
            self.update(dW1, db1, dW2, db2)

            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Loss: {loss:.4f}')
                

        

In [12]:
# Synthetic demo data: uniform random features with random integer labels
n_samples, n_features, n_classes = 100, 20, 2
X = np.random.rand(n_samples, n_features)
y = np.random.randint(0, n_classes, size=n_samples)

# Build the network: input -> 10 hidden units -> one output per class
model = MLP(dims=[n_features, 10, n_classes], lr=0.001)

# Fit on the whole data set for 1000 epochs
model.train(X, y, epochs=1000)


Epoch 0, Loss: 1.5474
Epoch 100, Loss: 0.9780
Epoch 200, Loss: 0.9054
Epoch 300, Loss: 0.8757
Epoch 400, Loss: 0.8528
Epoch 500, Loss: 0.8312
Epoch 600, Loss: 0.8117
Epoch 700, Loss: 0.7945
Epoch 800, Loss: 0.7797
Epoch 900, Loss: 0.7646
