In [2]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split

In [4]:
n_samples = 500
n_features = 3

X = np.random.randn(n_samples, n_features)

true_weights = np.array([-2.0, 3, 0.4])
true_bias = 0.4

noise = np.random.randn(n_samples) * 0.01 # Gaussian Noise

y = np.dot(X, true_weights) + true_bias + noise 

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size= 0.2, random_state= 42)



In [None]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

def relu(z):
    return np.maximum(0, z)

def dSig(z):
    s = sigmoid(z)
    return s*(1 - s)

def dRelu(z):
    return (z > 0).astype(float)

class NeuralNet:
    def __init__(self, input_size, hidden_size1, hidden_size2, learning_rate = 0.01):
        # super(SimpleNN, self).__init__()
        self.input_size = input_size # 3
        self.hidden_size1 = hidden_size1 # 4
        self.hidden_size2 = hidden_size2 # 5

        self.lr = learning_rate
        self.W1 = np.random.randn(input_size, hidden_size1) # self.fc1 = nn.Linear(input_size, hidden1)
        self.b1 = np.zeros(hidden_size1)
        self.W2 = np.random.randn(hidden_size1, hidden_size2) # self.fc2 = nn.Linear(hidden1, hidden2)
        self.b2 = np.zeros(hidden_size2)
        self.W3 = np.random.randn(hidden_size2, 1) # self.fc3 = nn.Linear(hidden2, output_size)
        self.b3 = 0
        # self.relu = nn.ReLU()
        # self.sigmoid = nn.Sigmoid()


    def forward(self, X):
        self.Z1 = np.dot(X, self.W1) + self.b1 #in abstracted packages, you don't need this
        self.Y1 = relu(self.Z1)
        self.Z2 = np.dot(self.Y1, self.W2) + self.b2 # #in abstracted packages, you don't need this
        self.Y2 = relu(self.Z2)
        self.Z3 = np.dot(self.Y2, self.W3) + self.b3  # #in abstracted packages, you don't need this
        self.Y3 = sigmoid(self.Z3)

        return self.Y3

    def backward(self, X, y, y_pred):
        m = y.shape[0]
        # Note: You should transpose the weight matrices when propagating gradients backward, for example:

        dz3 = y_pred - y.reshape(-1,1)
        dw3 = (1/m) * np.dot(self.Y2.T, dz3) #dw3 = (1/m) * np.dot(self.Y2.T, dz3) for classifcation, divide by m in dw3; for regression, divide by m
        db3 = dz3.mean(axis = 0)

        dy2 = np.dot(dz3, self.W3.T)
        dz2 = dy2 *  dRelu(self.Z2) # do elementwise mult not np.dot
        dw2 = (1/m) * np.dot(self.Y1.T, dz2)
        db2 = dz2.mean(axis = 0)

        dy1 = np.dot(dz2, self.W2.T)
        dz1 = dy1 *  dRelu(self.Z1)
        dw1 = (1/m) * np.dot(X.T, dz1)
        db1 = dz1.mean(axis = 0)

        self.W3 -= self.lr * dw3
        self.b3 -= self.lr * db3
        self.W2 -= self.lr * dw2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dw1
        self.b1 -= self.lr * db1

    def fit(self, X,y, epochs = 1000):
        self.loss_history = []
        for epoch in range(epochs):
            y_pred = self.forward(X) # outputs = model(torch.tensor(X_train, dtype=torch.float32))
            ls = self.compute_loss(y, y_pred) # loss = criterion(outputs, torch.tensor(y_train, dtype=torch.float32).unsqueeze(1))
            self.loss_history.append(ls)
            self.backward(X,y, y_pred)
            # optimizer.step()

            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {ls:.4f}")

    def compute_loss(self, y_true, y_pred):
        epsilon = 1e-15
        loss = -(y_true*np.log(y_pred + epsilon) + (1 - y_true)*np.log(1-y_pred + epsilon)).mean()
        return loss

    def predict(self, X):
        # with torch.no_grad():
        y_pred = self.forward(X) # preds = model(torch.tensor(X_test, dtype=torch.float32))
        return (y_pred > 0.5).astype(int) # predicted_classes = (preds > 0.5).int()



In [13]:
np.random.seed(42)
X = np.random.randn(100, 3)
true_weights = np.array([1.5, -2.0, 0.5])
logits = X.dot(true_weights) + 0.7
y = (1 / (1 + np.exp(-logits)) > 0.5).astype(int)

model = NeuralNet(input_size=3, hidden_size1=4, hidden_size2=5, learning_rate=0.01)
model.fit(X, y, epochs=1000)
predictions = model.predict(X)

print("Predictions:", predictions[:10])

Epoch 0, Loss: 1.2679
Epoch 100, Loss: 0.9619
Epoch 200, Loss: 0.9373
Epoch 300, Loss: 0.9921
Epoch 400, Loss: 1.0811
Epoch 500, Loss: 1.1917
Epoch 600, Loss: 1.3060
Epoch 700, Loss: 1.4202
Epoch 800, Loss: 1.5351
Epoch 900, Loss: 1.6532
Predictions: [[1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]]
