In [1]:
import numpy as np

In [None]:
import pandas as pd

df = pd.read_csv('./datasets/two_feature_dataset.csv')
df
X = df.drop('target', axis=1)
y = df[['target']]

In [747]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert DataFrames to NumPy arrays
X_train = X_train.values
X_test = X_test.values
y_train = y_train.values
y_test = y_test.values

# Note: y_train and y_test are currently (N, 1) based on your 'Target' column.
# If you intend for them to have 2 columns, you need to specify what the second column should contain.
# For now, their shape is (num_samples, 1), which is correct for a single-output model.

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"y_test shape: {y_test.shape}")

X_train shape: (800, 2)
X_test shape: (200, 2)
y_train shape: (800, 1)
y_test shape: (200, 1)


In [748]:
# Neural Network layer
class NNLayer:
  def __init__(self, numNN, numFeat, act=True):
    self.w = np.random.randn(numNN, numFeat)  # numNN: number of neurons in this layer, numFeat: number of input features
    self.b = np.zeros((1, numNN)) # Bias should match the number of neurons in this layer
    self.op = None
    self.act = act

  def sigmoid(self):   # Activation function
    return 1/(1 + np.exp(-self.z))

  def sigmoid_derivative(self):
    # Correct sigmoid derivative: s * (1 - s) where s is the sigmoid output
    return self.op * (1 - self.op)


  def feedforward(self, x):
    self.x = x     # input (num_samples, numFeat)
    # Use matrix multiplication @ and ensure weights are transposed correctly
    self.z = self.x @ self.w.T + self.b  # (num_samples, numFeat) @ (numFeat, numNN) + (1, numNN) -> (num_samples, numNN)
    if(self.act):
      self.op = self.sigmoid()     # Applying activation function
    else:
      self.op = self.z
    return self.op

  def backprop(self, yb, alpha=0.001):   # yb is the derivative of cost w.r.t. output (dC/d_op)
    if(self.act):
      # d_op/dz = sigmoid_derivative
      dz = self.sigmoid_derivative()
      dC_dz = yb * dz # dC/dz
    else:
      # For linear activation, op=z, so d_op/dz = 1
      dC_dz = yb # dC/dz

    # dC/db: mean dC/dz over samples
    db = np.mean(dC_dz, axis=0, keepdims=True) # shape (1, numNN)

    # dC/dw: dw = dC/dz.T @ self.x (since self.w is (numNN, numFeat))
    dw = dC_dz.T @ self.x # (numNN, num_samples) @ (num_samples, numFeat) -> (numNN, numFeat)

    # print(dw.shape)
    # dC/dx_prev: derivative to pass to previous layer
    # dC/dx_prev = dC/dz @ self.w
    dC_dx_prev = dC_dz @ self.w # (num_samples, numNN) @ (numNN, numFeat) -> (num_samples, numFeat)

    self.b = self.b - alpha * db
    self.w = self.w - alpha * dw

    return dC_dx_prev # Return dC/dx_prev


In [749]:
# Here we are going to use avg SSR as its regression prob to evaluate the model
def costfunction(op, y):
  cost = y - op
  cost = (cost*cost)
  return cost.mean()

In [750]:
class ANN:
  def __init__(self, alpha=0.001):
    self.layers = []
    self.alpha = alpha

  def addLayer(self, layer):
    self.layers.append(layer)

  def forward(self, X):
    for layer in self.layers:
      X = layer.feedforward(X)     # Our new X is o/p of each layer
    return X                       # X is the output

  def backward(self, yb):
    for i in range(len(self.layers)-1, -1, -1):
      yb = self.layers[i].backprop(yb, self.alpha)

  def predict(self, X):
    for layer in self.layers:
      X = layer.feedforward(X)     # Our new X is o/p of each layer
    return X


  def train(self, X, y, ep):
    for i in range(1, ep+1):
      op = self.forward(X)
      #print(op.shape)
      error = costfunction(op, y)

      print(f'Epoch {i}/{ep} - Error = {error}')
      print('========================================')

      yb = -2 * (y - op)

      self.backward(yb)



In [751]:
learning_rate=0.001
model = ANN(learning_rate)
model.addLayer(NNLayer(3, X_train.shape[1])) # numNN=3 (output features), numFeat=2 (input features from X_train), act=True
model.addLayer(NNLayer(4, 3, 1))
model.addLayer(NNLayer(2, 4, 1))
model.addLayer(NNLayer(1, 2, False)) # numNN=1 (output feature), numFeat=3 (input features from previous layer), act=False
model

<__main__.ANN at 0x78886a8d2ab0>

In [752]:
# Fitting the model
epoch=500
model.train(X_train, y_train, epoch)

Epoch 1/500 - Error = 14.156846304010141
Epoch 2/500 - Error = 13.63176276464897
Epoch 3/500 - Error = 13.591345118660719
Epoch 4/500 - Error = 13.573535620106545
Epoch 5/500 - Error = 13.551665986953294
Epoch 6/500 - Error = 13.524308236359062
Epoch 7/500 - Error = 13.489663900332411
Epoch 8/500 - Error = 13.444811375990657
Epoch 9/500 - Error = 13.38480484239356
Epoch 10/500 - Error = 13.30091299765968
Epoch 11/500 - Error = 13.17693464163933
Epoch 12/500 - Error = 12.981037068346026
Epoch 13/500 - Error = 12.64701129217018
Epoch 14/500 - Error = 12.03209712816063
Epoch 15/500 - Error = 10.840764837214424
Epoch 16/500 - Error = 8.626592345212622
Epoch 17/500 - Error = 5.503646155720637
Epoch 18/500 - Error = 3.234261938514329
Epoch 19/500 - Error = 2.5206922702627943
Epoch 20/500 - Error = 2.3071764327549427
Epoch 21/500 - Error = 2.1662321293675255
Epoch 22/500 - Error = 2.1600904055740973
Epoch 23/500 - Error = 4.43879131843941
Epoch 24/500 - Error = 26.759898401380532
Epoch 25/500

In [753]:
y_preds = model.predict(X_test)
y_preds[:10]

array([[ 0.50115075],
       [ 6.33617169],
       [-5.79303417],
       [-5.68034048],
       [-5.66363319],
       [ 0.86456759],
       [-2.59830237],
       [ 1.55534669],
       [ 1.72592905],
       [-5.75413272]])

In [754]:
# Evaluating model
from sklearn.metrics import r2_score

print(r2_score(y_test, y_preds))

0.8983898282341919
