# Setup and Imports

In [1]:
!pip3 -qq install -r ../requirements.txt

In [22]:
import pandas as pd # Used just for loading the respective dataset
import random
import math
from tqdm import tqdm_notebook

In [None]:
!kaggle datasets download -d yasserh/housing-prices-dataset -p ../data

Dataset URL: https://www.kaggle.com/datasets/yasserh/housing-prices-dataset
License(s): CC0-1.0
Downloading housing-prices-dataset.zip to ../data




  0%|          | 0.00/4.63k [00:00<?, ?B/s]
100%|██████████| 4.63k/4.63k [00:00<00:00, 2.55MB/s]


In [None]:
!unzip ../data/housing-prices-dataset.zip -d ../data

Archive:  ../data/housing-prices-dataset.zip
  inflating: ../data/Housing.csv     


# Data Loading

In [None]:
# Loading data using pandas (only for loading)
data = pd.read_csv('../data/housing.csv')

In [5]:
data.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [6]:
data = data[['price', 'area', 'bedrooms', 'stories']]   # As per the requirements of the assignments

In [7]:
data.head()

Unnamed: 0,price,area,bedrooms,stories
0,13300000,7420,4,3
1,12250000,8960,4,4
2,12250000,9960,3,2
3,12215000,7500,4,2
4,11410000,7420,4,2


In [8]:
data = data.values.tolist()

In [9]:
print(data) # For, testing

[[13300000, 7420, 4, 3], [12250000, 8960, 4, 4], [12250000, 9960, 3, 2], [12215000, 7500, 4, 2], [11410000, 7420, 4, 2], [10850000, 7500, 3, 1], [10150000, 8580, 4, 4], [10150000, 16200, 5, 2], [9870000, 8100, 4, 2], [9800000, 5750, 3, 4], [9800000, 13200, 3, 2], [9681000, 6000, 4, 2], [9310000, 6550, 4, 2], [9240000, 3500, 4, 2], [9240000, 7800, 3, 2], [9100000, 6000, 4, 2], [9100000, 6600, 4, 2], [8960000, 8500, 3, 4], [8890000, 4600, 3, 2], [8855000, 6420, 3, 2], [8750000, 4320, 3, 2], [8680000, 7155, 3, 1], [8645000, 8050, 3, 1], [8645000, 4560, 3, 2], [8575000, 8800, 3, 2], [8540000, 6540, 4, 2], [8463000, 6000, 3, 4], [8400000, 8875, 3, 1], [8400000, 7950, 5, 2], [8400000, 5500, 4, 2], [8400000, 7475, 3, 4], [8400000, 7000, 3, 4], [8295000, 4880, 4, 2], [8190000, 5960, 3, 2], [8120000, 6840, 5, 2], [8080940, 7000, 3, 4], [8043000, 7482, 3, 3], [7980000, 9000, 4, 4], [7962500, 6000, 3, 4], [7910000, 6000, 4, 4], [7875000, 6550, 3, 2], [7840000, 6360, 3, 4], [7700000, 6480, 3, 4], 

# Data Preprocessing

In [10]:
# Utility function: Normalizing features to prevent large multiplications
def normalize(data):
    max_vals = [max(col) for col in zip(*data)]
    
    return [[x / max_val if max_val != 0 else x for x, max_val in zip(row, max_vals)] for row in data]

# def normalize(data):
#     # Converting all data to floats
#     data = [[float(x) for x in row] for row in data]
    
#     max_vals = [max(col) for col in zip(*data)]
    
#     return [[x / max_val if max_val != 0 else x for x, max_val in zip(row, max_vals)] for row in data]

In [11]:
data = normalize(data)

print(f"Loaded and normalized dataset with shape: {len(data)}, {len(data[0])}")

Loaded and normalized dataset with shape: 545, 4


# (Random) Data Splitting

In [12]:
# Splitting dataset into training (80%) and test (20%)
random.shuffle(data)
split_index = int(0.7 * len(data))
train_data = data[:split_index]
test_data = data[split_index:]

print(f"Training set size: {len(train_data)}, Test set size: {len(test_data)}")

Training set size: 381, Test set size: 164


In [13]:
print(train_data[:5]) # For testing
print(test_data[:5]) 

[[0.3526315789473684, 0.22407407407407406, 0.5, 0.5], [0.22631578947368422, 0.31333333333333335, 0.5, 0.25], [0.30526315789473685, 0.30814814814814817, 0.5, 0.5], [0.3868421052631579, 0.4925925925925926, 0.5, 0.25], [0.3368421052631579, 0.24938271604938272, 0.5, 0.5]]
[[0.22894736842105262, 0.2345679012345679, 0.3333333333333333, 0.25], [0.23421052631578948, 0.18518518518518517, 0.5, 0.25], [0.25263157894736843, 0.23148148148148148, 0.5, 0.25], [0.23157894736842105, 0.2777777777777778, 0.3333333333333333, 0.5], [0.3157894736842105, 0.3611111111111111, 0.3333333333333333, 0.25]]


# Model Architecture

In [18]:
class SimpleNeuralNetwork:

    def __init__(self, parameters, learning_rate, iterations):
        self.parameters = parameters  # List of 8 parameters
        self.learning_rate = learning_rate
        self.iterations = iterations

    def multiply_forward(self, a, b):
        return a * b

    def multiply_backward(self, upper_derivative_output, a, b):
        return upper_derivative_output * b, upper_derivative_output * a

    def add_forward(self, a, b):
        return a + b

    def add_backward(self, upper_derivative_output):
        return upper_derivative_output, upper_derivative_output

    def max_forward(self, a, b):
        return a if a > b else b

    def max_backward(self, upper_derivative_output, a, b):
        return (upper_derivative_output if a > b else 0), (upper_derivative_output if b > a else 0)

    def relu_forward(self, x):
        return self.max_forward(0, x)

    def relu_backward(self, upper_derivative_output, x):
        # Only the gradient corresponding to the active neuron flows back (the first one)
        return self.max_backward(upper_derivative_output, x, 0)[0]

    def sigmoid_forward(self, x):
        if x > 500:
            return 1.0
        elif x < -500:
            return 0.0
        return 1 / (1 + math.exp(-x))

    def sigmoid_backward(self, upper_derivative_output, activated_sigmoid):
        return upper_derivative_output * activated_sigmoid * (1 - activated_sigmoid)

    def affine_forward(self, area, bedrooms, stories):
        # Computing weighted sum for ReLU branch
        weighted_sum_relu = self.add_forward(   # With the parameters[8] as bias term
            self.add_forward(
                self.add_forward(
                    self.multiply_forward(area, self.parameters[0]),
                    self.multiply_forward(bedrooms, self.parameters[1])
                ),
                self.multiply_forward(stories, self.parameters[2])
            ),
            self.parameters[8]  # Bias term
        )
        activated_relu = self.relu_forward(weighted_sum_relu)

        # Computing weighted sum for Sigmoid branch
        weighted_sum_sigmoid = self.add_forward(
            self.add_forward(
                self.add_forward(
                    self.multiply_forward(area, self.parameters[3]),
                    self.multiply_forward(bedrooms, self.parameters[4])
                ),
                self.multiply_forward(stories, self.parameters[5])
            ),
            self.parameters[8]  # Bias term
        )
        activated_sigmoid = self.sigmoid_forward(weighted_sum_sigmoid)

        # Combining contributions from both branches
        relu_contribution = self.multiply_forward(self.parameters[6], activated_relu)
        sigmoid_contribution = self.multiply_forward(self.parameters[7], activated_sigmoid)
        
        predicted_price = self.add_forward(relu_contribution, sigmoid_contribution)

        return predicted_price, activated_relu, activated_sigmoid, weighted_sum_relu, weighted_sum_sigmoid

    def affine_backward(self, area, bedrooms, stories, actual_price,
                        activated_relu, activated_sigmoid,
                        weighted_sum_relu, weighted_sum_sigmoid):
        grads = [0] * 9
        
        # Computing the error (difference between prediction and actual price)
        # error = predicted_price - actual_price
        error = (self.parameters[6] * activated_relu + self.parameters[7] * activated_sigmoid) - actual_price

        # ud_1, ud_2 = self.add_backward(error)

        # ud_3, ud_4 = self.multiply_backward(ud_1, activated_relu, self.parameters[6])
        # ud_5, ud_6 = self.multiply_backward(ud_2, activated_sigmoid, self.parameters[7])

        # # Backpropagate through the ReLU and Sigmoid branches
        # upper_derivative_wrt_relu = self.relu_backward(ud_3, activated_relu)
        # upper_derivative_wrt_sigmoid = self.sigmoid_backward(ud_5, activated_sigmoid)

        upper_derivative_wrt_relu = self.relu_backward(error, weighted_sum_relu)
        upper_derivative_wrt_sigmoid = self.sigmoid_backward(error, weighted_sum_sigmoid)

        # Compute gradients for the weights of the ReLU branch
        grads[0], _ = self.multiply_backward(upper_derivative_wrt_relu, area, self.parameters[0])
        grads[1], _ = self.multiply_backward(upper_derivative_wrt_relu, bedrooms, self.parameters[1])
        grads[2], _ = self.multiply_backward(upper_derivative_wrt_relu, stories, self.parameters[2])

        # Compute gradients for the weights of the Sigmoid branch
        grads[3], _ = self.multiply_backward(upper_derivative_wrt_sigmoid, area, self.parameters[3])
        grads[4], _ = self.multiply_backward(upper_derivative_wrt_sigmoid, bedrooms, self.parameters[4])
        grads[5], _ = self.multiply_backward(upper_derivative_wrt_sigmoid, stories, self.parameters[5])

        # Gradients for the contributions from ReLU and Sigmoid activations
        grads[6] = self.multiply_backward(upper_derivative_wrt_relu, self.parameters[6], activated_relu)[0]
        grads[7] = self.multiply_backward(upper_derivative_wrt_sigmoid, self.parameters[7], activated_sigmoid)[0]

        # Gradient for the bias term (shared by both branches)
        bias_grad_relu, _ = self.add_backward(upper_derivative_wrt_relu)
        bias_grad_sigmoid, _ = self.add_backward(upper_derivative_wrt_sigmoid)
        grads[8] = bias_grad_relu + bias_grad_sigmoid

        return grads

    def compute_loss(self, data):
        total_loss = 0
        
        for actual_price, area, bedrooms, stories in data:
            predicted_price, _, _, _, _ = self.affine_forward(area, bedrooms, stories)
            error = predicted_price - actual_price
            total_loss += error ** 2
        
        return total_loss / len(data)

    def compute_gradients(self, data):
        grads = [0] * len(self.parameters)
        
        for actual_price, area, bedrooms, stories in data:
            _, activated_relu, activated_sigmoid, weighted_sum_relu, weighted_sum_sigmoid = self.affine_forward(area, bedrooms, stories)
            sample_grads = self.affine_backward(area, bedrooms, stories, actual_price,
                                                activated_relu, activated_sigmoid,
                                                weighted_sum_relu, weighted_sum_sigmoid)
            for i in range(len(self.parameters)):
                grads[i] += sample_grads[i]
        
        return [g / len(data) for g in grads]

    def train(self, train_data, test_data):
        print(f"Starting training with {len(train_data)} samples...")

        for i in tqdm_notebook(range(self.iterations)):
            grads = self.compute_gradients(train_data)

            # Updating parameters using gradient descent
            for j in range(len(self.parameters)):
                self.parameters[j] -= self.learning_rate * grads[j]

            if i % 100 == 0:
                train_loss = self.compute_loss(train_data)
                test_loss = self.compute_loss(test_data)
                print(f"Iteration {i}: Train Loss = {train_loss}, Test Loss = {test_loss}")

        print("\nTraining completed.")
        
        final_test_loss = self.compute_loss(test_data)
        
        print(f"\nFinal Test Loss: {final_test_loss}")

# Model Training

In [19]:
# Initializing small random parameters
random.seed(42)

parameters = [random.uniform(-0.1, 0.1) for _ in range(9)]  # 8+1 for bias term

# print(parameters) # For, testing

learning_rate = 0.01
iterations = 1500

In [20]:
nn = SimpleNeuralNetwork(parameters, learning_rate, iterations)

nn.train(train_data, test_data)

Starting training with 381 samples...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm_notebook(range(self.iterations)):


  0%|          | 0/1500 [00:00<?, ?it/s]

Iteration 0: Train Loss = 0.18310447167255936, Test Loss = 0.17149833198396594
Iteration 100: Train Loss = 0.1825727410327287, Test Loss = 0.17098011913268316
Iteration 200: Train Loss = 0.18177951604482545, Test Loss = 0.17020710131093056
Iteration 300: Train Loss = 0.18059888834892954, Test Loss = 0.16905664238516224
Iteration 400: Train Loss = 0.17884996124923627, Test Loss = 0.1673526119959312
Iteration 500: Train Loss = 0.17627990930672016, Test Loss = 0.16484901199843494
Iteration 600: Train Loss = 0.16803664468950125, Test Loss = 0.15696477496374875
Iteration 700: Train Loss = 0.1032398611275021, Test Loss = 0.09436760265171233
Iteration 800: Train Loss = 0.04183718743360201, Test Loss = 0.03596775025556542
Iteration 900: Train Loss = 0.025429286964271307, Test Loss = 0.021131222777521257
Iteration 1000: Train Loss = 0.02295255310941273, Test Loss = 0.019242053369103198
Iteration 1100: Train Loss = 0.022651780143451136, Test Loss = 0.01914461488473045
Iteration 1200: Train Loss 

# Simple Flowgraph Based Implementation

### Model Architecture

In [23]:
def multiply_f(a, b):
    return a * b, b, a  # returns (a*b, d(a*b)/da = b, d(a*b)/db = a)

def add_f(a, b):
    return a + b, 1, 1  # ∂(a+b)/∂a = 1, ∂(a+b)/∂b = 1

def max_f(x, y):
    if x >= y:
        return x, 1, 0  # derivative: 1 w.r.t. first input, 0 w.r.t. second
    else:
        return y, 0, 1

def multiply_b(d_out, local_a, local_b):
    d_a = d_out * local_a  # dL/da = upstream_grad * (∂(a*b)/∂a)
    d_b = d_out * local_b  # dL/db = upstream_grad * (∂(a*b)/∂b)

    return d_a, d_b

def add_b(d_out, local_a, local_b):
    return d_out * local_a, d_out * local_b


def max_b(d_out, local_first, local_second):
    return d_out * local_first, d_out * local_second

In [None]:
def simple_nn(P, data):
    total_loss = 0
    gradients = [0] * 8  # gradients for P[0]...P[7]
    
    for Y, x, y, z in train_data:
        # # Forward Pass
        # ReLU branch
        o1, ldo1x, ldo1p0 = multiply_f(x, P[0])    # o1 = x * P[0]
        o2, ldo2y, ldo2p1 = multiply_f(y, P[1])       # o2 = y * P[1]
        o3, ldo3z, ldo3p2 = multiply_f(z, P[2])       # o3 = z * P[2]
        
        o4, ldo4o1, ldo4o2 = add_f(o1, o2)            # o4 = o1 + o2
        o5, ldo5o4, ldo5o3 = add_f(o4, o3)            # o5 = o4 + o3
        
        # ReLU forward
        o6, ldo60, ldo6o5 = max_f(0, o5)              # o6 = max(0, o5)
        o7, ldo7o6, ldo7p6 = multiply_f(o6, P[6])      # o7 = o6 * P[6]
        
        # Sigmoid branch
        o8, ldo8x, ldo8p3 = multiply_f(x, P[3])       # o8 = x * P[3]
        o9, ldo9y, ldo9p4 = multiply_f(y, P[4])       # o9 = y * P[4]
        o10, ldo10z, ldo10p5 = multiply_f(z, P[5])     # o10 = z * P[5]
        
        o11, ldo11o8, ldo11o9 = add_f(o8, o9)         # o11 = o8 + o9
        o12, ldo12o11, ldo12o10 = add_f(o11, o10)      # o12 = o11 + o10
        
        # Sigmoid forward
        sigmoid = 1/(1 + math.exp(-o12))
        sd = sigmoid * (1 - sigmoid)  # Derivative of the sigmoid
        
        o13, ldo13sigmoid, ldo13p7 = multiply_f(sigmoid, P[7])  # o13 = sigmoid * P[7]
        
        # Final output: add ReLU branch (o7) and Sigmoid branch (o13)
        o14, ldo14o7, ldo14o13 = add_f(o7, o13)
        
        # Compute squared error loss: L = (o14 - Y)^2
        loss = (o14 - Y) ** 2
        total_loss += loss
        
        # # Backward Pass 
        # Starting from the loss: dL/do14 = 2*(o14 - Y)
        d_o14 = 2 * (o14 - Y)
        
        # Backprop through final addition: o14 = o7 + o13
        grad_o7, grad_o13 = add_b(d_o14, ldo14o7, ldo14o13)
        
        # # ReLU Branch Backward
        # o7 = o6 * P[6]
        grad_o6, grad_P6 = multiply_b(grad_o7, ldo7o6, ldo7p6)
        gradients[6] += grad_P6
        
        # o6 = max(0, o5)
        # (We ignore the gradient flowing to the constant 0 input)
        _, grad_o5 = max_b(grad_o6, ldo60, ldo6o5)
        
        # o5 = o4 + o3
        grad_o4, grad_o3 = add_b(grad_o5, ldo5o4, ldo5o3)
        
        # o4 = o1 + o2
        grad_o1, grad_o2 = add_b(grad_o4, ldo4o1, ldo4o2)
        
        # o1 = x * P[0]
        _, grad_P0 = multiply_b(grad_o1, ldo1x, ldo1p0)
        gradients[0] += grad_P0
        
        # o2 = y * P[1]
        _, grad_P1 = multiply_b(grad_o2, ldo2y, ldo2p1)
        gradients[1] += grad_P1
        
        # o3 = z * P[2]
        _, grad_P2 = multiply_b(grad_o3, ldo3z, ldo3p2)
        gradients[2] += grad_P2
        
        # # Sigmoid Branch Backward 
        # o13 = sigmoid * P[7]
        grad_sigmoid, grad_P7 = multiply_b(grad_o13, ldo13sigmoid, ldo13p7)
        gradients[7] += grad_P7
        
        # Backprop through the sigmoid nonlinearity:
        # d(sigmoid)/d(o12) = sigmoid*(1 - sigmoid) = sd
        grad_o12 = grad_sigmoid * sd
        
        # o12 = o11 + o10
        grad_o11, grad_o10 = add_b(grad_o12, ldo12o11, ldo12o10)
        
        # o11 = o8 + o9
        grad_o8, grad_o9 = add_b(grad_o11, ldo11o8, ldo11o9)
        
        # o8 = x * P[3]
        _, grad_P3 = multiply_b(grad_o8, ldo8x, ldo8p3)
        gradients[3] += grad_P3
        
        # o9 = y * P[4]
        _, grad_P4 = multiply_b(grad_o9, ldo9y, ldo9p4)
        gradients[4] += grad_P4
        
        # o10 = z * P[5]
        _, grad_P5 = multiply_b(grad_o10, ldo10z, ldo10p5)
        gradients[5] += grad_P5

    # Averaging loss and gradients over the dataset
    avg_loss = total_loss / len(data)
    gradients = [g / len(data) for g in gradients]
    
    return avg_loss, gradients

### Model Training

In [39]:
epochs = 15
patience = 5
best_loss = float('inf')

random.seed(42)

parameters = [random.uniform(-0.1, 0.1) for _ in range(8)]

learning_rate = 0.01

In [None]:
for epoch in tqdm_notebook(range(epochs)):
    loss, grads = simple_nn(parameters, train_data)
    
    # Updating parameters
    for i in range(len(parameters)):
        parameters[i] -= learning_rate * grads[i]
    
    # Early stopping check
    if loss < best_loss:
        best_loss = loss
        patience_counter = 0
    else:
        patience_counter += 1
        
    if patience_counter >= patience:
        print(f"Early stopping at epoch {epoch}")
    
        break
        
    print(f"Epoch {epoch}: Loss = {loss:.6f}")

print("\nTraining completed.")
print("\nOptimized Parameters:", parameters)
print(f"Final Loss: {loss:.6f}")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for epoch in tqdm_notebook(range(epochs)):


  0%|          | 0/15 [00:00<?, ?it/s]

Epoch 0: Loss = 0.140343
Epoch 1: Loss = 0.139124
Epoch 2: Loss = 0.137918
Epoch 3: Loss = 0.136723
Epoch 4: Loss = 0.135541
Epoch 5: Loss = 0.134371
Epoch 6: Loss = 0.133212
Epoch 7: Loss = 0.132066
Epoch 8: Loss = 0.130930
Epoch 9: Loss = 0.129807
Epoch 10: Loss = 0.128694
Epoch 11: Loss = 0.127593
Epoch 12: Loss = 0.126502
Epoch 13: Loss = 0.125423
Epoch 14: Loss = 0.124355

Training completed.

Optimized Parameters: [0.02788535969157674, -0.09499784895546662, -0.04499413632617615, -0.055417287176487626, 0.04720255751880011, 0.03525772714941775, 0.0784359135409691, 0.08162299857179622]
Final Loss: 0.124355
