In [13]:
import numpy as np
import pandas as pd

In [14]:
df = pd.read_csv("housing.csv")
df.head()

Unnamed: 0,RM,LSTAT,PTRATIO,MEDV
0,6.575,4.98,15.3,504000.0
1,6.421,9.14,17.8,453600.0
2,7.185,4.03,17.8,728700.0
3,6.998,2.94,18.7,701400.0
4,7.147,5.33,18.7,760200.0



### Activation Function
We use the **sigmoid activation function**, defined as: 
$$\sigma(x) = \frac{1}{1 + e^{-x}}$$

In [15]:
class NN:

    def __init__(self, input_layer_size = 2, hidden_layer_size = 2, output_layer_size = 2, lr = 0.5, epoches = 1000):
        self.input_layer_size = input_layer_size
        self.hidden_layer_size = hidden_layer_size
        self.output_layer_size = output_layer_size
        self.lr = lr
        self.number_of_epoches = epoches

        #initialize the weights
        #1. by random numbers
        self.W1 = np.random.randn(self.input_layer_size, self.hidden_layer_size) * np.sqrt(2 / self.input_layer_size)
        self.W2 = np.random.randn(self.hidden_layer_size, self.output_layer_size) * np.sqrt(2 / self.hidden_layer_size)
        # print(f'W1 = {self.W1}')
        # print(f'W2 = {self.W2}')

        #2. with zeros
        # self.W1 = np.zeros((self.input_layer_size, self.hidden_layer_size))
        # self.W2 = np.zeros((self.hidden_layer_size, self.output_layer_size))
        # print(f'W1 = {self.W1}')
        # print(f'W2 = {self.W2}')

        #3. specific numbers
        # self.W1 = np.array([[0.15, 0.25], [0.20, 0.30]])  # ([[w1, w3], [w2, w4]])
        # self.W2 = np.array([[0.40, 0.50], [0.45, 0.55]])
        # print(f'W1 = \n{self.W1}')
        # print(f'W2 = \n{self.W2}')

        # bias
        self.b1 = 0.35
        self.b2 = 0.60

    # define the activation function
    def activation(self, x):
        return (1 / (1 + np.exp(-x)))

    # forward pass
    def forward(self, X):
        self.hidden = self.activation(np.dot(X, self.W1) + self.b1)  # H = activation(X*W1 + b)
        self.output = self.activation(np.dot(self.hidden, self.W2) + self.b2)  # OP = activation(H*W2 + b)

        # print(f'H = {self.hidden}')
        # print(f'OP = {self.output}')
        return self.output

    # back propagation
    def back_prop(self, X, y):
        output = self.forward(X)
        error = np.sum((y - output)**2/2)
        delta_output = np.multiply(-(y - output), output * (1 - output))
        # print(f'Error = {error}')
        self.W2 -= self.lr * (np.dot(self.hidden.T, delta_output)) # W2 = W2 - alpha * (H * ((y-OP) * (OP * (OP - 1))))
        # print(f'New W2 Weights: {self.W2}')

        delta_hidden = np.dot(delta_output, self.W2.T) * self.hidden * (1 - self.hidden)
        self.W1 -= self.lr * np.dot(X.T, delta_hidden)

    # training; update the weights for the given number of epoches
    def train(self, X, y):
        for _ in range(self.number_of_epoches):
            self.back_prop(X, y)

    def predict(self, X):
        return self.forward(X)

The loss function used here is **Mean Squared Error (MSE)**:
$$MSE = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2$$

The **R² score** measures the proportion of variance explained by the model:
$$R^2 = 1 - \frac{\sum (y_i - \hat{y}_i)^2}{\sum (y_i - \bar{y})^2}$$

In [16]:
def mse(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

def r_sq(y_true, y_pred):
    total = np.sum((y_true - np.mean(y_true)) ** 2)
    res = np.sum((y_true - y_pred) ** 2)
    return 1 - (res / total)

In [17]:
def k_fold_validation(X, y, k, model, **kwargs):
    indices = np.arange(len(X))
    np.random.shuffle(indices)
    fold = len(X) // k

    mse_vals = []
    r_sq_vals = []

    for i in range(k):
        indices_valid = indices[i * fold:(i + 1) * fold]
        indices_train = np.concatenate((indices[:i * fold], indices[(i + 1) * fold:]))

        X_valid, X_train = X[indices_valid], X[indices_train]
        y_valid, y_train = y[indices_valid], y[indices_train]

        model_const = model(X_train.shape[1], **kwargs)
        model_const.train(X_train, y_train)

        y_pred = model_const.predict(X_valid)

        mse_vals.append(mse(y_valid, y_pred))
        r_sq_vals.append(r_sq(y_valid, y_pred))

    y_pred = model_const.predict(X)

    return mse_vals[-1], r_sq_vals[-1] 


### Configurations
- **Case (a)**: 3 neurons in hidden layer, learning rate = 0.01
- **Case (b)**: 4 neurons in hidden layer, learning rate = 0.001
- **Case (c)**: 5 neurons in hidden layer, learning rate = 0.0001


In [18]:
learning_rate = float(input("Learning rate: "))
neurons_in = int(input("Number of input neurons: "))
neurons_out = int(input("Number of output neurons: "))
neurons_hidden = int(input("Number of hidden neurons: "))

X = df.iloc[:, :neurons_in].values
y = df.iloc[:, -neurons_out:].values

# min max normalisation
X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
y = (y - np.min(y)) / (np.max(y) - np.min(y))

print("Results of Five Fold Cross Validation: ")
mse_f, r_sq_f = k_fold_validation(X, y, 5, NN, hidden_layer_size=neurons_hidden, output_layer_size=neurons_out, lr=learning_rate, epoches=1000)
print(f"Five fold MSE : {mse_f}, R squared : {r_sq_f}\n")

print("Results of Ten Fold Cross Validation: ")
mse_t, r_sq_t = k_fold_validation(X, y, 10, NN, hidden_layer_size=neurons_hidden, output_layer_size=neurons_out, lr=learning_rate, epoches=1000)
print(f"Ten fold MSE : {mse_t}, R squared : {r_sq_t}")

Results of Five Fold Cross Validation: 
Five fold MSE : 0.025429869727998846, R squared : 0.12335954962034201

Results of Ten Fold Cross Validation: 
Ten fold MSE : 0.024390436801866724, R squared : 0.16095699073990177


## Final Report
Input Neurons were fixed at 5, while Output Neurons were fixed at 3. Epochs were 1000.
#### Configuration (a): Learning rate: 0.01, Hidden neurons: 3
- Five fold MSE : 0.009704866488162727, Five fold R² : 0.725380652661557
- Ten fold MSE : 0.007060983316970137, Ten fold R² : 0.8207650474882031

#### Configuration (b): Learning rate: 0.001, Hidden neurons: 4
- Five fold MSE : 0.02047780502454091, Five fold R² : 0.330307428819119
- Ten fold MSE : 0.0205831977379065, Ten fold R²: 0.26219980809154453

#### Configuration (c): Learning rate: 0.0001, Hidden neurons: 5
- Five fold MSE: 0.025429869727998846, Five fold R²: 0.12335954962034201
- Ten fold MSE: 0.024390436801866724, Ten fold R²: 0.16095699073990177