In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
from sklearn.preprocessing import MinMaxScaler

df = pd.read_csv('/kaggle/input/mla2-part2/mla2part2.csv')
df.describe()

Unnamed: 0,id,Date,number of bedrooms,number of bathrooms,living area,lot area,number of floors,waterfront present,number of views,condition of the house,...,Built Year,Renovation Year,Postal Code,Lattitude,Longitude,living_area_renov,lot_area_renov,Number of schools nearby,Distance from the airport,Price
count,14620.0,14620.0,14620.0,14620.0,14620.0,14620.0,14620.0,14620.0,14620.0,14620.0,...,14620.0,14620.0,14620.0,14620.0,14620.0,14620.0,14620.0,14620.0,14620.0,14620.0
mean,6762821000.0,42604.538646,3.379343,2.129583,2098.262996,15093.28,1.50236,0.007661,0.233105,3.430506,...,1970.926402,90.924008,122033.062244,52.792848,-114.404007,1996.702257,12753.500068,2.012244,64.950958,538932.2
std,6237.575,67.347991,0.938719,0.769934,928.275721,37919.62,0.540239,0.087193,0.766259,0.664151,...,29.493625,416.216661,19.082418,0.137522,0.141326,691.093366,26058.414467,0.817284,8.936008,367532.4
min,6762810000.0,42491.0,1.0,0.5,370.0,520.0,1.0,0.0,0.0,1.0,...,1900.0,0.0,122003.0,52.3859,-114.709,460.0,651.0,1.0,50.0,78000.0
25%,6762815000.0,42546.0,3.0,1.75,1440.0,5010.75,1.0,0.0,0.0,3.0,...,1951.0,0.0,122017.0,52.7076,-114.519,1490.0,5097.75,1.0,57.0,320000.0
50%,6762821000.0,42600.0,3.0,2.25,1930.0,7620.0,1.5,0.0,0.0,3.0,...,1975.0,0.0,122032.0,52.8064,-114.421,1850.0,7620.0,2.0,65.0,450000.0
75%,6762826000.0,42662.0,4.0,2.5,2570.0,10800.0,2.0,0.0,0.0,4.0,...,1997.0,0.0,122048.0,52.9089,-114.315,2380.0,10125.0,3.0,73.0,645000.0
max,6762832000.0,42734.0,33.0,8.0,13540.0,1074218.0,3.5,1.0,4.0,5.0,...,2015.0,2015.0,122072.0,53.0076,-113.505,6110.0,560617.0,3.0,80.0,7700000.0


In [2]:
df.duplicated().sum()

0

identifying columns in the DataFrame that contain string/object data, converting them into categorical type, and then transforming the categorical values into corresponding numerical codes, replacing the original string/object data in those columns.

In [3]:
for col in df.columns:
    if(df[col].dtype=='object'):
        df[col]=df[col].astype('category')
        df[col]=df[col].cat.codes

We need the input data which doesn't contain the column 'Price', s we are dropping that and storing the other features in X.

In [4]:
X = df.drop(columns='Price')
X = np.asarray(X, dtype='float64')
print("Shapes - X:", X.shape)
print("Data Types - X:", X.dtype)

Shapes - X: (14620, 22)
Data Types - X: float64


In [5]:
Y = df['Price']
Y.head(5)

0    2380000
1    1400000
2    1200000
3     838000
4     805000
Name: Price, dtype: int64

In [6]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.3,random_state=0)

print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(10234, 22)
(4386, 22)
(10234,)
(4386,)


# Activation Functions

These are mathematical functions commonly used in machine learning and neural networks for introducing non-linearity or maintaining linearity in the activation of neurons or nodes. Each of these functions serves a different purpose in neural networks involving transformations of data or outputs within a neural network layer.

In [7]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)

def tanh(x):
    return np.tanh(x)

def linear(x):
    return x

# Loss Functions

These are commonly used error or loss functions in machine learning for evaluating the performance of regression models. Each of these error functions measures the discrepancy or difference between predicted values and true values in a regression setting, helping to assess the performance of machine learning models by quantifying the errors made by the model in predicting the target values.

In [8]:
def mean_squared_error(predictions, targets):
    m = targets.shape[0]
    loss = np.sum((predictions - targets) ** 2) / m
    return loss

def mean_absolute_error(predictions, targets):
    m = targets.shape[0]
    error = np.sum(np.abs(predictions - targets)) / m
    return error

def root_mean_squared_error(y_true, y_pred):
    m = targets.shape[0]
    error = np.sqrt(np.mean((y_true - y_pred) ** 2)) / m
    return error

# Neural Network Implementation

This function, initialize_parameters, is used in neural network initialization for setting the initial values of weights and biases. Weight initialization scales these random values by multiplying with np.sqrt(2 / input_size). This scaling factor is used to prevent the weights from becoming too large or too small during initialization, which can affect learning. Overall, this function sets up the initial parameters (weights and biases) of a neural network model based on the specified layer sizes, using random initialization with specific scaling factors to facilitate better convergence and learning during the training process.

In [9]:
def initialize_parameters(input_size, hidden_size, output_size):
    np.random.seed(0)
    weights_1 = np.random.randn(hidden_size, input_size) * np.sqrt(2 / input_size)
    biases_1 = np.zeros((hidden_size, 1))
    weights_2 = np.random.randn(output_size, hidden_size) * np.sqrt(2 / hidden_size)
    biases_2 = np.zeros((output_size, 1))
    return weights_1, biases_1, weights_2, biases_2

## Forward Propagation: 
* Linear activation function for results of the input layer
* ReLU activation function for the results of the hidden layer

In [10]:
def forward_propagation(input_data, parameters):
    weights_1, biases_1, weights_2, biases_2 = parameters
    Z1 = np.dot(weights_1, input_data.T) + biases_1
    A1 = linear(Z1) 
    Z2 = np.dot(weights_2, A1) + biases_2
    A2 = linear(Z2)
    cache = {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
    return A2, cache

# Backward Propagation

This function, backward_propagation, is a part of the backpropagation process in a neural network used for computing gradients of the loss function with respect to the model parameters. Overall, this function computes the gradients of the loss function with respect to the parameters (weights and biases) using backpropagation, enabling the optimization algorithm to update the neural network's parameters to minimize the loss and improve prediction accuracy.

In [11]:
def backward_propagation(parameters, cache, input_data, labels):
    m = labels.shape[0]
    Z1, A1, Z2, A2 = cache["Z1"], cache["A1"], cache["Z2"], cache["A2"]

    labelsT = labels.T
    dA2 = - (labelsT / A2) + ((1 - labelsT) / (1 - A2))
    dZ2 = dA2 * (A2 * (1-A2))
    dW2 = (1/m) * np.dot(dZ2, A1.T)
    db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)
    
    dA1 = np.dot(parameters[2].T, dZ2)
    dZ1 = dA1 * (A1 * (1-A1))
    dW1 = (1/m) * np.dot(dZ1, input_data)
    db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)

    return dW1, db1, dW2, db2

The function update_parameters is responsible for updating the parameters (weights and biases) of a neural network using the gradients obtained from the backpropagation process. This function implements the gradient descent algorithm by subtracting a fraction of the gradients (scaled by the learning rate) from the current parameter values, aiming to minimize the loss function and improve the neural network's performance during training.

In [12]:
def update_parameters(parameters, gradients, learning_rate):
    dW1, db1, dW2, db2 = gradients
    weights_1, biases_1, weights_2, biases_2 = parameters
 
    weights_1 = weights_1 - learning_rate * dW1
    biases_1 = biases_1 - learning_rate * db1
    weights_2 = weights_2 - learning_rate * dW2
    biases_2 = biases_2 - learning_rate * db2
    
    return weights_1, biases_1, weights_2, biases_2

'Train" function is used to train a neural network using a custom training loop.
Iterates for num_iterations:
Performs forward propagation to get the output predictions and caches the values.
Computes the loss using mean_absolute_error function.
Performs backward propagation to compute gradients of the loss with respect to the parameters.
Clips the gradients using np.clip to prevent them from exceeding a certain threshold (-clip_value to clip_value).
Updates the network parameters using gradient descent with update_parameters.
Checks for conditions to potentially stop training:
NaN loss check: If loss becomes NaN, the training stops.
Checks for a change in loss smaller than the tolerance.
Checks if the loss is increasing (early stopping condition).
Prints loss at every tenth iteration.

In [13]:
def train(features, labels, hidden_layer_size, num_iterations, learning_rate, clip_value=2.0, tolerance=1e-6):
    network_parameters = initialize_parameters(features.shape[1], hidden_layer_size, 1)
    prev_loss = float('inf')
    
    optimal_parameters = network_parameters
    
    for i in range(num_iterations):
        output, cache = forward_propagation(features, network_parameters)
        loss = mean_absolute_error(output, labels)
        gradients = backward_propagation(network_parameters, cache, features, labels)
        
        for gradient in gradients:
            np.clip(gradient, -clip_value, clip_value, out=gradient)
        
        network_parameters = update_parameters(network_parameters, gradients, learning_rate)
        
        if not np.isnan(loss):
            optimal_parameters = network_parameters 
            
        if prev_loss < loss:
            print(f"Early stopping at iteration {i} due to minima with loss {loss}")
            break
        if abs(prev_loss - loss) < tolerance:
            print(f"Early stopping at iteration {i} due to tolerance limit with loss {loss}")
            break 
            
        if i % 10 == 0:
            print(f"Iteration {i}: Loss = {loss}")
            
        prev_loss = loss
        
    return optimal_parameters

In [14]:
if not isinstance(Y_train, np.ndarray):
    Y_train = np.array(Y_train)

trained_parameters = train(features=X_train, 
                           labels=Y_train, 
                           hidden_layer_size=128, 
                           num_iterations=10000,
                           learning_rate=0.0001)

Iteration 0: Loss = 1737212571.9291844
Iteration 10: Loss = 1495073889.8231373
Iteration 20: Loss = 1253061435.7867646
Iteration 30: Loss = 1011256995.1874495
Iteration 40: Loss = 769675304.983246
Iteration 50: Loss = 528372466.1417243
Iteration 60: Loss = 287298795.5478638
Iteration 70: Loss = 46590407.278540716
Early stopping at iteration 73 due to minima with loss 22539832.94558511


predict function to generate predictions using a trained neural network and then prints out the shape and content of the predictions. The printed output shows the shape and values of these predictions.

In [15]:
def predict(X, parameters):
    A2, _ = forward_propagation(X, parameters)
    return A2

predictions = predict(X_test, trained_parameters)

print("Predictions Size:", predictions.shape)
print("Predictions:", predictions)

Predictions Size: (1, 4386)
Predictions: [[2056325.89822262 2048231.74323129 2053569.95411801 ... 2053172.12922531
  2057853.89323848 2048139.24532073]]
