<a href="https://colab.research.google.com/github/bhargav23/AIML-DL-Lab/blob/main/4_Neural_Network_for_Boston_Housing_Price_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**4. Design a neural network for predicting house prices using Boston Housing Price dataset.**

In [1]:
# Import necessary libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# --- 1. Load the Dataset ---
# The Boston Housing dataset is no longer available directly from scikit-learn.
# We will load it from a reliable online source.
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]

# Create a pandas DataFrame for easier manipulation
# The feature names are well-documented for this dataset.
feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
X = pd.DataFrame(data, columns=feature_names)
y = pd.Series(target, name='MEDV')

In [3]:
# --- 2. Data Preprocessing ---

# Split the data into training and testing sets
# This allows us to evaluate the model's performance on unseen data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
# Neural networks often perform better when numerical input features are scaled to a similar range.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
# --- 3. Build the Neural Network Model ---

# We will create a sequential model, which is a linear stack of layers.
model = keras.Sequential([
    # Input layer: The input_shape should match the number of features.
    # We use a 'relu' (Rectified Linear Unit) activation function, which is a common choice for hidden layers.
    layers.Dense(64, activation='relu', input_shape=[X_train.shape[1]]),

    # Hidden layer: Another dense layer with 64 neurons and 'relu' activation.
    # Adding more layers can help the model learn more complex patterns.
    layers.Dense(64, activation='relu'),

    # Output layer: A single neuron for the output, as we are predicting a single value (the house price).
    # There is no activation function here, as we want to predict a continuous value.
    layers.Dense(1)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [5]:
# --- 4. Compile the Model ---

# We need to configure the model for training.
# We'll use the 'adam' optimizer, which is an efficient and popular choice.
# The loss function will be 'mean_squared_error', which is suitable for regression problems.
# We will also track the 'mean_absolute_error' as a metric.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_absolute_error'])

# Display a summary of the model's architecture
model.summary()

In [6]:
# --- 5. Train the Model ---

# We will now train the model on our training data.
# 'epochs' is the number of times the model will cycle through the data.
# 'validation_split' reserves a portion of the training data for validation during training.
# 'verbose=1' will show the training progress.
history = model.fit(
    X_train_scaled, y_train,
    epochs=100,
    validation_split=0.2,
    verbose=1
)


Epoch 1/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 50ms/step - loss: 551.8796 - mean_absolute_error: 21.7664 - val_loss: 517.5536 - val_mean_absolute_error: 21.1801
Epoch 2/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step - loss: 545.7297 - mean_absolute_error: 21.5034 - val_loss: 469.2753 - val_mean_absolute_error: 20.0369
Epoch 3/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 498.2233 - mean_absolute_error: 20.2003 - val_loss: 409.4130 - val_mean_absolute_error: 18.5370
Epoch 4/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 400.5465 - mean_absolute_error: 17.9328 - val_loss: 335.0078 - val_mean_absolute_error: 16.5026
Epoch 5/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 355.9507 - mean_absolute_error: 16.5564 - val_loss: 245.1151 - val_mean_absolute_error: 13.8264
Epoch 6/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━

In [7]:
# --- 6. Evaluate the Model ---

# Now, we can evaluate the model's performance on the test set.
loss, mae = model.evaluate(X_test_scaled, y_test, verbose=0)

print("\n--- Model Evaluation ---")
print(f"Mean Absolute Error on Test Data: {mae:.2f}")


--- Model Evaluation ---
Mean Absolute Error on Test Data: 2.41


In [8]:
# --- 7. Make Predictions ---

# We can use our trained model to make predictions on new data.
# Here, we'll use the test set as an example.
test_predictions = model.predict(X_test_scaled).flatten()

print("\n--- Example Predictions ---")
for i in range(5):
    print(f"Predicted Price: {test_predictions[i]:.2f}, Actual Price: {y_test.iloc[i]:.2f}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step

--- Example Predictions ---
Predicted Price: 28.20, Actual Price: 23.60
Predicted Price: 33.82, Actual Price: 32.40
Predicted Price: 18.77, Actual Price: 13.60
Predicted Price: 26.49, Actual Price: 22.80
Predicted Price: 16.41, Actual Price: 16.10
