In [None]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn import metrics
from tensorflow.keras import layers, callbacks
from tensorflow import keras

In [None]:
# Load the raw housing data; the path is relative to the working directory.
housing_csv_path = 'housing.csv'
df = pd.read_csv(housing_csv_path)


In [None]:
# Sanity-check the raw frame: dimensions, missing values per column, and
# the number of fully duplicated rows.
null_counts = df.isnull().sum()
duplicate_count = df.duplicated().sum()

print("The Shape:", df.shape)
print("The NULL values:\n", null_counts)
print("Number of duplicated values", duplicate_count)

In [None]:
# Drop every row that has at least one missing value.
# Rebinding the result (rather than `inplace=True`) leaves the previous frame
# object untouched; the original row index labels are kept.
df = df.dropna()

In [None]:
# Descriptive statistics of the frame; as the cell's last expression, the
# result of describe() is what the notebook displays.
df.describe()

In [None]:
# Print a concise summary of the DataFrame via DataFrame.info().
df.info()

In [None]:
# Integer-encode every object-dtype column with LabelEncoder and place the
# encoded columns at the end of the frame.
# NOTE(review): the single `encoder` is re-fitted for each column, so after
# this cell it only holds the classes of the last column processed. That is
# harmless if there is one object column — confirm against the dataset.
# NOTE(review): the encoded columns do not appear to be part of the `features`
# list used for modelling — confirm whether this encoding is needed at all.
df_categorical = df.select_dtypes(include='object')
encoder = LabelEncoder()
df_categorical = df_categorical.apply(encoder.fit_transform)

# Replace the original string columns in one rebinding instead of dropping
# them in place first, so `df` is never left half-modified.
df = pd.concat(
    [df.drop(columns=df_categorical.columns), df_categorical],
    axis=1,
)

In [None]:
# Column names used as model inputs, and the column to predict.
features = [
    'longitude',
    'latitude',
    'housing_median_age',
    'total_rooms',
    'total_bedrooms',
    'population',
    'households',
    'median_income',
]
target = 'median_house_value'

In [None]:
# Build the input matrix and the log-transformed target.
X = df.loc[:, features]
# Train on log1p(target); the evaluation cells map predictions back with expm1.
y = np.log1p(df.loc[:, target])

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Standardise the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics leak in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Build the regression network: three ReLU hidden layers (256 -> 128 -> 64),
# with batch normalisation and 30% dropout after the first two, and a single
# linear output unit.
#
# Seed Python, NumPy and the Keras backend before any layer is created so that
# weight initialisation, dropout and batch shuffling are repeatable on
# Restart & Run All (GPU kernels may still add some nondeterminism).
# 42 matches the random_state used for the train/test split.
keras.utils.set_random_seed(42)

model = keras.Sequential([
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)  # linear output for regression
])

In [None]:
# Configure training: Adam optimizer, mean-squared-error loss, and mean
# absolute error tracked as an additional metric.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Stop training once the monitored loss has failed to improve by at least
# 1e-4 for 20 consecutive epochs, then restore the best epoch's weights.
early_stop = callbacks.EarlyStopping(
    monitor='val_loss',  # the Keras default, spelled out for readers
    min_delta=0.0001,
    patience=20,
    restore_best_weights=True,
)

In [None]:
# Fit on the scaled training data for up to 300 epochs. 20% of the training
# rows are held out for validation, which the early-stopping callback uses.
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=300,
    validation_split=0.2,
    callbacks=[early_stop],
)

In [None]:
# Plot training vs. validation curves per epoch: first the loss (MSE), then
# the MAE metric. Each figure is shown separately.
history_df = pd.DataFrame(history.history)

for curve_columns, curve_title, curve_ylabel in (
    (['loss', 'val_loss'], "Mean Squared Error", "Loss"),
    (['mae', 'val_mae'], "Mean Absolute Error", "Error"),
):
    history_df[curve_columns].plot(title=curve_title)
    plt.xlabel("Epoch")
    plt.ylabel(curve_ylabel)
    plt.grid(True)
    plt.show()

In [None]:
# Test-set metrics on the original target scale.
# The model was trained on log1p(target), so both the predictions and the
# held-out targets are mapped back with expm1 before scoring.
y_pred = model.predict(X_test_scaled).flatten()
y_test_actual = np.expm1(y_test)
y_pred_actual = np.expm1(y_pred)

mae = metrics.mean_absolute_error(y_test_actual, y_pred_actual)
rmse = metrics.root_mean_squared_error(y_test_actual, y_pred_actual)
r2 = metrics.r2_score(y_test_actual, y_pred_actual)

print(
    f"MAE: {mae:.2f}",
    f"RMSE: {rmse:.2f}",
    f"R² Score: {r2:.4f}",
    sep="\n",
)

In [None]:
# Test-set metrics on the log1p scale, i.e. the scale the model was trained on.
# NOTE: this cell rebinds `y_pred`, `rmse`, `r2` and `mae`, which the previous
# metrics cell set on the original target scale.
# Flatten the predictions so `y_pred` stays 1-D like `y_test`, matching the
# previous metrics cell and avoiding accidental (n, n) broadcasting if the two
# are ever combined arithmetically. The metric values are unchanged.
y_pred = model.predict(X_test_scaled).flatten()
rmse = metrics.root_mean_squared_error(y_test, y_pred)
r2 = metrics.r2_score(y_test, y_pred)
mae = metrics.mean_absolute_error(y_test, y_pred)

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R² Score: {r2:.4f}")

In [None]:
# Evaluate on the held-out test set with Keras. evaluate() returns the compiled
# loss (MSE) followed by the MAE metric, both on the log1p scale; the square
# root of the loss gives the RMSE.
test_loss, test_mae = model.evaluate(x=X_test_scaled, y=y_test)
print(
    f"Test RMSE: {np.sqrt(test_loss):.4f}",
    f"Test MAE: {test_mae:.4f}",
    sep="\n",
)