In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, InputLayer
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error

In [3]:
# Load the raw housing table; the relative path is resolved against the
# kernel's current working directory.
data = pd.read_csv(filepath_or_buffer='housing.csv')

In [None]:
# Quick look at the raw table: column dtypes / non-null counts, summary
# statistics, and the number of missing values per column.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
data.info()
print(data.describe())
print(" ")
print(data.isnull().sum())

In [4]:
# Feature matrix: every column except the target, with 'ocean_proximity'
# one-hot encoded (first level dropped) and rows containing any missing
# value removed. The surviving row labels are kept so the target can be
# aligned to them afterwards.
X = pd.get_dummies(
    data.drop(columns='median_house_value'),
    columns=['ocean_proximity'],
    drop_first=True,
).dropna()

In [5]:
# Target vector, restricted to the row labels that survived the dropna() on X
# so features and target stay aligned one-to-one.
y = data.loc[X.index, 'median_house_value']

In [6]:
# Two-stage split with a fixed seed: 60 % of the rows go to training, and the
# held-out 40 % is then halved into validation and test (20 % each).
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)

In [None]:
# Report the (rows, columns) shape of each split's features and target,
# one split per output line.
print(
    f"Training set: {X_train.shape}, {y_train.shape}",
    f"Validation set: {X_val.shape}, {y_val.shape}",
    f"Test set: {X_test.shape}, {y_test.shape}",
    sep="\n",
)

In [7]:
# Medium regression network: two hidden ReLU layers (32 -> 16) feeding a single
# linear output unit that predicts the house value.
medium_nn = Sequential(
    [
        # Input width follows the encoded feature matrix instead of the
        # previously hard-coded 12, so the model keeps matching the data if
        # the column set or one-hot encoding changes.
        Input(shape=(X_train.shape[1],)),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1, activation='linear')
    ]
)

# NOTE(review): 0.1 is far above Adam's documented default of 0.001, and the
# logged val_loss jumps up and down between epochs; consider a smaller rate
# and/or scaling the features. Left unchanged here to preserve results.
optimizer = Adam(learning_rate=0.1)
# save_best_only=True keeps only the best-scoring epoch on disk
# (Keras monitors val_loss unless told otherwise).
cp = ModelCheckpoint('models/medium_nn.keras', save_best_only=True)
# Optimise MSE; RMSE is tracked as a metric so the logs are in target units.
medium_nn.compile(optimizer=optimizer, loss='mse', metrics=[RootMeanSquaredError()])
medium_nn.fit(x=X_train, y=y_train, validation_data=(X_val, y_val), callbacks=[cp], epochs=100)

Epoch 1/100
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 20648179712.0000 - root_mean_squared_error: 141703.1406 - val_loss: 10859945984.0000 - val_root_mean_squared_error: 104211.0625
Epoch 2/100
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 10376749056.0000 - root_mean_squared_error: 101828.1172 - val_loss: 8207210496.0000 - val_root_mean_squared_error: 90593.6562
Epoch 3/100
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 7945742336.0000 - root_mean_squared_error: 89131.2344 - val_loss: 10492327936.0000 - val_root_mean_squared_error: 102432.0625
Epoch 4/100
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 7471092736.0000 - root_mean_squared_error: 86327.6094 - val_loss: 6518888960.0000 - val_root_mean_squared_error: 80739.6406
Epoch 5/100
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 6414197760.0000 - roo

<keras.src.callbacks.history.History at 0x11bb9b2a7b0>

In [8]:
from tensorflow.keras.models import load_model

In [9]:
# Reload the checkpoint that ModelCheckpoint(save_best_only=True) kept on disk,
# replacing the in-memory model left over from the final training epoch.
medium_nn = load_model('models/medium_nn.keras')
# (train RMSE, validation RMSE). Arguments follow sklearn's documented
# (y_true, y_pred) order; RMSE is symmetric, so the reported values are the
# same as with the previous reversed order.
(
    root_mean_squared_error(y_train, medium_nn.predict(X_train)),
    root_mean_squared_error(y_val, medium_nn.predict(X_val)),
)

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


(63160.867414339285, 62728.49199954467)

In [10]:
# Large regression network: four hidden ReLU layers (256 -> 128 -> 64 -> 32)
# feeding a single linear output unit that predicts the house value.
large_nn = Sequential(
    [
        # Input width follows the encoded feature matrix instead of the
        # previously hard-coded 12, so the model keeps matching the data if
        # the column set or one-hot encoding changes.
        Input(shape=(X_train.shape[1],)),
        Dense(256, activation='relu'),
        Dense(128, activation='relu'),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1, activation='linear')
    ]
)

# NOTE(review): 0.1 is far above Adam's documented default of 0.001, and the
# logged val_loss jumps up and down between epochs; consider a smaller rate
# and/or scaling the features. Left unchanged here to preserve results.
optimizer = Adam(learning_rate=0.1)
# save_best_only=True keeps only the best-scoring epoch on disk
# (Keras monitors val_loss unless told otherwise).
cp = ModelCheckpoint('models/large_nn.keras', save_best_only=True)
# Optimise MSE; RMSE is tracked as a metric so the logs are in target units.
large_nn.compile(optimizer=optimizer, loss='mse', metrics=[RootMeanSquaredError()])
large_nn.fit(x=X_train, y=y_train, validation_data=(X_val, y_val), callbacks=[cp], epochs=100)

Epoch 1/100
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 26433175552.0000 - root_mean_squared_error: 160435.2031 - val_loss: 10273703936.0000 - val_root_mean_squared_error: 101359.2812
Epoch 2/100
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 10150651904.0000 - root_mean_squared_error: 100734.0547 - val_loss: 7757383168.0000 - val_root_mean_squared_error: 88076.0078
Epoch 3/100
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 10701068288.0000 - root_mean_squared_error: 103236.1016 - val_loss: 12659579904.0000 - val_root_mean_squared_error: 112514.7969
Epoch 4/100
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 8595082240.0000 - root_mean_squared_error: 92615.0000 - val_loss: 11427513344.0000 - val_root_mean_squared_error: 106899.5469
Epoch 5/100
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 7191569408.0000 -

<keras.src.callbacks.history.History at 0x11bbd946900>

In [11]:
# Reload the checkpoint that ModelCheckpoint(save_best_only=True) kept on disk,
# replacing the in-memory model left over from the final training epoch.
large_nn = load_model('models/large_nn.keras')
# (train RMSE, validation RMSE). Arguments follow sklearn's documented
# (y_true, y_pred) order; RMSE is symmetric, so the reported values are the
# same as with the previous reversed order.
(
    root_mean_squared_error(y_train, large_nn.predict(X_train)),
    root_mean_squared_error(y_val, large_nn.predict(X_val)),
)

[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 873us/step
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 926us/step


(67406.10606840745, 66309.40364785239)