## Import required packages

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

### Load dataset

In [2]:
# Read the USA housing data from the CSV file (path is relative to the
# notebook's working directory) into a DataFrame
df = pd.read_csv(filepath_or_buffer="USA_Housing.csv")


In [4]:
# Preview a few rows of the data.
# sample() draws rows at random and no random_state is passed, so the rows
# shown change every time this cell is run.

df.sample(5)

Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price,Address
356,62431.9381,6.798994,8.599555,4.48,44115.983419,1523915.0,"9249 Robert Cliffs\nNew Susan, AR 48038"
580,61644.173612,7.163657,7.751754,5.17,27809.135077,1384140.0,"PSC 9115, Box 6211\nAPO AA 90999-6054"
425,51718.354612,7.509044,7.109312,3.11,49639.173237,1285924.0,"855 Carol Burgs\nFrazierchester, MS 46812-0037"
780,74490.639243,5.408653,8.457362,4.1,35563.456132,1358214.0,"238 Julie Parks\nChristopherton, NH 95146"
3086,50167.486279,7.559816,7.35823,3.08,34292.306943,1058356.0,"5575 Lori Valley Suite 277\nEast Laurenshire, ..."


In [5]:
# Check the dimensions of the data as (number of rows, number of columns)
df.shape

(5000, 7)

In [6]:
# Count the missing values in each column (0 means the column is complete)

df.isnull().sum()

Avg. Area Income                0
Avg. Area House Age             0
Avg. Area Number of Rooms       0
Avg. Area Number of Bedrooms    0
Area Population                 0
Price                           0
Address                         0
dtype: int64

In [7]:
# Summarize the columns: non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 7 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Avg. Area Income              5000 non-null   float64
 1   Avg. Area House Age           5000 non-null   float64
 2   Avg. Area Number of Rooms     5000 non-null   float64
 3   Avg. Area Number of Bedrooms  5000 non-null   float64
 4   Area Population               5000 non-null   float64
 5   Price                         5000 non-null   float64
 6   Address                       5000 non-null   object 
dtypes: float64(6), object(1)
memory usage: 273.6+ KB


In [21]:
# Target: the house price we want to predict
y = df["Price"]

# Features: every remaining column except the free-text address
X = df.drop(columns=["Address", "Price"])


In [22]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical from one run to the next
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)


### Standardize the features

In [29]:
# Learn the scaling parameters from the training features only, then apply
# that same fitted scaler to both the training and the test features
scaler = StandardScaler().fit(x_train)
x_train_sd = scaler.transform(x_train)
x_test_sd = scaler.transform(x_test)


## Model Architecture


In [30]:
# Seed the Python, NumPy and TensorFlow random number generators before any
# layer is created, so the randomly initialized weights do not change from
# one Restart & Run All to the next.
set_random_seed(0)

model = Sequential()

# Input layer: 128 ReLU units. The input width is taken from the standardized
# training features, which is the array the model is actually trained on.
model.add(Dense(128, kernel_initializer="normal", input_dim=x_train_sd.shape[1], activation="relu"))

# Hidden layers: two fully connected ReLU layers of 256 units each
model.add(Dense(256, kernel_initializer="normal", activation="relu"))
model.add(Dense(256, kernel_initializer="normal", activation="relu"))

# Output layer: a single linear unit producing the real-valued price prediction
model.add(Dense(1, kernel_initializer="normal", activation="linear"))

In [31]:
# Print each layer's output shape and parameter count, plus the totals
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 128)               768       
                                                                 
 dense_9 (Dense)             (None, 256)               33024     
                                                                 
 dense_10 (Dense)            (None, 256)               65792     
                                                                 
 dense_11 (Dense)            (None, 1)                 257       
                                                                 
Total params: 99,841
Trainable params: 99,841
Non-trainable params: 0
_________________________________________________________________


In [38]:
# Filename pattern embedding the epoch number and validation loss. It is not
# passed to the callback in this cell, which always writes to "housing.hdf5".
checkpoint_name = "{epoch:03d}--{val_loss:.3f}.hdf5"

# Model checkpoint: overwrite the saved model only when the validation loss
# improves, and log a message each time a save happens
checkpoint = ModelCheckpoint(
    filepath="housing.hdf5",
    save_best_only=True,
    verbose=1,
)
callbacks_list = [checkpoint]


In [39]:
# Configure training: Adam optimizer with mean squared error as the loss;
# MSE is additionally reported as a metric
model.compile(
    loss="mse",
    optimizer="adam",
    metrics=["mean_squared_error"],
)

In [41]:
# Fit on the standardized training features for 5 epochs in batches of 64.
# 20% of the training data is held out for validation, and the checkpoint
# callback is invoked during training.
hist = model.fit(
    x=x_train_sd,
    y=y_train,
    validation_split=0.2,
    epochs=5,
    batch_size=64,
    callbacks=callbacks_list,
)

Epoch 1/5
Epoch 1: val_loss improved from inf to 1645057671168.00000, saving model to housing.hdf5
Epoch 2/5
Epoch 2: val_loss improved from 1645057671168.00000 to 1610540384256.00000, saving model to housing.hdf5
Epoch 3/5
Epoch 3: val_loss improved from 1610540384256.00000 to 1484208209920.00000, saving model to housing.hdf5
Epoch 4/5
Epoch 4: val_loss improved from 1484208209920.00000 to 1171528876032.00000, saving model to housing.hdf5
Epoch 5/5
Epoch 5: val_loss improved from 1171528876032.00000 to 664365694976.00000, saving model to housing.hdf5
