In [1]:
# Importing necessary libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Flatten, InputLayer
import keras

In [3]:
# Reading the data
train = pd.read_csv('house-price.csv')
trainX, trainY = train.iloc[:, :train.shape[1]-1], train.iloc[:, train.shape[1]-1]


In [4]:
# There are a total of 43 categorical columns
categoricals = trainX.loc[:, trainX.dtypes == 'O'].columns
len(categoricals) # 43

43

In [6]:
 #Preprocessing step: 
# One Hot Encoder cannot work with NaN, hence filling NaN with mode of categorical columns
cat_features = trainX.loc[:, categoricals]
cat_features = cat_features.fillna(cat_features.mode().iloc[0, :])


In [7]:
# One hot encoding these features
ohe = OneHotEncoder(handle_unknown='ignore')
res = ohe.fit_transform(cat_features).toarray()


In [8]:
cols = np.array([])
for i in range(cat_features.shape[1]):
    cols = np.concatenate((cols, categoricals[i] + '_' + np.sort(cat_features.iloc[:, i].unique())))    
cat = pd.DataFrame(res, columns=cols)

In [9]:
# Total 252 categorical features
cat.shape # (1460, 252)
# Dropping original categorical variables
trainX = trainX.drop(categoricals, axis=1)
# Concatenating the One Hot Encoded variables to the train dataset
trainX = pd.concat([trainX, cat], axis=1)
# New data shape
trainX.shape # (1460, 289)
# Filling the NaN with median
trainX.fillna(trainX.median(), inplace=True)


In [10]:
# Normalizing training features
scalar = MinMaxScaler()
norm_train = pd.DataFrame(scalar.fit_transform(trainX), columns=trainX.columns)
# Normalizing training target
scalar_target = MinMaxScaler()
trainY = scalar_target.fit_transform(trainY.values.reshape(-1, 1))

In [12]:
# Defining the network
model = keras.Sequential([
  Dense(norm_train.shape[1], input_dim=norm_train.shape[1], activation='sigmoid'),    
  Dense(units=norm_train.shape[1]//2, activation='sigmoid'),    
  Dense(units=1, activation='softmax'),
])

In [13]:
# Printing model summary
model.summary()

In [14]:
# Compiling and Training Network
model.compile(optimizer='sgd', loss='mean_squared_error')
model.fit(trainX, trainY, batch_size=512, epochs=20, verbose=1, validation_split=0.2)

Epoch 1/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.6495 - val_loss: 0.6456
Epoch 2/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6497 - val_loss: 0.6456
Epoch 3/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6473 - val_loss: 0.6456
Epoch 4/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6481 - val_loss: 0.6456
Epoch 5/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6476 - val_loss: 0.6456
Epoch 6/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6495 - val_loss: 0.6456
Epoch 7/20




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6469 - val_loss: 0.6456
Epoch 8/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6511 - val_loss: 0.6456
Epoch 9/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6477 - val_loss: 0.6456
Epoch 10/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6502 - val_loss: 0.6456
Epoch 11/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6465 - val_loss: 0.6456
Epoch 12/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6465 - val_loss: 0.6456
Epoch 13/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6503 - val_loss: 0.6456
Epoch 14/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6484 - val_loss: 0.6456
Epoch 15/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/

<keras.src.callbacks.history.History at 0x17fe22de0>

In [16]:
# Defining the network
model = keras.Sequential([
  Dense(norm_train.shape[1], input_dim=norm_train.shape[1], activation='sigmoid'),    
  Dense(units=norm_train.shape[1]//2, activation='sigmoid'),    
    # 3rd hidden layer (new)
    Dense(units=32, activation='sigmoid'),
    
    # 4th hidden layer (new)
    Dense(units=32, activation='sigmoid'),
    
    # 5th hidden layer (new)
    Dense(units=16, activation='sigmoid'),
  Dense(units=1, activation='softmax'),
])

In [17]:
# Printing model summary
model.summary()

In [18]:
# Compiling and Training Network
model.compile(optimizer='sgd', loss='mean_squared_error')
model.fit(trainX, trainY, batch_size=512, epochs=20, verbose=1, validation_split=0.2)

Epoch 1/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.6486 - val_loss: 0.6456
Epoch 2/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6495 - val_loss: 0.6456
Epoch 3/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6475 - val_loss: 0.6456
Epoch 4/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6488 - val_loss: 0.6456
Epoch 5/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6463 - val_loss: 0.6456
Epoch 6/20




[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6493 - val_loss: 0.6456
Epoch 7/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6498 - val_loss: 0.6456
Epoch 8/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.6501 - val_loss: 0.6456
Epoch 9/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6479 - val_loss: 0.6456
Epoch 10/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6470 - val_loss: 0.6456
Epoch 11/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6486 - val_loss: 0.6456
Epoch 12/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6484 - val_loss: 0.6456
Epoch 13/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6494 - val_loss: 0.6456
Epoch 14/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/s

<keras.src.callbacks.history.History at 0x300969370>