## Preprocessing

In [None]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

# Read the preprocessed housing data (pandas is already imported above) and preview it.
house_df = pd.read_csv("train_processed.csv")
house_df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,Distance from 2008,Adjusted Sale Price
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,,,0,2,2008,WD,Normal,208500,0,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,,,0,5,2007,WD,Normal,181500,1,191483
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,,,0,9,2008,WD,Normal,223500,0,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,,,0,2,2006,WD,Abnorml,140000,2,155400
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,,,0,12,2008,WD,Normal,250000,0,250000


In [None]:
# Show the dtype of every column in house_df (object columns are the text/categorical ones).
house_df.dtypes

Id                       int64
MSSubClass               int64
MSZoning                object
LotFrontage            float64
LotArea                  int64
                        ...   
SaleType                object
SaleCondition           object
SalePrice                int64
Distance from 2008       int64
Adjusted Sale Price      int64
Length: 83, dtype: object

In [None]:
# Drop the columns that are excluded from modeling: the "Id" row identifier,
# a set of property/sale descriptor columns, and the two extra price columns
# ("Distance from 2008", "Adjusted Sale Price").
# NOTE(review): the last two look derived from YrSold/SalePrice, so keeping them
# would presumably leak the target — confirm against the preprocessing step.
columns_to_drop = [
    "Id",
    "Alley",
    "LandSlope",
    "RoofMatl",
    "Condition2",
    "Exterior1st",
    "Exterior2nd",
    "MasVnrType",
    "MasVnrArea",
    "BsmtExposure",
    "BsmtFinType1",
    "BsmtFinType2",
    "BsmtFinSF2",
    "Heating",
    "SaleCondition",
    "SaleType",
    "YrSold",
    "MoSold",
    "MiscVal",
    "MiscFeature",
    "ScreenPorch",
    "3SsnPorch",
    "LowQualFinSF",
    "Electrical",
    "Distance from 2008",
    "Adjusted Sale Price",
]
house_df = house_df.drop(columns=columns_to_drop)

In [None]:
# Count the distinct (non-null) values in each remaining column.
house_df.nunique()

MSSubClass         15
MSZoning            5
LotFrontage       110
LotArea          1073
Street              2
LotShape            4
LandContour         4
Utilities           2
LotConfig           5
Neighborhood       25
Condition1          9
BldgType            5
HouseStyle          8
OverallQual        10
OverallCond         9
YearBuilt         112
YearRemodAdd       61
RoofStyle           6
ExterQual           4
ExterCond           5
Foundation          6
BsmtQual            4
BsmtCond            4
BsmtFinSF1        637
BsmtUnfSF         780
TotalBsmtSF       721
HeatingQC           5
CentralAir          2
1stFlrSF          753
2ndFlrSF          417
GrLivArea         861
BsmtFullBath        4
BsmtHalfBath        3
FullBath            4
HalfBath            3
BedroomAbvGr        8
KitchenAbvGr        4
KitchenQual         4
TotRmsAbvGrd       12
Functional          7
Fireplaces          4
FireplaceQu         5
GarageType          6
GarageYrBlt        97
GarageFinish        3
GarageCars

In [None]:
# Bucket SalePrice into quartiles and store the label in a new 'Sales_bin' column.
# Labels run from the lowest-priced quarter ('D') to the highest-priced quarter ('A').
quartile_labels = ['D', 'C', 'B', 'A']
house_df['Sales_bin'] = pd.qcut(house_df['SalePrice'], q=4, labels=quartile_labels)

house_df.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,Neighborhood,...,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,PoolArea,PoolQC,Fence,SalePrice,Sales_bin
0,60,RL,65.0,8450,Pave,Reg,Lvl,AllPub,Inside,CollgCr,...,TA,Y,0,61,0,0,,,208500,B
1,20,RL,80.0,9600,Pave,Reg,Lvl,AllPub,FR2,Veenker,...,TA,Y,298,0,0,0,,,181500,B
2,60,RL,68.0,11250,Pave,IR1,Lvl,AllPub,Inside,CollgCr,...,TA,Y,0,42,0,0,,,223500,A
3,70,RL,60.0,9550,Pave,IR1,Lvl,AllPub,Corner,Crawfor,...,TA,Y,0,35,272,0,,,140000,C
4,60,RL,84.0,14260,Pave,IR1,Lvl,AllPub,FR2,NoRidge,...,TA,Y,192,84,0,0,,,250000,A


In [None]:
# Remove the raw price now that 'Sales_bin' has been derived from it;
# leaving it in the features would hand the model the answer.
house_df = house_df.drop(columns=["SalePrice"])

In [None]:
# Convert categorical data to numeric with `pd.get_dummies`
# This one-hot encodes every object/category column, including 'Sales_bin'
# (which becomes the Sales_bin_D/C/B/A target columns).
# NOTE(review): get_dummies does not fill missing values in numeric columns;
# any NaNs there must be handled before the data reaches the network.
house_df_dummies = pd.get_dummies(house_df)
house_df_dummies.head()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,BsmtFinSF1,BsmtUnfSF,TotalBsmtSF,...,PoolQC_Fa,PoolQC_Gd,Fence_GdPrv,Fence_GdWo,Fence_MnPrv,Fence_MnWw,Sales_bin_D,Sales_bin_C,Sales_bin_B,Sales_bin_A
0,60,65.0,8450,7,5,2003,2003,706,150,856,...,False,False,False,False,False,False,False,False,True,False
1,20,80.0,9600,6,8,1976,1976,978,284,1262,...,False,False,False,False,False,False,False,False,True,False
2,60,68.0,11250,7,5,2001,2002,486,434,920,...,False,False,False,False,False,False,False,False,False,True
3,70,60.0,9550,7,5,1915,1970,216,540,756,...,False,False,False,False,False,False,False,True,False,False
4,60,84.0,14260,8,5,2000,2000,655,490,1145,...,False,False,False,False,False,False,False,False,False,True


In [None]:
# Separate the one-hot encoded price-quartile columns (the target) from all
# remaining columns (the features).
target_columns = ["Sales_bin_D", "Sales_bin_C", "Sales_bin_B", "Sales_bin_A"]
y = house_df_dummies[target_columns]
X = house_df_dummies.drop(columns=target_columns).values

# Hold out a test set using train_test_split's default split size; the fixed
# random_state keeps the partition the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Impute missing feature values before scaling.
# StandardScaler disregards NaNs when fitting but keeps them in place on
# transform, so any missing entry (likely in float columns such as LotFrontage)
# would reach the network and turn the loss into NaN.
# The medians come from the training split only, so nothing about the test
# split leaks into preprocessing.
X_train_frame = pd.DataFrame(X_train).astype(float)
X_test_frame = pd.DataFrame(X_test).astype(float)
train_medians = X_train_frame.median()

# The trailing fillna(0.0) covers a column that is entirely missing in the
# training split (its median would itself be NaN).
X_train_imputed = X_train_frame.fillna(train_medians).fillna(0.0).to_numpy()
X_test_imputed = X_test_frame.fillna(train_medians).fillna(0.0).to_numpy()

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler on the training data only
X_scaler = scaler.fit(X_train_imputed)

# Scale the data
X_train_scaled = X_scaler.transform(X_train_imputed)
X_test_scaled = X_scaler.transform(X_test_imputed)

# Guard against NaNs slipping through to model training.
assert not pd.isna(X_train_scaled).any(), "NaN values remain in X_train_scaled"
assert not pd.isna(X_test_scaled).any(), "NaN values remain in X_test_scaled"

## Compile, Train and Evaluate the Model

In [None]:
# Define the model: a fully connected network with three ReLU hidden layers
# and a 4-way softmax output (one unit per price quartile).
number_input_features = X_train.shape[1]
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 50
hidden_nodes_layer3 = 50

nn = tf.keras.models.Sequential(
    [
        # First hidden layer (also declares the input width)
        tf.keras.layers.Dense(
            units=hidden_nodes_layer1,
            activation="relu",
            input_dim=number_input_features,
        ),
        # Second hidden layer
        tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
        # Third hidden layer
        tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"),
        # Output layer
        tf.keras.layers.Dense(4, activation='softmax'),
    ]
)

# Check the structure of the model
nn.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 80)                14320     
                                                                 
 dense_8 (Dense)             (None, 50)                4050      
                                                                 
 dense_9 (Dense)             (None, 50)                2550      
                                                                 
 dense_10 (Dense)            (None, 4)                 204       
                                                                 
Total params: 21124 (82.52 KB)
Trainable params: 21124 (82.52 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Compile the model
# categorical_crossentropy pairs with the 4-unit softmax output layer and the
# one-hot Sales_bin_* target columns; accuracy is tracked as the metric.
nn.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train the model
# Fits on the scaled training features for 20 epochs; the object returned by
# fit() is kept in fit_model.
fit_model = nn.fit(X_train_scaled,y_train,epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Evaluate the model using the test data
# evaluate() returns the loss followed by the compiled metrics (here: accuracy).
# NOTE(review): if the loss prints as nan, check the scaled features for
# missing values before trusting the accuracy figure.
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

12/12 - 0s - loss: nan - accuracy: 0.2685 - 290ms/epoch - 24ms/step
Loss: nan, Accuracy: 0.2684931457042694


In [None]:
# Testing models with keras_tuner

# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    """Build and compile one candidate classifier for a KerasTuner trial.

    The input width is taken from the notebook-level ``number_input_features``
    (set when the baseline model is defined) instead of a hard-coded count, so
    the search keeps working if the feature set changes.

    Args:
        hp: Hyperparameter object supplied by the tuner. It is used to pick
            the hidden-layer activation ('relu', 'tanh' or 'sigmoid'), the
            width of the first layer (1-40, step 2), the number of extra
            hidden layers (1-4) and each extra layer's width (1-20, step 2).

    Returns:
        A compiled ``tf.keras`` Sequential model with a 4-unit softmax output,
        using categorical cross-entropy loss, the Adam optimizer and accuracy
        as the metric.
    """
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=40,
        step=2), activation=activation, input_dim=number_input_features))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 4)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=20,
            step=2),
            activation=activation))

    # Output layer: one unit per price quartile
    nn_model.add(tf.keras.layers.Dense(4, activation='softmax'))

    # Compile the model
    nn_model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=["accuracy"])

    return nn_model

In [None]:
!pip install keras_tuner

In [None]:
# Import the kerastuner library
# (kept in this cell because the package is installed by the cell just above)

import keras_tuner as kt

# Hyperband search over create_model, ranked by validation accuracy.
# overwrite=True discards any saved trials in the project directory; without
# it the tuner reloads old results and can skip the search entirely, even
# after the data or the model-building function has changed.
tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2,
    overwrite=True)

Reloading Tuner from ./untitled_project/tuner0.json


In [None]:
# Run the kerastuner search for best hyperparameters
# NOTE(review): the test split is passed as validation data, so the
# hyperparameters are selected against the same rows later used for evaluation;
# the reported val_accuracy is therefore an optimistic estimate. Consider
# carving a separate validation split out of the training data.
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

Trial 56 Complete [00h 00m 06s]
val_accuracy: 0.2684931457042694

Best val_accuracy So Far: 0.2684931457042694
Total elapsed time: 00h 04m 36s
