In [1]:
# Pin NumPy's global random generator to a fixed seed so that NumPy-driven
# randomness repeats from one run of the notebook to the next. This call
# only touches NumPy's generator.
from numpy.random import seed

seed(1)

In [2]:
# Dependencies
import numpy as np
import pandas as pd

In [3]:
import tensorflow

# NumPy's seed does not reach TensorFlow's own random generator, which is what
# Keras draws from when it initialises layer weights. Seed it as well so that
# training runs are repeatable.
tensorflow.random.set_seed(1)

# Record the Keras version this notebook was run with.
tensorflow.keras.__version__

'2.2.4-tf'

In [4]:
# Load the raw listings; the cells below all read from this frame.
houses = pd.read_csv('Resources/home_data.csv')

# Labels for the 20 price buckets, in ascending price order.
cut_labels_20 = ['<125k', '125-150k', '150k-175k', '175k-200k', 
                 '200k-220k', '220k-240k', '240k-260k', '260k-280k', '280k-300k',
                 '300k-320k', '320k-340k', '340k-360k', '360k-380k', '380k-400k', 
                 '400k-420k', '420k-440k', '440k-460k', '460k-480k', '480k-500k', 
                 '500k+']
# 21 edges -> 20 right-closed intervals (pd.cut's default). The last edge is
# open-ended so that no price, however large, falls outside the '500k+'
# bucket and ends up as NaN. Prices at or below 0 are still left unlabelled.
cut_bins = [0, 125000, 150000, 175000, 
            200000, 220000, 240000, 260000, 280000, 
            300000, 320000, 340000, 360000, 380000, 
            400000, 420000, 440000, 460000, 480000,
            500000, np.inf]
houses["price_range"] = pd.cut(houses['price'], bins=cut_bins, labels=cut_labels_20)

houses.head(20)


Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,price_range
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650.0,1.0,0,0,...,1180,0,1955,0,98178,47.5112,-122.257,1340,5650,220k-240k
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242.0,2.0,0,0,...,2170,400,1951,1991,98125,47.721,-122.319,1690,7639,500k+
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000.0,1.0,0,0,...,770,0,1933,0,98028,47.7379,-122.233,2720,8062,175k-200k
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000.0,1.0,0,0,...,1050,910,1965,0,98136,47.5208,-122.393,1360,5000,500k+
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080.0,1.0,0,0,...,1680,0,1987,0,98074,47.6168,-122.045,1800,7503,500k+
5,7237550310,20140512T000000,1225000.0,4,4.5,5420,101930.0,1.0,0,0,...,3890,1530,2001,0,98053,47.6561,-122.005,4760,101930,500k+
6,1321400060,20140627T000000,257500.0,3,2.25,1715,6819.0,2.0,0,0,...,1715,0,1995,0,98003,47.3097,-122.327,2238,6819,240k-260k
7,2008000270,20150115T000000,291850.0,3,1.5,1060,9711.0,1.0,0,0,...,1060,0,1963,0,98198,47.4095,-122.315,1650,9711,280k-300k
8,2414600126,20150415T000000,229500.0,3,1.0,1780,7470.0,1.0,0,0,...,1050,730,1960,0,98146,47.5123,-122.337,1780,8113,220k-240k
9,3793500160,20150312T000000,323000.0,3,2.5,1890,6560.0,2.0,0,0,...,1890,0,2003,0,98038,47.3684,-122.031,2390,7570,320k-340k


In [5]:
# TODO: optionally filter out expensive houses here before modelling.


In [6]:

# Feature matrix: every column of `houses` except identifiers, the date, the
# targets (price / price_range), the location and *15 neighbour columns, and
# the waterfront / view / yr_renovated flags.
# (Two earlier variants of this selection were assigned and immediately
# overwritten; only the one that actually took effect is kept.)
X_trimmed = houses.drop(
    ["id", "date", "price", "waterfront", "view", "yr_renovated",
     "lat", "long", "sqft_living15", "sqft_lot15", "price_range"],
    axis=1,
)

# Two views of the target: the raw sale price (regression) and its bucket
# label (classification).
y_prices = houses["price"]
y_ranges = houses["price_range"]

In [7]:
# Summary statistics for the raw sale prices. (`y` is not defined at this
# point in the notebook; `y_prices` is the price Series created above.)
y_prices.describe()

In [None]:
# Summary of the binned price labels (houses["price_range"]).
y_ranges.describe()

In [None]:
# Distribution of all sale prices.
y_prices.plot.hist()

In [None]:
# Same histogram restricted to prices at or below 1,000,000, selecting rows
# and the price column in a single .loc lookup.
affordable = houses.loc[houses["price"] <= 1000000, "price"]
affordable.plot.hist()

In [None]:
# Summary statistics for the prices at or below 1,000,000.
affordable.describe()

In [None]:
# (rows, columns) of the full frame, including the derived price_range column.
houses.shape

## Data Pre-Processing

In [None]:
# Column-vector view of the prices: selecting with a one-item list keeps the
# result 2-D, giving an (n_rows, 1) array.
y = houses[["price"]].values
print(f"{X_trimmed.shape} {y.shape}")


In [None]:
# Preview the first 50 rows of the selected feature columns.
X_trimmed.head(50)

In [None]:
# Regression target: the raw sale price column.
y_prices = houses.loc[:, "price"]
y_prices.head()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model


In [None]:
# Hold out a quarter of the rows for testing (scikit-learn's default split
# size, written out explicitly); random_state fixes which rows land where.
X_train, X_test, y_train, y_test = train_test_split(
    X_trimmed,
    y_prices,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Learn the per-feature mean/std from the training rows only, then apply that
# same transform to both splits so no test-set statistics leak into training.
X_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Inspect the training targets returned by train_test_split (taken from y_prices).
y_train

In [None]:
# Step 1: Label-encode the price-range labels.
# The encoder is fit on the range labels, so it must also *transform* range
# labels. y_train / y_test hold raw prices, which the encoder has never seen;
# look up the matching range label for each split row via its index instead.
label_encoder = LabelEncoder()
label_encoder.fit(y_ranges)
encoded_y_train = label_encoder.transform(y_ranges.loc[y_train.index])
encoded_y_test = label_encoder.transform(y_ranges.loc[y_test.index])


In [None]:
# Step 2: Convert encoded labels to one-hot encoding.
# Fix the width to the encoder's full class list; otherwise to_categorical
# sizes each matrix from the largest code present in that split, and the
# train and test matrices can end up with different numbers of columns.
n_classes = len(label_encoder.classes_)
y_train_categorical = to_categorical(encoded_y_train, num_classes=n_classes)
y_test_categorical = to_categorical(encoded_y_test, num_classes=n_classes)
y_train_categorical

## Create a Deep Learning Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import r2_score

In [None]:
# Create model and add layers: two 50-unit ReLU hidden layers feeding a single
# linear output that predicts the sale price directly.
# The input width is read from the scaled feature matrix rather than
# hard-coded, so changing the selected feature columns cannot silently
# mismatch the network.
model = Sequential()
model.add(Dense(units=50, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(units=50, activation='relu'))
# Classification variant: replace the output layer below with
# Dense(units=20, activation='softmax').
model.add(Dense(units=1, activation='linear'))

In [None]:
# Compile the model for regression.
# Accuracy counts exact matches, which is meaningless for a continuous price
# target, so report mean absolute error alongside the MSE loss instead.
# Classification variant: loss='categorical_crossentropy', metrics=['accuracy'].
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_absolute_error']
             )

In [None]:
# Print Keras' summary of the network built above.
model.summary()

In [None]:
# Inspect the standardised training features produced by X_scaler.
X_train_scaled

In [None]:
# Train on the standardised features against the raw sale prices
# (the regression setup), reshuffling the rows every epoch.
model.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=30,
    shuffle=True,
    verbose=2,
)

In [None]:
# Persist the trained network. Create the target folder first: saving raises
# if Trained_Models/ does not exist yet (e.g. on a fresh checkout).
import os

model_path = "Trained_Models/chris_housing_trained_v1.01.h5"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)

In [None]:
# Reload the model that the previous cell just saved. The original path
# pointed at "..._v1.h5", a different file from the "..._v1.01.h5" written
# above, so the evaluation below would not have measured the freshly trained
# network.
model = load_model("Trained_Models/chris_housing_trained_v1.01.h5")

## Quantify our Trained Model

In [None]:
# Score the regressor on the held-out rows: predict prices from the scaled
# test features and compare them with the true prices using R^2.
y_test_pred = model.predict(X_test_scaled)
value = r2_score(y_true=y_test, y_pred=y_test_pred)

print(value)

## Make Predictions

In [None]:
# Predict price ranges for the first 100 test rows.
# NOTE(review): assumes `model` is the single-output regressor built above.
# Such a model has no class scores for predict_classes to pick from, and it
# was trained on *scaled* features, so raw X_test must not be fed to it.
# Predict prices from X_test_scaled and bucket them with the same bins used
# to create houses["price_range"]. Predictions at or below 0 fall outside the
# bins and come back as NaN.
predicted_prices = model.predict(X_test_scaled[:100]).ravel()
prediction_labels = pd.cut(predicted_prices, bins=cut_bins, labels=cut_labels_20)
prediction_labels

In [None]:
# Predict price ranges for the whole test set.
# NOTE(review): assumes `model` is the single-output regressor built above.
# It must be fed the *scaled* test features it was trained on, and its price
# predictions are bucketed with the same bins used for houses["price_range"].
# Predictions at or below 0 fall outside the bins and come back as NaN.
predicted_prices = model.predict(X_test_scaled).ravel()
prediction_labels = pd.cut(predicted_prices, bins=cut_bins, labels=cut_labels_20)
prediction_labels

In [None]:
# Show predictions next to the true price-range labels for the same first 20
# test rows. The true labels are looked up by the test split's index; printing
# one-hot rows here would not be comparable with the predicted label strings.
actual_labels = y_ranges.loc[y_test.index]
print(f"Predicted classes: {list(prediction_labels[:20])}")
print(f"Actual Labels: {list(actual_labels[:20])}")

In [None]:
# Collapse the first 20 one-hot rows back to their integer class codes
# (the position of the 1 in each row). The original line was not valid Python.
# NOTE(review): presumed intent was to undo to_categorical — confirm.
y_test_categorical[:20].argmax(axis=1)

In [None]:
# inverse_transform expects a 1-D array of integer codes, not a one-hot
# matrix: reduce each row to the index of its 1 before decoding to labels.
label_encoder.inverse_transform(y_test_categorical[:20].argmax(axis=1))