# In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the housing data and print a quick overview of it.
# The original path ended in a space ("kc_house_data.csv "), so a file with
# that exact name, trailing space included, would have had to exist.
df = pd.read_csv("kc_house_data.csv")
print(df)
# A bare expression is only echoed when it is the last statement of a
# notebook cell, so the transposed preview is printed explicitly.
print(df.head(5).T)
df.info()

# Visualizing house prices: distribution on top, box plot underneath.
fig = plt.figure(figsize=(7, 7))
ax_dist = fig.add_subplot(2, 1, 1)
# distplot is deprecated; histplot with a KDE overlay and density scaling is
# its documented replacement (cut/alpha mirror the old distplot look).
sns.histplot(
    x=df['price'],
    color="k",
    kde=True,
    stat="density",
    kde_kws={"cut": 3},
    alpha=0.4,
    ax=ax_dist,
)
ax_box = fig.add_subplot(2, 1, 2)
# Pass the column as x= explicitly: a positional Series is read as `x` by
# older seaborn releases but as `data` by newer ones.
sns.boxplot(x=df['price'], palette="Set2", ax=ax_box)

# Visualizing square footage of (home, lot, above and basement) against price.
# lmplot is a figure-level function that builds its own figure, so the
# plt.figure(...) call that used to precede these plots only produced an
# extra empty figure and has been dropped.
for sqft_column in ('sqft_above', 'sqft_lot', 'sqft_living', 'sqft_basement'):
    # fit_reg=False: plain scatter plot, no regression line.
    sns.lmplot(x=sqft_column, y='price', data=df, fit_reg=False)

# Visualizing bedrooms, bathrooms, floors, grade as a 2x2 grid of count plots.
# Columns are passed by keyword: newer seaborn treats the first positional
# argument as `data`, so countplot(df['bedrooms'], data=df) supplies `data`
# twice and raises a TypeError there.
fig = plt.figure(figsize=(15, 7))
ax_bedrooms = fig.add_subplot(2, 2, 1)
sns.countplot(x='bedrooms', data=df, palette="Set3", ax=ax_bedrooms)
ax_floors = fig.add_subplot(2, 2, 2)
# Floors is drawn with horizontal bars (y=), as in the original.
sns.countplot(y='floors', data=df, palette="Set2", ax=ax_floors)
ax_bathrooms = fig.add_subplot(2, 2, 3)
sns.countplot(x='bathrooms', data=df, palette="Set2", ax=ax_bathrooms)
ax_grade = fig.add_subplot(2, 2, 4)
sns.countplot(x='grade', data=df, palette="Set1", ax=ax_grade)
# Correlation of every numeric column with price, strongest first.
# Restrict to numeric columns up front: DataFrame.corr() no longer drops
# non-numeric columns silently on pandas >= 2.0 and raises instead
# (the 'date' column has not been parsed yet at this point).
numeric_columns = df.select_dtypes(include='number')
price_correlation = numeric_columns.corr()['price'].sort_values(ascending=False)
print(price_correlation)

# Data visualization: mean house price by month and by year.
df['date'] = pd.to_datetime(df['date'])
# The vectorised .dt accessor replaces the per-row apply(lambda ...).
df['month'] = df['date'].dt.month
df['year'] = df['date'].dt.year
fig = plt.figure(figsize=(16, 5))
ax_month = fig.add_subplot(1, 2, 1)
# Select 'price' before aggregating so only that column is averaged.
df.groupby('month')['price'].mean().plot(ax=ax_month)
ax_year = fig.add_subplot(1, 2, 2)
df.groupby('year')['price'].mean().plot(ax=ax_year)


# Drop the columns that are not used as model inputs.
df.drop(['id', 'date', 'zipcode'], axis=1, inplace=True)
dataset = df.values

# Locate the target by name instead of assuming it sits in column 0 and that
# exactly 19 feature columns follow it; the arrays are the same as before
# whenever that layout holds.
price_idx = df.columns.get_loc('price')
feature_mask = np.arange(dataset.shape[1]) != price_idx
X = dataset[:, feature_mask]
y = dataset[:, price_idx]

# 80/20 split. The fixed seed makes the split identical on every run, so
# results are comparable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)
# The target scaler used later expects 2-D input: one column of targets.
y_train = y_train.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))

# Standardization: each scaler learns its statistics from the training split
# only, and the training arrays are then replaced by their scaled versions.
x_scaler = StandardScaler().fit(x_train)
x_train = x_scaler.transform(x_train)

y_scaler = StandardScaler().fit(y_train)
y_train = y_scaler.transform(y_train)

# Create and train the model: two 64-unit ReLU layers with dropout and a
# single linear output unit, trained with MSE loss.
batch_size = 128
epochs = 400

# Size the input layer from the training matrix instead of hard-coding 19,
# so the network keeps matching the data if the feature set changes.
n_features = x_train.shape[1]

model = Sequential()
model.add(Dense(64, activation="relu", input_shape=(n_features,)))
model.add(Dropout(0.2))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(1, activation="linear"))
model.compile(loss="mse", optimizer="Adam")

# 10% of the training data is held out for validation during fitting.
hist = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_split=0.1,
)

# Scale the test split with the scalers fitted on the training data, then
# evaluate. x_test / y_test stay in scaled form for the code that follows.
x_test = x_scaler.transform(x_test)
y_test = y_scaler.transform(y_test)
# Keep the result instead of discarding it; it is the MSE on scaled targets.
test_loss = model.evaluate(x_test, y_test)
print('Test loss (MSE, scaled targets):', test_loss)
# Predict for one hand-written sample: scale its 19 feature values with the
# training scaler, run the model, and map the network output back to the
# target's original scale.
new_sample = [[
    3, 1.00, 1180, 5650, 1.0, 0, 0, 3, 7, 1180,
    0, 1955, 0, 47.5112, -122.257, 1340, 5650, 10, 2014,
]]
new_sample_normalised = x_scaler.transform(new_sample)

raw_output = model.predict(new_sample_normalised)
output = y_scaler.inverse_transform(raw_output)
print(output)
# Training vs. validation loss per epoch, drawn on its own figure.
# Without a fresh figure, plt.plot() draws onto whichever axes are current,
# i.e. the last subplot created earlier in the script, so the loss curves
# would be overlaid on an unrelated chart.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(hist.history['loss'], label='Train')
loss_ax.plot(hist.history['val_loss'], label='Val')
loss_ax.set_title('Model loss')
loss_ax.set_ylabel('Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.legend(loc='upper right')
plt.show()
model.summary()


from sklearn import metrics

# Evaluate on the test split.
# y_test was transformed with y_scaler earlier in the script and the model
# predicts in that same scaled space, so both are mapped back to the target's
# original units first. Otherwise MAE/MSE/RMSE and the residuals would be
# reported in standard-deviation units rather than in the target's own units.
y_pred = model.predict(x_test)
y_test_orig = y_scaler.inverse_transform(y_test)
y_pred_orig = y_scaler.inverse_transform(y_pred)

mse = metrics.mean_squared_error(y_test_orig, y_pred_orig)
print('MAE:', metrics.mean_absolute_error(y_test_orig, y_pred_orig))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))
print('VarScore:', metrics.explained_variance_score(y_test_orig, y_pred_orig))

# Visualizing the predictions: actual (x) against predicted (y).
fig = plt.figure(figsize=(10, 5))
plt.scatter(y_test_orig, y_pred_orig)

# Perfect predictions: the y = x diagonal across the observed range, drawn
# from two end points rather than through every (unsorted) sample.
diagonal = [y_test_orig.min(), y_test_orig.max()]
plt.plot(diagonal, diagonal, 'r')

# Distribution of the residuals (actual - predicted).
fig = plt.figure(figsize=(10, 5))
residuals = (y_test_orig - y_pred_orig)
# distplot is deprecated; histplot with a KDE overlay is its replacement.
sns.histplot(
    x=residuals.ravel(),
    kde=True,
    stat="density",
    kde_kws={"cut": 3},
    alpha=0.4,
)
# Figures created after the previous plt.show() are not displayed when this
# runs as a plain script unless show() is called again.
plt.show()