In [None]:
import os

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from sklearn.metrics import mean_squared_error, mean_absolute_error, explained_variance_score

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

### Exploratory Analysis

In [None]:
# Location of the house-sales CSV. Set the KC_HOUSE_DATA environment variable
# to point at your own copy of the file; the original author's path is kept as
# the fallback so existing setups keep working unchanged.
DATA_PATH = os.environ.get(
    'KC_HOUSE_DATA',
    'C:\\Users\\alvar\\Documents\\tf_templates\\DATA\\kc_house_data.csv',
)
df = pd.read_csv(DATA_PATH)
# Preview the first rows to confirm the file parsed as expected.
df.head()

In [None]:
# Number of missing entries in each column.
df.isna().sum()

In [None]:
# Summary statistics, transposed so each column of df becomes one row.
df.describe().T

In [None]:
# Distribution of the target variable.
sns.histplot(x='price', data=df);

In [None]:
# How many rows there are for each bedroom count.
sns.countplot(x='bedrooms', data=df);

In [None]:
# Price (x-axis) against living area (y-axis).
sns.scatterplot(
    x='price',
    y='sqft_living',
    data=df,
);

In [None]:
# Price distribution for each bedroom count.
sns.boxplot(
    x='bedrooms',
    y='price',
    data=df,
);

In [None]:
# Size of 1% of the dataset, in rows.
0.01 * len(df)

In [None]:
# Leave out the most expensive 1% of rows before colouring the map by price.
# The cut-off is derived from the row count instead of being hard-coded, so it
# stays correct if the dataset changes.
n_top = round(len(df) * 0.01)
df_non_top = df.sort_values('price', ascending=False).iloc[n_top:]
# Longitude/latitude scatter, coloured by price.
sns.scatterplot(data=df_non_top, x='long', y='lat', hue='price', palette='RdYlGn');

In [None]:
# Price distribution split by the waterfront flag.
sns.boxplot(
    x='waterfront',
    y='price',
    data=df,
);

In [None]:
# Column dtypes and non-null counts, checked before feature engineering.
df.info()

### Data Engineering

In [None]:
# Remove the 'id' column from the frame.
df = df.drop(columns='id')

In [None]:
# Preview the frame after dropping 'id'.
df.head()

In [None]:
# Parse the date column once, then derive month and year features from it.
parsed_dates = pd.to_datetime(df['date'])
df['date'] = parsed_dates
df['month'] = parsed_dates.dt.month
df['year'] = parsed_dates.dt.year

In [None]:
# Price distribution for each year.
sns.boxplot(
    x='year',
    y='price',
    data=df,
);

In [None]:
# Average price per month. 'price' is selected before aggregating so only that
# column is averaged; this avoids computing (or failing on) means of every
# other column in the frame.
df.groupby('month')['price'].mean().plot();

In [None]:
# Average price per year. 'price' is selected before aggregating so only that
# column is averaged; this avoids computing (or failing on) means of every
# other column in the frame.
df.groupby('year')['price'].mean().plot();

In [None]:
# Remove the 'date' column from the frame.
df = df.drop(columns='date')

In [None]:
# List the columns currently in the frame.
df.columns

In [None]:
# Remove the 'zipcode' column from the frame.
df = df.drop(columns='zipcode')

### Scaling and Train-Test Split

In [None]:
# Separate the target from the features and convert both to NumPy arrays.
y = df['price'].to_numpy()
X = df.drop(columns='price').to_numpy()

In [None]:
# Hold out 30% of the rows for testing; random_state is fixed so the split is
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=101,
    test_size=0.3,
)

In [None]:
# Fit the scaler on the training features only, so nothing from the test set
# leaks into the scaling parameters, then apply it to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# (rows, features) of the scaled training set.
X_train.shape

In [None]:
# (rows, features) of the scaled test set.
X_test.shape

### Creating a Model and Training

In [None]:
# Fully connected regression network: four ReLU hidden layers of 19 units each,
# followed by a single linear output unit for the predicted price.
model = Sequential([
    Dense(units=19, activation='relu'),
    Dense(units=19, activation='relu'),
    Dense(units=19, activation='relu'),
    Dense(units=19, activation='relu'),
    Dense(units=1, activation='linear'),
])

#### Choosing an optimizer and loss

    # For a multi-class classification problem
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # For a binary classification problem
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # For a mean squared error regression problem
    model.compile(optimizer='rmsprop',
                  loss='mse')

In [None]:
# Regression problem: minimise mean squared error using RMSprop.
model.compile(
    loss='mse',
    optimizer='rmsprop',
)

In [None]:
# Train for 400 epochs in batches of 128. The test split is passed as
# validation data, so a validation loss on it is reported after each epoch.
model.fit(
    x=X_train,
    y=y_train,
    epochs=400,
    batch_size=128,
    validation_data=(X_test, y_test),
)

### Evaluation

In [None]:
# Per-epoch metrics recorded during training, plotted as curves.
training_log = model.history.history
losses = pd.DataFrame(training_log)
losses.plot();

In [None]:
# Model output for the scaled test features.
predictions = model.predict(X_test)

In [None]:
# Mean absolute error of the predictions, in the same units as price.
mean_absolute_error(y_true=y_test, y_pred=predictions)

In [None]:
# Root mean squared error: the square root brings MSE back to price units.
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
rmse

In [None]:
# Explained variance score of the predictions against the true prices.
explained_variance_score(y_true=y_test, y_pred=predictions)

In [None]:
# Predicted against actual prices.
plt.scatter(x=y_test, y=predictions);
# Red y = x reference line: points on it are perfect predictions.
plt.plot(y_test, y_test, 'r');

In [None]:
# Shape of the test targets, checked before subtracting the predictions.
y_test.shape

In [None]:
# Residuals (actual - predicted). y_test is reshaped into a single column so
# the subtraction is element-wise against the model's column of predictions;
# -1 lets NumPy infer the row count instead of hard-coding the test-set size.
errors = y_test.reshape(-1, 1) - predictions
# Distribution of the residuals.
sns.histplot(errors);

#### Predicting a new item

In [None]:
# Pick a random row position in [0, 100) and take that row's features
# (everything except the target column).
house_num = np.random.randint(0, 100)
feature_frame = df.drop(columns='price')
single_house = feature_frame.iloc[house_num]

In [None]:
# The row is reshaped to (1, n_features) before it is passed to the scaler;
# -1 infers the feature count rather than hard-coding it. The same scaler
# fitted on the training data is reused.
# NOTE: this overwrites single_house (a Series) with the scaled array, so the
# cell that selects the house must be re-run before running this cell again.
single_house = scaler.transform(single_house.values.reshape(1, -1))
single_house

In [None]:
# Model output for the one scaled house.
prediction = model.predict(single_house)
prediction

In [None]:
# The full original row (including the actual price) for comparison with the prediction.
df.iloc[house_num]