In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
df_houses = pd.read_csv("../Datasets/houses_to_rent_fiap.csv", delimiter=";")

In [None]:
df_houses.head()

In [None]:
df_houses.shape

In [None]:
df_houses.info()

In [None]:
df_houses.isna().sum()

In [None]:
df_houses.hist(figsize=(13, 12))

In [None]:
df_houses.describe().T

In [None]:
df_houses["city"].value_counts()

In [None]:
df_houses.groupby('city')["rent amount (R$)"].mean().plot(kind='bar')

In [None]:
df_houses.groupby('floor')["rent amount (R$)"].mean().plot(kind='bar')

In [None]:
df_houses.groupby('furniture')["rent amount (R$)"].mean().plot(kind='bar')

In [None]:
df_houses["furniture"].value_counts()

In [None]:
df_houses.groupby('animal')["rent amount (R$)"].mean().plot(kind='bar')

In [None]:
(df_houses["animal"].value_counts() / df_houses.shape[0]) * 100

In [None]:
fig, ax = plt.subplots(figsize=(16 ,8))
sns.boxplot(ax=ax, x='floor', y='rent amount (R$)', data=df_houses)
plt.show()

In [None]:
sns.boxplot(x='rooms', y='rent amount (R$)', data=df_houses)

In [None]:
sns.boxplot(x='bathroom', y='rent amount (R$)', data=df_houses)

In [None]:
sns.boxplot(x='parking spaces', y='rent amount (R$)', data=df_houses)

In [None]:
df_original = df_houses.copy()

In [None]:
df_houses.drop(df_houses[(df_houses['floor'] == "301")].index, inplace=True)
df_houses.drop(df_houses[(df_houses['floor'] == "51")].index, inplace=True)
df_houses.drop(df_houses[(df_houses['floor'] == "35")].index, inplace=True)
df_houses.drop(df_houses[(df_houses['floor'] == "46")].index, inplace=True)


df_houses.drop(df_houses[(df_houses['rooms'] == 4) & (df_houses['rent amount (R$)'] >= 30000)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['rooms'] == 3) & (df_houses['rent amount (R$)'] >= 30000)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['rooms'] == 2) & (df_houses['rent amount (R$)'] >= 20000)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['rooms'] == 9)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['rooms'] == 13)].index, inplace=True)

df_houses.drop(df_houses[(df_houses['bathroom'] == 7) & (df_houses['rent amount (R$)'] >= 30000)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['bathroom'] == 5) & (df_houses['rent amount (R$)'] >= 19000)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['bathroom'] == 4) & (df_houses['rent amount (R$)'] >= 19000)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['bathroom'] == 3) & (df_houses['rent amount (R$)'] >= 30000)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['bathroom'] == 1) & (df_houses['rent amount (R$)'] >= 20000)].index, inplace=True)


df_houses.drop(df_houses[(df_houses['parking spaces'] == 12)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['parking spaces'] == 8) & (df_houses['rent amount (R$)'] >= 19000)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['parking spaces'] == 6) & (df_houses['rent amount (R$)'] >= 19000)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['parking spaces'] == 3) & (df_houses['rent amount (R$)'] >= 19000)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['parking spaces'] == 2) & (df_houses['rent amount (R$)'] >= 19000)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['parking spaces'] == 1) & (df_houses['rent amount (R$)'] >= 19000)].index, inplace=True)
df_houses.drop(df_houses[(df_houses['parking spaces'] == 0) & (df_houses['rent amount (R$)'] >= 18000)].index, inplace=True)


In [None]:
lb = LabelEncoder()

df_houses_model = df_houses.copy()

df_houses_model["city"] = lb.fit_transform(df_houses["city"])
df_houses_model["floor"] = lb.fit_transform(df_houses["floor"])
df_houses_model["animal"] = lb.fit_transform(df_houses["animal"])
df_houses_model["furniture"] = lb.fit_transform(df_houses["furniture"])

df_houses_model.info()

In [None]:
df_houses_model.hist(figsize=(12, 10))

In [None]:
corr = df_houses_model.corr()
plt.figure(figsize=(12,10))
sns.heatmap(corr, vmax=1, square=True,annot=True,cmap='inferno')

In [None]:
features = ["city", "area", "rooms", "bathroom", "parking spaces", "furniture", "property tax (R$)", "fire insurance (R$)"]
X = df_houses_model[features]
y = df_houses_model[["rent amount (R$)"]]

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [None]:
y_test.head(10)

In [None]:
train_stats = X.describe().T

In [None]:
train_stats

In [None]:
def norm(x):
  return (x - train_stats['mean']) / train_stats['std']

In [None]:
normed_train_data = norm(X_train)
normed_train_data = normed_train_data.to_numpy()

normed_test_data = norm(X_test)
normed_test_data = normed_test_data.to_numpy()

In [33]:
model_1 =  Sequential(
    [
        Dense(64, activation = 'relu'),
        Dense(64, activation = 'relu'),
        Dense(1)
    ]
)
model_1.compile(
    loss='mae',
    optimizer=tf.keras.optimizers.Adam(0.001),
    metrics=['mse']
)

history_1 = model_1.fit(
    normed_train_data, y_train,
    epochs=750,
    validation_data=(normed_test_data, y_test)
)

Epoch 104/750
Epoch 105/750
Epoch 106/750
Epoch 107/750

KeyboardInterrupt: 

In [None]:
def show_tail_epochs(hist):
    df_hist = pd.DataFrame(hist.history)
    df_hist['epoch'] = hist.epoch
    df_hist["mse_sqrt"] = np.sqrt(df_hist["mse"]) 
    return df_hist.tail(10)


In [None]:
show_tail_epochs(history_1)

In [None]:
y_hat = model_1.predict(normed_test_data)
y_hat

In [None]:
df_compare = pd.DataFrame()

df_compare["y_test"] = y_test
df_compare["y_hat"] = np.round(y_hat.tolist())
df_compare["diff"] = df_compare["y_test"] - df_compare["y_hat"]  

df_compare.head(20)