### About the Dataset
Context:

Melbourne real estate is BOOMING. Can you find the insight or predict the next big trend to become a real estate mogul… or even harder, to snap up a reasonably priced 2-bedroom unit?

Content:

This is a snapshot of a dataset created by Tony Pino.
It was scraped from publicly available results posted every week from Domain.com.au. He cleaned it well, and now it's up to you to make data analysis magic. The dataset includes Address, Type of Real estate, Suburb, Method of Selling, Rooms, Price, Real Estate Agent, Date of Sale and distance from C.B.D.

In [None]:
import torch
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error as mse
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
import datetime

In [None]:
# Load the Melbourne housing snapshot into a DataFrame and preview the first rows.
# NOTE(review): this is a hard-coded absolute path (looks like a Colab upload
# location) — adjust it when running the notebook elsewhere.
df = pd.read_csv('/content/melb_data.csv')
df.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode every string (object-dtype) column so the frame is fully numeric.
#
# Missing categories are imputed with the column's mode *before* encoding.
# LabelEncoder returns integer codes with no NaN left in them, so a fillna
# applied to an already-encoded column (such as the 'CouncilArea' mode-fill
# later in this notebook) would have nothing left to fill. Depending on the
# scikit-learn version, a NaN mixed into a string column is either turned
# into a category of its own or rejected outright.
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    if df[col].isna().any():
        # mode() ignores NaN; it is empty only if the column has no values at all.
        mode_values = df[col].mode()
        if not mode_values.empty:
            df[col] = df[col].fillna(mode_values[0])
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])

In [None]:
# Replace missing values in numeric columns with that column's median.
df = df.fillna(df.median(numeric_only=True))

# Fill missing values for the 'CouncilArea' column with the mode.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on df['CouncilArea']: that chained in-place call
# operates on a temporary object, triggers a FutureWarning in pandas 2.x and
# does not modify df once Copy-on-Write is enabled.
mode_council = df['CouncilArea'].mode()[0]
df['CouncilArea'] = df['CouncilArea'].fillna(mode_council)

In [None]:

# Separate the regression target ('Price') from the predictor columns.
y = df['Price'].to_numpy()
X = df.drop('Price', axis=1)

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=24
)

In [None]:
# One min-max scaler for the features and one for the target. Each is fitted
# on the training split only and then applied, unchanged, to the test split.
scaler = MinMaxScaler()
scalerY = MinMaxScaler()

# Features.
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Target: reshaped to a single column before scaling.
# From here on y_train and y_test hold the *scaled* values, shape (n, 1).
y_train = scalerY.fit_transform(y_train.reshape(-1, 1))
y_test = scalerY.transform(y_test.reshape(-1, 1))

In [None]:
# Wrap the scaled training arrays as torch tensors and show their sizes.
X_torch, y_torch = torch.from_numpy(X_train), torch.from_numpy(y_train)
print(X_torch.shape, y_torch.shape, sep='\n')

In [None]:
# Seed torch's RNG before the layers are created so their initial weights repeat.
torch.manual_seed(24)

# Fully connected regressor: n_features -> 14 -> 10 -> 1, with ReLU after
# each hidden layer and a plain linear output.
model = nn.Sequential(
    nn.Linear(X_train.shape[1], 14),
    nn.ReLU(),
    nn.Linear(in_features=14, out_features=10),
    nn.ReLU(),
    nn.Linear(in_features=10, out_features=1),
)

In [None]:
# Mean-squared-error loss for the regression target.
criterion = torch.nn.MSELoss()
# Construct the optimizer: RMSprop over all model parameters, learning rate 0.1.
# NOTE(review): 0.1 is above the 0.01 default documented for
# torch.optim.RMSprop — confirm it is intended.
optimizer = torch.optim.RMSprop(model.parameters(), lr = 0.1)
optimizer

In [None]:
# Display the layer-by-layer structure of the network.
model

In [None]:
# Sanity check: run one forward pass and confirm that predictions and
# targets have the same shape before training.
y_pred = model(X_torch.float())

# y_train was already reshaped to a single column before scaling, so y_torch
# is (n, 1). Adding another axis with unsqueeze(1) would make it (n, 1, 1);
# MSELoss would then broadcast it against the (n, 1) predictions and compare
# every prediction with every target. reshape(-1, 1) keeps the column shape
# and is safe to re-run.
y_torch = y_torch.reshape(-1, 1)
print(y_torch.shape)
print(y_pred.shape)

In [None]:
# Gradient Descent (full batch: every step uses the whole training set)

# Cast once, outside the loop — the tensors do not change between epochs.
inputs = X_torch.float()
# Force the target into the same (n, 1) shape as the model output. If the
# target carried an extra axis, MSELoss would broadcast the two tensors and
# average over all prediction/target pairs instead of matching rows.
targets = y_torch.float().reshape(-1, 1)

for epoch in range(1000):
    # Forward pass: compute predicted y by passing x to the model
    y_pred = model(inputs)

    # Compute the loss
    loss = criterion(y_pred, targets)

    # Clear gradients left over from the previous step
    optimizer.zero_grad()

    # Backward pass (backpropagation)
    loss.backward()

    # Update the parameters
    optimizer.step()

    # Report progress every 100 epochs
    if epoch % 100 == 0:
        print('epoch: ', epoch+1,' loss: ', loss.item())

In [None]:
# Switch to inference mode and predict on the scaled test features.
# no_grad() stops autograd from recording the forward pass, so the result
# can be converted to NumPy directly.
model.eval()
X_torch_test = torch.from_numpy(X_test)
with torch.no_grad():
    y_pred = model(X_torch_test.float())
y_pred = y_pred.numpy()

In [None]:
# Make the single-column shape explicit, then map the scaled predictions
# back through the target scaler.
y_pred = np.reshape(y_pred, (-1, 1))
y_pred_orig = scalerY.inverse_transform(y_pred)

print(y_pred_orig[0:5])

In [None]:
# First five test targets. NOTE: y_test was overwritten with its
# scalerY-transformed values in the scaling cell, so these are in scaled
# units, not in the units of y_pred_orig.
print(y_test[:5])

In [None]:
# Confirm that predictions and ground truth have compatible shapes before scoring.
print(f"Shape of y_pred_orig: {y_pred_orig.shape}")
print(f"Shape of y_test: {y_test.shape}")

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# y_test was replaced by its scaled version in the scaling cell, whereas
# y_pred_orig has been inverse-transformed. Bring the ground truth back to
# the same units so the metrics compare like with like.
y_test_orig = scalerY.inverse_transform(np.asarray(y_test).reshape(-1, 1))

mse_value = mean_squared_error(y_test_orig, y_pred_orig)
mae_value = mean_absolute_error(y_test_orig, y_pred_orig)
r2_value = r2_score(y_test_orig, y_pred_orig)

In [None]:
# Report the PyTorch model's test-set metrics computed in the previous cell.
print(f"MSE: {mse_value}")
print(f"MAE: {mae_value}")
print(f"R2: {r2_value}")

## TensorFlow

In [None]:
# Reload the raw CSV so this section starts from an unprocessed DataFrame.
# NOTE(review): hard-coded absolute path (looks like a Colab upload
# location) — adjust it when running the notebook elsewhere.
df = pd.read_csv('/content/melb_data.csv')
df.head()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode every string (object-dtype) column so the frame is fully numeric.
#
# Missing categories are imputed with the column's mode *before* encoding.
# LabelEncoder returns integer codes with no NaN left in them, so a fillna
# applied to an already-encoded column (such as the 'CouncilArea' mode-fill
# in the next cell) would have nothing left to fill. Depending on the
# scikit-learn version, a NaN mixed into a string column is either turned
# into a category of its own or rejected outright.
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    if df[col].isna().any():
        # mode() ignores NaN; it is empty only if the column has no values at all.
        mode_values = df[col].mode()
        if not mode_values.empty:
            df[col] = df[col].fillna(mode_values[0])
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])

In [None]:
# Replace missing values in numeric columns with that column's median.
df = df.fillna(df.median(numeric_only=True))

# Fill missing values for the 'CouncilArea' column with the mode.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on df['CouncilArea']: that chained in-place call
# operates on a temporary object, triggers a FutureWarning in pandas 2.x and
# does not modify df once Copy-on-Write is enabled.
mode_council = df['CouncilArea'].mode()[0]
df['CouncilArea'] = df['CouncilArea'].fillna(mode_council)

In [None]:
# (rows, columns) of the preprocessed frame.
df.shape

In [None]:
# Target column first, then every remaining column as a predictor.
y = df['Price']
X = df.drop(columns=['Price'])

In [None]:
# List the predictor columns that will be fed to the network.
X.columns

In [None]:
# Preview the first few target (Price) values.
y.head()

In [None]:
# 70/30 train/test split on plain NumPy arrays, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X.to_numpy(), y.to_numpy(), test_size=0.3, random_state=24
)

In [None]:
# Separate min-max scalers for features and target, fitted on the training
# split only. The scaled arrays get their own names, so the unscaled
# X_train / X_test / y_train / y_test stay available.
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

# Features.
X_trn_scl = scaler_X.fit_transform(X_train)
X_tst_scl = scaler_X.transform(X_test)

# Target, reshaped to a single column before scaling.
y_trn_scl = scaler_y.fit_transform(y_train.reshape(-1, 1))
y_tst_scl = scaler_y.transform(y_test.reshape(-1, 1))

In [None]:
# Seed TensorFlow's global RNG before the layers are created.
tf.random.set_seed(24)

# Dense regressor built layer by layer: n_features -> 14 -> 10 -> 1.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(14, activation='relu', input_shape=(X_trn_scl.shape[1],)))
model.add(tf.keras.layers.Dense(10, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='linear'))

# RMSprop with its default settings, MSE as the training loss, MAE tracked as a metric.
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(),
    loss='mean_squared_error',
    metrics=['mae'],
)

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
model.summary()

#### Early Stopping

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once val_loss has not improved by at least min_delta for 20
# consecutive epochs, then roll back to the best weights seen.
# NOTE(review): min_delta=0.001 may be large relative to an MSE computed on
# a min-max-scaled target — confirm it does not end training too early.
monitor = EarlyStopping(monitor='val_loss',
                        min_delta=0.001,
                        patience=20,
                        verbose=1,
                        restore_best_weights=True)

# Validate on a slice carved out of the *training* data. Passing the test
# split as validation data would let early stopping and
# restore_best_weights pick the model using the very rows it is later
# scored on, making the reported test metrics optimistic.
history2 = model.fit(X_trn_scl, y_trn_scl,
                     validation_split=0.2,
                     callbacks=[monitor],
                     verbose=2,
                     epochs=500)


In [None]:
# Predict on the scaled test features; the outputs are still in scaled target units.
y_pred_ = model.predict(X_tst_scl)

In [None]:
# Undo the target scaling so the predictions are in the same units as y_test.
y_pred_orig = scaler_y.inverse_transform(y_pred_)

In [None]:
# Compare the shapes of ground truth and predictions before scoring.
print(f"y_test shape: {y_test.shape}")
print(f"y_pred_orig shape: {y_pred_orig.shape}")

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Score the Keras model in unscaled units: in this section y_test is the raw
# split output (the scaled copy lives in y_tst_scl) and y_pred_orig has been
# inverse-transformed.
MSE = mean_squared_error(y_test, y_pred_orig)
MAE = mean_absolute_error(y_test, y_pred_orig)
r2 = r2_score(y_test, y_pred_orig)

In [None]:
# Report the Keras model's test-set metrics computed in the previous cell.
print(f"MSE: {MSE}")
print(f"MAE: {MAE}")
print(f"R2: {r2}")

## Conclusion

#### Using PyTorch
* RMSprop :
            MSE: 0.005153353707856875
            MAE: 0.05247591353505907
            R2: -0.006395448771884249
* SGD :
       MSE: 0.005002161577744846
       MAE: 0.05136918252524957
       R2: 0.023130774394767406

#### Using TensorFlow
* RMSprop :
            MSE: 0.0033055019746956694
            MAE: 0.04693921633891662
            R2: 0.35447044161390406

* SGD:
      MSE: 0.004058409188259081
      MAE: 0.04490980810948505
      R2: 0.20743562971607055