# Workshop 2: House Pricing Regression
In this workshop you will train a regression neural network to estimate house prices in California. The main blocks of the workshop are:

- Get the data (already available in Colab's sample data) and inspect it.
- Pre-process the data.
- Design the network.
- Train the network.
- Evaluate the model.

[Information about the data](https://www.kaggle.com/datasets/camnugent/california-housing-prices)

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from matplotlib import pyplot as plt

# Seed Python's `random`, NumPy and TensorFlow in a single call.
# `tf.random.set_seed` alone only seeds TensorFlow's own generator, so any
# randomness drawn from the other two sources would still vary between runs.
tf.keras.utils.set_random_seed(1234)

# 1. Get the data (Already in Colab)

In [None]:
# Name of the column the network is trained to predict.
TARGET_NAME = 'median_house_value'

# CSV files used for the train and test splits (located under Colab's
# /content/sample_data directory).
TRAIN_DATA_PATH = '/content/sample_data/california_housing_train.csv'
TEST_DATA_PATH = '/content/sample_data/california_housing_test.csv'

In [None]:
# Read both CSV files into DataFrames (train first, then test).
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in (TRAIN_DATA_PATH, TEST_DATA_PATH)
)

In [None]:
# Preview the first rows of the training data (notebook displays the result).
train_data.head()

In [None]:
# Show (rows, columns) of the train and test sets.
train_data.shape, test_data.shape

In [None]:
# Separate the regression target from the feature columns for each split.
# `drop` returns a new frame, so train_data / test_data are left untouched.
y_train = train_data[TARGET_NAME]
x_train = train_data.drop(columns=TARGET_NAME)

y_test = test_data[TARGET_NAME]
x_test = test_data.drop(columns=TARGET_NAME)

In [None]:
# Sanity-check the shapes of features and targets after the split.
x_train.shape, y_train.shape, x_test.shape, y_test.shape

# 2. Pre-process the data.

In [None]:
# Fit the scaler on the training features only, then apply that same fitted
# transformation to both splits so the test set never influences the scaling.
standard_scaler = StandardScaler()
standard_scaler.fit(x_train)

# Wrap the scaled arrays back into DataFrames to keep the column names.
x_train_scaled = pd.DataFrame(standard_scaler.transform(x_train), columns=x_train.columns)
x_test_scaled = pd.DataFrame(standard_scaler.transform(x_test), columns=x_test.columns)

In [None]:
# Preview the scaled training features (notebook displays the result).
x_train_scaled.head()

# 3. Design the network.

In [None]:
def create_model(input_dim=8):
  """Build the (uncompiled) fully connected regression network.

  Args:
    input_dim: Number of input features per sample. Defaults to 8, the value
      that was previously hard-coded, so existing `create_model()` calls
      behave as before.

  Returns:
    A new `Sequential` model with three ReLU hidden layers (32, 64 and 32
    units) followed by a single linear output unit.
  """
  model = Sequential([
      # Declare the input shape with an explicit Input object rather than the
      # legacy `input_dim` keyword on the first Dense layer.
      tf.keras.Input(shape=(input_dim,)),
      Dense(32, activation='relu'),
      Dense(64, activation='relu'),
      Dense(32, activation='relu'),
      # Linear activation: the output is an unbounded real value (regression).
      Dense(1, activation='linear')
    ])
  return model

In [None]:
# Instantiate the network and print its layer-by-layer summary.
model = create_model()
model.summary()

In [None]:
# Configure training: Adam with learning rate 1e-3, minimising mean squared
# error and also reporting it as a metric (recorded under the 'mse' key).
model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse', metrics=['mse'])

# 4. Train the network.

In [None]:
# Train for 10 epochs on the scaled features; `validation_split=0.2` makes
# Keras set aside 20% of the training rows for per-epoch validation.
history = model.fit(
    x=x_train_scaled.values,
    y=y_train.values,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

In [None]:
def plot_history(history, key):
  """Plot a metric's per-epoch training curve and, if recorded, its validation curve.

  Args:
    history: Object returned by `model.fit`; its `history` attribute maps
      metric names to lists of per-epoch values.
    key: Name of the metric to plot (e.g. 'mse'). The validation series is
      looked up under 'val_' + key.

  Raises:
    KeyError: If `key` itself is not present in `history.history`.
  """
  val_key = 'val_' + key
  plt.plot(history.history[key], label=key)
  # The validation series is only looked up when it was recorded; skipping it
  # avoids a KeyError for runs fitted without validation data.
  if val_key in history.history:
    plt.plot(history.history[val_key], label=val_key)
  plt.xlabel("Epochs")
  plt.ylabel(key)
  plt.legend()
  plt.show()

In [None]:
# Loss: MSE | Learning Rate: 1e-03
# Plot training vs. validation MSE for the run fitted above.
plot_history(history, 'mse')

In [None]:
# Experiment - Loss: MSE | Learning Rate: 1e-02
# Build a fresh model so this run does not continue from earlier training.
model = create_model()
model.compile(optimizer=Adam(learning_rate=1e-2), loss='mse', metrics=['mse'])

history = model.fit(
    x=x_train_scaled.values,
    y=y_train.values,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

# Compare training and validation MSE across epochs.
plot_history(history, 'mse')

In [None]:
# Experiment - Loss: MSLE | Learning Rate: 1e-03
# Build a fresh model so this run does not continue from earlier training.
model = create_model()
model.compile(optimizer=Adam(learning_rate=1e-3), loss='msle', metrics=['msle'])

history = model.fit(
    x=x_train_scaled.values,
    y=y_train.values,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

# Compare training and validation MSLE across epochs.
plot_history(history, 'msle')

In [None]:
# Experiment - Loss: MSLE | Learning Rate: 1e-02
# Build a fresh model so this run does not continue from earlier training.
model = create_model()
model.compile(optimizer=Adam(learning_rate=1e-2), loss='msle', metrics=['msle'])

history = model.fit(
    x=x_train_scaled.values,
    y=y_train.values,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

# Compare training and validation MSLE across epochs.
plot_history(history, 'msle')

In [None]:
# Experiment - Loss: MSLE | Learning Rate: 1
# NOTE(review): a learning rate of 1 is far larger than in the other runs;
# presumably intentional, to show the effect of an oversized step - confirm.
# Build a fresh model so this run does not continue from earlier training.
model = create_model()
model.compile(optimizer=Adam(learning_rate=1), loss='msle', metrics=['msle'])

history = model.fit(
    x=x_train_scaled.values,
    y=y_train.values,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)

# Compare training and validation MSLE across epochs.
plot_history(history, 'msle')

# Exercise 1: Re-train the model to improve its performance and evaluate it on the test set

In [None]:
# Train the model


In [None]:
# Predict on the test set


In [None]:
# Compare the results with the ground truth
