In [None]:
import pandas as pd
from sklearn import datasets

import numpy as np
import matplotlib.pyplot as plt
import os
import datetime


In [None]:
from sklearn import datasets

import pandas as pd
import numpy as np


from sklearn.datasets import fetch_california_housing
# Fetch the California housing dataset through scikit-learn.
housing = fetch_california_housing()

# Build a single frame: one column per feature plus the regression target.
raw_dataset = pd.DataFrame(
    housing['data'],
    columns=housing['feature_names'],
).assign(target=housing['target'])

# Keep the raw frame untouched; all later cells work on this copy.
dataset = raw_dataset.copy()

In [None]:
# Preview the last rows of the working copy (features plus 'target').
dataset.tail()

In [None]:
# (rows, columns) of the working copy, including the 'target' column.
dataset.shape

### Dataset description

Number of Instances: 20640 

Number of Attributes: 8 numeric, predictive attributes and the target

Attribute Information:

|Feature|Description|
|---|---| 
|MedInc    |     median income in block group| 
|HouseAge   |    median house age in block group| 
|AveRooms   |    average number of rooms per household| 
|AveBedrms  |    average number of bedrooms per household| 
|Population |    block group population| 
|AveOccup    |   average number of household members| 
|Latitude  |     block group latitude|  
|Longitude  |    block group longitude| 





## División train y test

In [None]:
# 80% of the rows, sampled with a fixed seed, form the training set.
train_dataset = dataset.sample(frac=0.8, random_state=0)

# Every row whose index label was not sampled goes to the test set.
in_train = dataset.index.isin(train_dataset.index)
test_dataset = dataset.loc[~in_train]

In [None]:
# (rows, columns) of the training split.
train_dataset.shape

In [None]:
# (rows, columns) of the test split.
test_dataset.shape

# Data exploration

In [None]:
import seaborn as sns

In [None]:
# Summary statistics of the training split, one row per column.
train_dataset.describe().T

In [None]:
# Annotated heatmap of the pairwise correlations in the training split.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    data=train_dataset.corr(),
    ax=ax,
    annot=True,
)

# Data preprocessing

- Separar la etiqueta o valor a predecir de las features.

In [None]:
# Features: every column except 'target' (new frames, the splits stay intact).
train_features = train_dataset.drop(columns='target')
test_features = test_dataset.drop(columns='target')

# Labels: the 'target' column of each split, copied so they are independent.
train_labels = train_dataset['target'].copy()
test_labels = test_dataset['target'].copy()

## Normalization

In [None]:
# Mean and standard deviation of each column before normalization.
train_dataset.describe().T[['mean', 'std']]

- Es una buena práctica normalizar las features para que estén todas en el mismo rango.

In [None]:
# Statistics come from the training features only, so nothing about the
# test set leaks into the preprocessing.
train_mean = train_features.mean()
train_std = train_features.std()

# Standardize both splits column-wise with the training statistics.
train_features = train_features.sub(train_mean).div(train_std)
test_features = test_features.sub(train_mean).div(train_std)

In [None]:
# Check the result: the training features' mean and std after standardizing.
train_features.describe().T[['mean', 'std']]

# Linear regression con una feature

In [None]:
import tensorflow as tf

In [None]:
# Single-feature linear regression: one Dense unit (weight + bias) on one input.
# The input shape is declared with an explicit Input entry rather than the
# legacy `input_dim` layer argument, which newer Keras releases warn about.
linear_one_model = tf.keras.Sequential([
    tf.keras.Input(shape=(1,)),
    tf.keras.layers.Dense(units=1),
])

In [None]:
# Print the layers and parameter counts of the single-feature linear model.
linear_one_model.summary()

In [None]:
# Train with Adam on mean absolute error. The optimizer is referenced through
# `tf.keras.optimizers`, the same path the other compile cells use.
linear_one_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error'
)

In [None]:
# Preview the normalized training features (first rows only, not the full frame).
train_features.head()

In [None]:
# Fit on the 'MedInc' feature only; the last 20% of the training rows are
# held out by Keras as the validation set.
history = linear_one_model.fit(
    x=train_features['MedInc'],
    y=train_labels,
    validation_split=0.2,
    epochs=10,
)

In [None]:
# Per-epoch metrics recorded during training, indexed by epoch number.
hist = (
    pd.DataFrame(history.history)
    .assign(epoch=history.epoch)
    .set_index('epoch')
)
hist.plot(grid=True)

In [None]:
# 100 evenly spaced points across the observed (normalized) 'MedInc' range ...
x = tf.linspace(
    start=train_features['MedInc'].min(),
    stop=train_features['MedInc'].max(),
    num=100,
)
# ... and the model's prediction at each of them.
y = linear_one_model.predict(x)

In [None]:
# Training points with the fitted line drawn on top.
plt.xlabel('MedInc')
plt.ylabel('target')
plt.scatter(x=train_features['MedInc'], y=train_labels, label='Data')
plt.plot(x, y, label='Predictions', color='k')
plt.legend()

In [None]:
# Inspect the variables of the fitted single-feature model.
linear_one_model.weights

In [None]:
# Results table: one row per model, holding its loss on the train and test
# sets. The columns are declared as float so the stored losses stay numeric
# (an untyped empty frame has object columns), which keeps the table directly
# plottable and consistently formatted.
test_results = pd.DataFrame(columns=['train', 'test'], dtype=float)

In [None]:
# Record the single-feature linear model's loss on the test and train sets.
test_results.loc['linear_one_model', 'test'] = linear_one_model.evaluate(
    x=test_features['MedInc'],
    y=test_labels,
    verbose=0,
)
test_results.loc['linear_one_model', 'train'] = linear_one_model.evaluate(
    x=train_features['MedInc'],
    y=train_labels,
    verbose=0,
)

In [None]:
# Results recorded so far.
test_results

___

## Linear model con todas las features

In [None]:
# Linear regression over all features: one Dense unit fed by every column.
# The input width is declared with an explicit Input entry rather than the
# legacy `input_dim` layer argument, which newer Keras releases warn about.
linear_model = tf.keras.Sequential([
    tf.keras.Input(shape=(train_features.shape[1],)),
    tf.keras.layers.Dense(units=1),
])

In [None]:
# Print the layers and parameter counts of the all-features linear model.
linear_model.summary()

In [None]:
# Mean absolute error as the loss, Adam with a 0.1 learning rate as optimizer.
linear_model.compile(
    loss='mean_absolute_error',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
)

In [None]:
# Fit on every feature; the last 20% of the training rows are held out by
# Keras as the validation set.
history = linear_model.fit(
    x=train_features,
    y=train_labels,
    validation_split=0.2,
    epochs=10,
)

In [None]:
# Per-epoch metrics recorded during training, indexed by epoch number.
hist = (
    pd.DataFrame(history.history)
    .assign(epoch=history.epoch)
    .set_index('epoch')
)
hist.plot(grid=True)

In [None]:
# Record the all-features linear model's loss on the test and train sets.
test_results.loc['linear_model', 'test'] = linear_model.evaluate(
    x=test_features,
    y=test_labels,
    verbose=0,
)
test_results.loc['linear_model', 'train'] = linear_model.evaluate(
    x=train_features,
    y=train_labels,
    verbose=0,
)

In [None]:
# Results recorded so far.
test_results

In [None]:
# First variable of the linear model (indexed per feature in the bar chart below).
linear_model.weights[0]

In [None]:
# One bar per feature showing the weight the linear model learned for it.
fig, ax = plt.subplots(figsize=(15, 10))
ax.bar(
    train_features.columns,
    linear_model.weights[0].numpy()[:, 0],
)

___

## Deep model con una feature

In [None]:
# Single-feature deep model: two hidden ReLU layers of 64 units, then a
# one-unit linear output. The input shape is declared with an explicit Input
# entry rather than the legacy `input_dim` layer argument, which newer Keras
# releases warn about.
deep_one_model = tf.keras.Sequential([
    tf.keras.Input(shape=(1,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1),
])

In [None]:
# Print the layers and parameter counts of the single-feature deep model.
deep_one_model.summary()

In [None]:
# Adam with a 0.001 learning rate, optimizing mean absolute error.
deep_one_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='mean_absolute_error',
)

In [None]:
# Fit on the 'MedInc' feature only; the last 20% of the training rows are
# held out by Keras as the validation set.
history = deep_one_model.fit(
    x=train_features['MedInc'],
    y=train_labels,
    validation_split=0.2,
    epochs=10,
)

In [None]:
# Per-epoch metrics recorded during training, indexed by epoch number.
hist = (
    pd.DataFrame(history.history)
    .assign(epoch=history.epoch)
    .set_index('epoch')
)
hist.plot(grid=True)

In [None]:
# 1000 evenly spaced points across the observed (normalized) 'MedInc' range ...
x = tf.linspace(
    start=train_features['MedInc'].min(),
    stop=train_features['MedInc'].max(),
    num=1000,
)
# ... and the deep model's prediction at each of them.
y = deep_one_model.predict(x)

In [None]:
# Training points with the deep model's prediction curve drawn on top.
plt.xlabel('MedInc')
plt.ylabel('Target')
plt.scatter(x=train_features['MedInc'], y=train_labels, label='Data')
plt.plot(x, y, label='Predictions', color='k')
plt.legend()

In [None]:
# Record the single-feature deep model's loss on the test and train sets.
test_results.loc['deep_one_model', 'test'] = deep_one_model.evaluate(
    x=test_features['MedInc'],
    y=test_labels,
    verbose=0,
)
test_results.loc['deep_one_model', 'train'] = deep_one_model.evaluate(
    x=train_features['MedInc'],
    y=train_labels,
    verbose=0,
)

In [None]:
# Results recorded so far.
test_results

## Deep model con todas las features

In [None]:
# Deep model over all features: two hidden ReLU layers of 64 units, then a
# one-unit linear output. The input width is declared with an explicit Input
# entry rather than the legacy `input_dim` layer argument, which newer Keras
# releases warn about.
dnn_model = tf.keras.Sequential([
    tf.keras.Input(shape=(train_features.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1),
])

In [None]:
# Print the layers and parameter counts of the all-features deep model.
dnn_model.summary()

In [None]:
# Adam with a 0.001 learning rate, optimizing mean absolute error.
dnn_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='mean_absolute_error',
)

In [None]:
# Fit on every feature; the last 20% of the training rows are held out by
# Keras as the validation set.
history = dnn_model.fit(
    x=train_features,
    y=train_labels,
    validation_split=0.2,
    epochs=10,
)

In [None]:
# Per-epoch metrics recorded during training, indexed by epoch number.
hist = (
    pd.DataFrame(history.history)
    .assign(epoch=history.epoch)
    .set_index('epoch')
)
hist.plot(grid=True)

In [None]:
# Record the all-features deep model's loss on the test and train sets.
test_results.loc['dnn_model', 'test'] = dnn_model.evaluate(
    x=test_features,
    y=test_labels,
    verbose=0,
)
test_results.loc['dnn_model', 'train'] = dnn_model.evaluate(
    x=train_features,
    y=train_labels,
    verbose=0,
)

In [None]:
# Final results table: train and test loss for every model.
test_results

In [None]:
# Bar chart grouped by split ('train' / 'test'), one bar per model.
test_results.transpose().plot(kind='bar')

In [None]:
# Deep model predictions for the test set, flattened to one value per row.
test_predictions = dnn_model.predict(test_features).flatten()

fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(test_labels, test_predictions)
ax.set_xlabel('True Values')
ax.set_ylabel('Predictions')

# Both axes share one range that covers the labels AND the predictions, so a
# prediction below 0 or above the largest label is still drawn, not clipped.
lims = [
    min(0, test_predictions.min()),
    max(test_labels.max(), test_predictions.max()),
]
ax.set_xlim(lims)
ax.set_ylim(lims)

# Diagonal reference: points on this line are perfect predictions.
ax.plot(lims, lims)