In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
import tensorflow as tf

## Data Pipeline

In [2]:
from pathlib import Path
import tarfile
import urllib.request

def load_housing_data():
    """Fetch, unpack and load the housing dataset as a DataFrame.

    The tarball is cached at ``datasets/housing.tgz`` and is downloaded only
    when that file is missing. The archive is unpacked right after a download,
    and also whenever ``datasets/housing.csv`` is missing, so a cached tarball
    left behind by an interrupted extraction (or a deleted CSV) is repaired on
    the next call instead of failing in ``pd.read_csv``.

    Returns:
        pandas.DataFrame: the contents of ``datasets/housing.csv``.
    """
    tarball_path = Path("datasets/housing.tgz")
    csv_path = Path("datasets/housing.csv")

    freshly_downloaded = False
    if not tarball_path.is_file():
        Path("datasets").mkdir(parents=True, exist_ok=True)
        url = "https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.tgz"
        urllib.request.urlretrieve(url, tarball_path)
        freshly_downloaded = True

    # Extraction is decided by the CSV, not by the tarball: a tarball that is
    # already on disk says nothing about whether it was ever unpacked.
    if freshly_downloaded or not csv_path.is_file():
        with tarfile.open(tarball_path) as housing_tarball:
            housing_tarball.extractall(path="datasets")

    return pd.read_csv(csv_path)

In [3]:
# Load the housing CSV into a DataFrame via the helper defined above.
housing = load_housing_data()

In [47]:
# Preview the first rows to check which columns were loaded.
housing.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [38]:
# Read the CSV as a dataset of raw text lines; decoding happens later in the parsers.
dataset_path = Path("datasets/housing.csv")
dataset_line = tf.data.TextLineDataset(str(dataset_path))
# Per-column defaults handed to tf.io.decode_csv: nine float32 columns that
# default to 0.0, followed by one string column declared with an empty default.
record_defaults = (
    [tf.constant([0.], dtype=tf.float32) for _ in range(9)]
    + [tf.constant([], dtype=tf.string)]
)

In [49]:
def parserx(line):
    """Decode one CSV text line into the two feature vectors.

    Args:
        line: a single CSV line, decoded with the module-level
            ``record_defaults``.

    Returns:
        A pair ``(features1, features2)``: the first eight decoded columns
        stacked into one tensor, and decoded columns 0, 1, 3 and 4 combined
        into a second tensor.
    """
    columns = tf.io.decode_csv(line, record_defaults=record_defaults)
    first_eight = tf.stack(columns[:8])
    # Columns 0-1 and 3-4 are joined end to end; column 2 is left out.
    picked = tf.concat([columns[0:2], columns[3:5]], axis=0)
    subset = tf.stack(picked)
    return first_eight, subset
def parsery(line):
    """Decode one CSV text line and return its target value.

    Args:
        line: a single CSV line, decoded with the module-level
            ``record_defaults``.

    Returns:
        The decoded column at index 8, passed through ``tf.stack``.
    """
    columns = tf.io.decode_csv(line, record_defaults=record_defaults)
    target = columns[8]
    return tf.stack(target)

In [50]:
# skip(1) drops the first line of the file (the CSV header) before decoding.
# NOTE(review): features and targets are built as two separate pipelines over
# the same file, so every line is read and decoded twice; the rows line up only
# as long as both pipelines traverse the file in the same order.
data_x = dataset_line.skip(1).map(parserx)
data_y = dataset_line.skip(1).map(parsery)

In [51]:
# Three independent, initially empty, growable float32 buffers: one per model
# input branch and one for the targets.
data_x_wide_array, data_x_deep_array, data_y_array = (
    tf.TensorArray(tf.float32, size=0, dynamic_size=True) for _ in range(3)
)

In [52]:
# Each element of data_x is the pair returned by parserx: `x` is the 8-value
# vector (features1) and `y` is the 4-value vector (features2). `y` is a second
# feature vector here, NOT the regression target.
# NOTE: each write appends at the array's current size, so re-running this cell
# without re-creating the TensorArrays first appends the same rows again.
for x, y in data_x:
    data_x_deep_array = data_x_deep_array.write(data_x_deep_array.size(), x)
    data_x_wide_array = data_x_wide_array.write(data_x_wide_array.size(), y)

In [53]:
# Combine the per-row tensors written above into one tensor per input branch.
train_x_deep = data_x_deep_array.stack()
train_x_wide = data_x_wide_array.stack()

In [56]:
# Collect the target values produced by parsery (decoded column 8), one per row.
# NOTE: writes append at the current size, so re-running this cell without
# re-creating data_y_array first appends the targets a second time.
for f in data_y:
    data_y_array = data_y_array.write(data_y_array.size(), f)

In [57]:
# Combine the collected per-row targets into a single tensor.
train_y = data_y_array.stack()

In [58]:
# Sanity check: display the first four target values.
train_y[0:4]

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([452600., 358500., 352100., 341300.], dtype=float32)>

## Model:

In [60]:
class GaussianRegNoiseLayer(tf.keras.layers.Layer):
    """Dense layer that adds Gaussian noise to its output while training.

    The layer computes ``activation(X @ kernel + bias)``. When ``training`` is
    truthy, zero-mean Gaussian noise with standard deviation ``stddev`` is
    added *after* the activation; at inference time the output is returned
    without noise.

    Args:
        units: number of output units (width of the kernel and bias).
        stddev: standard deviation of the additive noise.
        activation: any identifier accepted by ``tf.keras.activations.get``;
            ``None`` resolves to the linear (identity) activation.
        **kwargs: forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``,
            ``dtype``).
    """

    def __init__(self, units, stddev, activation=None, **kwargs):
        # Keyword arguments must be forwarded as keywords; unpacking the dict
        # with a single `*` would pass its *keys* as positional arguments.
        super().__init__(**kwargs)
        self.stddev = stddev
        self.units = units
        self.activation = tf.keras.activations.get(activation)

    def build(self, batch_input_shape):
        """Create the kernel and bias once the input feature size is known."""
        # `name` is passed by keyword because the first positional parameter
        # of add_weight differs between Keras versions (name vs. shape).
        self.kernel = self.add_weight(name='kernel',
                                      shape=[batch_input_shape[-1], self.units],
                                      initializer='glorot_normal')
        self.bias = self.add_weight(name='bias',
                                    shape=[self.units],
                                    initializer='zeros')

    def call(self, X, training=False):
        """Apply the dense transform and, when training, add Gaussian noise.

        Args:
            X: input tensor whose last dimension matches the built kernel.
            training: when truthy, noise is added to the activated output.

        Returns:
            The activated output, with noise added only in training mode.
        """
        # activations.get(None) returns the linear activation, so
        # self.activation is always callable and needs no None branch.
        outputs = self.activation(tf.matmul(X, self.kernel) + self.bias)
        if training:
            # Noise is only sampled when it is used, and in the output's dtype
            # so the addition also works under non-float32 compute dtypes.
            noise = tf.random.normal(tf.shape(outputs),
                                     stddev=self.stddev,
                                     dtype=outputs.dtype)
            outputs = outputs + noise
        return outputs

    def get_config(self):
        """Return the constructor arguments needed to re-create the layer."""
        base_config = super().get_config()
        base_config.update({'stddev': self.stddev,
                            'units': self.units,
                            'activation': tf.keras.activations.serialize(self.activation)})
        return base_config

In [61]:
class DualIOGaussianRT(tf.keras.Model):
    """Two-input, two-output regression model with a deep and a wide branch.

    The deep branch normalizes its input and passes it through a stack of
    L2-regularized ReLU ``Dense`` layers, each followed by batch normalization.
    The wide branch normalizes its input, applies ``layer_depth`` LeakyReLU
    ``Dense`` layers, one batch normalization and a ``GaussianRegNoiseLayer``.
    ``output_1`` is computed from the concatenation of both branches and
    ``output_2`` from the deep branch alone.

    NOTE(review): both ``Normalization`` layers are created without statistics;
    they presumably need ``adapt(...)`` on the training inputs before fitting.
    Confirm against the training code.

    Args:
        architecture: iterable of unit counts, one per deep ``Dense`` layer.
        keen_units: number of units in every wide ``Dense`` layer.
        layer_depth: number of wide ``Dense`` layers.
        g_units: number of units of the ``GaussianRegNoiseLayer``.
        **kwargs: forwarded to ``tf.keras.Model`` (e.g. ``name``).
    """

    def __init__(self, architecture, keen_units, layer_depth, g_units , **kwargs):
        super().__init__(**kwargs)
        self.architecture = architecture
        self.keen_units = keen_units
        self.g_units = g_units
        self.layer_depth = layer_depth
        self.deep_norm_ = tf.keras.layers.Normalization()
        self.wide_norm_ = tf.keras.layers.Normalization()
        self.DeepFlow_ = [tf.keras.layers.Dense(units, activation='relu',
                                                kernel_initializer=tf.keras.initializers.GlorotNormal(),
                                                kernel_regularizer=tf.keras.regularizers.L2(0.01),
                                                bias_regularizer=tf.keras.regularizers.L2(0.01))
                          for units in architecture]
        self.BatchNormDeep_ = [tf.keras.layers.BatchNormalization() for _ in architecture]
        # Initializers live in tf.keras.initializers; tf.keras.layers has no he_normal.
        self.WideFlow = [tf.keras.layers.Dense(keen_units,
                                               activation=tf.keras.layers.LeakyReLU(),
                                               kernel_initializer=tf.keras.initializers.HeNormal(),
                                               kernel_regularizer=tf.keras.regularizers.L2(0.1))
                         for _ in range(self.layer_depth)]
        self.BatchNormWide_ = tf.keras.layers.BatchNormalization()
        self.GRNlayer = GaussianRegNoiseLayer(g_units, 0.1, activation='relu')
        self.concatenate_ = tf.keras.layers.Concatenate()
        # The activation is passed as the function itself; calling linear()
        # with no argument raises a TypeError.
        self.output_1 = tf.keras.layers.Dense(1, activation=tf.keras.activations.linear,
                                              kernel_initializer=tf.keras.initializers.GlorotNormal())
        self.output_2 = tf.keras.layers.Dense(1, activation=tf.keras.activations.linear,
                                              kernel_initializer=tf.keras.initializers.GlorotNormal())

    def call(self, inputs, training=None):
        """Run both branches and return the two regression outputs.

        Args:
            inputs: a pair ``(x_deep, x_wide)`` of input tensors.
            training: training-mode flag, forwarded to the batch-normalization
                and Gaussian-noise layers so they behave differently in
                training and inference.

        Returns:
            A pair ``(output_1, output_2)``: the prediction from the
            concatenated deep and wide features, and the prediction from the
            deep features alone.
        """
        x_deep, x_wide = inputs
        x_deep = self.deep_norm_(x_deep)
        x_wide = self.wide_norm_(x_wide)
        for layer, norm in zip(self.DeepFlow_, self.BatchNormDeep_):
            x_deep = layer(x_deep)
            x_deep = norm(x_deep, training=training)
        for layer in self.WideFlow:
            x_wide = layer(x_wide)
        x_wide = self.BatchNormWide_(x_wide, training=training)
        x_wide = self.GRNlayer(x_wide, training=training)
        merged = self.concatenate_([x_deep, x_wide])
        output_1 = self.output_1(merged)
        output_2 = self.output_2(x_deep)
        return output_1, output_2

    def get_config(self):
        """Return the constructor arguments needed to re-create the model."""
        base_config = super().get_config()
        base_config.update({'architecture': self.architecture,
                            'keen_units': self.keen_units,
                            'g_units': self.g_units,
                            'layer_depth': self.layer_depth})
        return base_config