# **Ensamble Design Pattern**

### ***Loading Libraries***

In [1]:
# Operating Systems
import os
import shutil

# Numerical Computing
import numpy as np

# Data Manipuation
import pandas as pd

# SciPy
import scipy
from scipy import stats

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# BigQuery
from google.cloud import bigquery
from google.colab import auth

# Scikit-Learn
from sklearn.utils import shuffle
from sklearn.preprocessing import MultiLabelBinarizer

# TensorFlow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow_hub import KerasLayer
from tensorflow import feature_column as fc
from tensorflow.keras.preprocessing import text
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras import callbacks, layers, models, utils
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Embedding, Input, Flatten, Conv2D, MaxPooling2D

In [2]:
# User Authentication
auth.authenticate_user()

# BigQuery Library
# !pip install --upgrade google-cloud-bigquery

In [3]:
project_id = 'core-catalyst-425922-v9'
os.environ['GOOGLE_CLOUD_PROJECT'] = project_id

# BigQuery Client Config
client = bigquery.Client(project=project_id)

In [4]:
df = pd.read_csv("/content/babyweight_train.csv")
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks,mother_race
0,7.749249,False,12,Single(1),40,1.0
1,7.561856,True,12,Single(1),40,2.0
2,7.18707,False,12,Single(1),34,3.0
3,6.375769,True,12,Single(1),36,2.0
4,7.936641,False,12,Single(1),35,


### Create our `tf.data` input pipeline

In [5]:
CSV_COLUMNS = ["weight_pounds",
               "is_male",
               "mother_age",
               "plurality",
               "gestation_weeks",
               "mother_race"]

LABEL_COLUMN = "weight_pounds"

DEFAULTS = [[0.0], ["null"], [0.0], ["null"], [0.0], ["0"]]

In [7]:
def get_dataset(file_path):
    dataset = tf.data.experimental.make_csv_dataset(
        file_path,
        batch_size=15,
        label_name=LABEL_COLUMN,
        select_columns=CSV_COLUMNS,
        column_defaults=DEFAULTS,
        num_epochs=1,
        ignore_errors=True)
    return dataset

train_data = get_dataset("/content/babyweight_train.csv")
test_data = get_dataset("/content/babyweight_eval.csv")

Instructions for updating:
Use `tf.data.Dataset.ignore_errors` instead.


In [8]:
def show_batch(dataset):
    for batch, label in dataset.take(1):
        for key, value in batch.items():
            print("{:20s}: {}".format(key,value.numpy()))

show_batch(train_data)

is_male             : [b'False' b'True' b'True' b'False' b'False' b'False' b'False' b'False'
 b'True' b'False' b'False' b'False' b'True' b'True' b'False']
mother_age          : [17. 17. 17. 17. 17. 16. 18. 17. 16. 18. 17. 17. 15. 15. 18.]
plurality           : [b'Single(1)' b'Single(1)' b'Single(1)' b'Single(1)' b'Single(1)'
 b'Single(1)' b'Single(1)' b'Single(1)' b'Single(1)' b'Single(1)'
 b'Single(1)' b'Single(1)' b'Single(1)' b'Single(1)' b'Single(1)']
gestation_weeks     : [40. 39. 39. 38. 39. 40. 36. 39. 44. 41. 38. 40. 43. 38. 36.]
mother_race         : [b'2.0' b'0' b'1.0' b'1.0' b'0' b'1.0' b'0' b'2.0' b'1.0' b'1.0' b'0'
 b'2.0' b'2.0' b'0' b'0']


# ***Create Features Columns***

In [9]:
numeric_columns = [fc.numeric_column("mother_age"),
                  fc.numeric_column("gestation_weeks")]

CATEGORIES = {
    'plurality': ["Single(1)", "Twins(2)", "Triplets(3)",
                  "Quadruplets(4)", "Quintuplets(5)", "Multiple(2+)"],
    'is_male' : ["True", "False", "Unknown"],
    'mother_race': [str(_) for _ in df.mother_race.unique()]
}

categorical_columns = []
for feature, vocab in CATEGORIES.items():
  cat_col = fc.categorical_column_with_vocabulary_list(
        key=feature, vocabulary_list=vocab)
  categorical_columns.append(fc.indicator_column(cat_col))

Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.
Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.
Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.


### ***Creating Ensemble Models***

In [10]:
inputs = {colname: tf.keras.layers.Input(
    name=colname, shape=(), dtype="float32")
    for colname in ["mother_age", "gestation_weeks"]}

inputs.update({colname: tf.keras.layers.Input(
    name=colname, shape=(), dtype="string")
    for colname in ["is_male", "plurality", "mother_race"]})

dnn_inputs = layers.DenseFeatures(categorical_columns+numeric_columns)(inputs)

# model_1
model1_h1 = layers.Dense(50, activation="relu")(dnn_inputs)
model1_h2 = layers.Dense(30, activation="relu")(model1_h1)
model1_output = layers.Dense(1, activation="relu")(model1_h2)
model_1 = tf.keras.models.Model(inputs=inputs, outputs=model1_output, name="model_1")

# model_2
model2_h1 = layers.Dense(64, activation="relu")(dnn_inputs)
model2_h2 = layers.Dense(32, activation="relu")(model2_h1)
model2_output = layers.Dense(1, activation="relu")(model2_h2)
model_2 = tf.keras.models.Model(inputs=inputs, outputs=model2_output, name="model_2")

# model_3
model3_h1 = layers.Dense(32, activation="relu")(dnn_inputs)
model3_output = layers.Dense(1, activation="relu")(model3_h1)
model_3 = tf.keras.models.Model(inputs=inputs, outputs=model3_output, name="model_3")

In [11]:
def fit_model(model):
    model.compile(
        loss=tf.keras.losses.MeanSquaredError(),
        optimizer='adam', metrics=['mse'])

    model.fit(train_data.shuffle(500), epochs=1)

    test_loss, test_mse = model.evaluate(test_data)
    print('\n\n{}:\nTest Loss {}, Test RMSE {}'.format(
        model.name, test_loss, test_mse**0.5))

    return model

In [12]:
try:
    os.makedirs('models')
except:
    print("directory already exists")

In [13]:
members = [model_1, model_2, model_3]

n_members = len(members)

for i in range(n_members):
    model = fit_model(members[i])
    filename = 'models/model_' + str(i + 1) + '.h5'
    model.save(filename, save_format='tf')
    print('Saved {}\n'.format(filename))



Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.




model_1:
Test Loss 54.19366455078125, Test RMSE 7.361634638501238
Saved models/model_1.h5



model_2:
Test Loss 54.19367980957031, Test RMSE 7.361635674873507
Saved models/model_2.h5



model_3:
Test Loss 54.19364929199219, Test RMSE 7.361633602128823
Saved models/model_3.h5



### ***Loading Trained Models & Creating The Stacked Ensemble Model***

In [14]:
def load_models(n_models):
    all_models = []
    for i in range(n_models):
        filename = 'models/model_' + str(i + 1) + '.h5'
        model = models.load_model(filename)
        all_models.append(model)
        print('>loaded %s' % filename)
    return all_models

In [15]:
members = load_models(n_members)

print('Loaded %d models' % len(members))

>loaded models/model_1.h5
>loaded models/model_2.h5
>loaded models/model_3.h5
Loaded 3 models


In [16]:
for i in range(n_members):
    model = members[i]
    for layer in model.layers:
        layer.trainable = False
        layer._name = 'ensemble_' + str(i+1) + '_' + layer.name

In [17]:
member_inputs = [model.input for model in members]

member_outputs = [model.output for model in members]
merge = layers.concatenate(member_outputs)
h1 = layers.Dense(30, activation='relu')(merge)
h2 = layers.Dense(20, activation='relu')(h1)
h3 = layers.Dense(10, activation='relu')(h2)
h4 = layers.Dense(5, activation='relu')(h2)
ensemble_output = layers.Dense(1, activation='relu')(h3)
ensemble_model = Model(inputs=member_inputs, outputs=ensemble_output)

tf.keras.utils.plot_model(ensemble_model, show_shapes=True, to_file='ensemble_graph.png')

# Compile
ensemble_model.compile(loss='mse', optimizer='adam', metrics=['mse'])

In [18]:
FEATURES = ["is_male", "mother_age", "plurality",
            "gestation_weeks", "mother_race"]

def stack_features(features, label):
    for feature in FEATURES:
        for i in range(n_members):
            features['ensemble_' + str(i+1) + '_' + feature] = features[feature]

    return features, label

ensemble_data = train_data.map(stack_features).repeat(1)

In [19]:
ensemble_model.fit(ensemble_data.shuffle(500), epochs=1)



<keras.src.callbacks.History at 0x7f78a2181e10>

In [20]:
val_loss, val_mse = ensemble_model.evaluate(test_data.map(stack_features))



In [21]:
print("Validation RMSE: {}".format(val_mse**0.5))

Validation RMSE: 7.361633083942561
