# **Distribution Strategy Design Pattern**

### ***Loading Libraries***

In [1]:
# Operating Systems
import os
import shutil
import datetime

# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# SciPy
import scipy
from scipy import stats

# Data Visualization
import itertools
import seaborn as sns
import matplotlib.pyplot as plt

# BigQuery
from google.cloud import bigquery
from google.colab import auth

# Scikit-Learn
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MultiLabelBinarizer

# Extreme Gradient Boosting
import xgboost as xgb

# TensorFlow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras import layers
from tensorflow_hub import KerasLayer
from tensorflow import feature_column as fc
from tensorflow.keras.preprocessing import text
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras import callbacks, layers, models, utils
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Embedding, Input, Flatten, Conv2D, MaxPooling2D

In [2]:
# Column names handed to `make_csv_dataset` in `load_dataset`.
CSV_COLUMNS = [
    "weight_pounds",
    "is_male",
    "mother_age",
    "plurality",
    "gestation_weeks",
    "mother_race",
]

# Name of the column that is split off as the label.
LABEL_COLUMN = "weight_pounds"

# One single-element default per entry of CSV_COLUMNS, in the same order.
# is_male and plurality (and mother_race) get string defaults so they are
# treated as strings; the remaining columns get float defaults.
DEFAULTS = [
    [0.0],     # weight_pounds
    ["null"],  # is_male
    [0.0],     # mother_age
    ["null"],  # plurality
    [0.0],     # gestation_weeks
    ["null"],  # mother_race
]

In [3]:
def features_and_labels(row_data):
    """Separate the label column from the remaining feature columns.

    The label entry is removed from `row_data` in place, so the returned
    feature mapping is the very object that was passed in.

    Args:
        row_data: Mapping of CSV column names to tensor values; it must
            contain the `LABEL_COLUMN` key.
    Returns:
        Tuple `(features, label)`: the mapping without the label entry and
        the value that was stored under `LABEL_COLUMN`.
    """
    target = row_data.pop(LABEL_COLUMN)
    features = row_data

    return features, target


def load_dataset(pattern, batch_size=1, mode=tf.estimator.ModeKeys.EVAL):
    """Loads a batched `(features, label)` dataset from CSV files via tf.data.

    Args:
        pattern: str, file pattern to glob into list of files.
        batch_size: int, the number of examples per batch.
        mode: tf.estimator.ModeKeys to determine if training or evaluating.
            In TRAIN mode the batches are additionally shuffled and the
            dataset is repeated.
    Returns:
        `Dataset` object yielding `(feature dictionary, label)` batches.
    """
    # Make a CSV dataset.
    # NOTE(review): `shuffle` and `num_epochs` are left at the library
    # defaults. Per the TensorFlow docs these are shuffle=True and
    # num_epochs=None (repeat forever), which would also apply in EVAL mode.
    # Confirm that callers bound evaluation with `steps` or `.take(...)`.
    dataset = tf.data.experimental.make_csv_dataset(
        file_pattern=pattern,
        batch_size=batch_size,
        column_names=CSV_COLUMNS,
        column_defaults=DEFAULTS)

    # Map dataset to features and label
    dataset = dataset.map(map_func=features_and_labels)  # features, label

    # Shuffle and repeat for training
    if mode == tf.estimator.ModeKeys.TRAIN:
        dataset = dataset.shuffle(buffer_size=1000).repeat()

    # Overlap input preparation with model execution. AUTOTUNE lets tf.data
    # size the prefetch buffer dynamically; a literal 1 is a fixed
    # one-element buffer, not autotuning.
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)

    return dataset

#### ***Building the Model as Before***

In [4]:
def create_input_layers():
    """Builds one scalar Keras `Input` per model feature.

    Returns:
        Dictionary mapping each feature name to its
        `tf.keras.layers.Input` layer. "mother_age" and "gestation_weeks"
        are float32 inputs; "is_male", "plurality" and "mother_race" are
        string inputs.
    """
    # Feature name -> input dtype, in the order the inputs are created.
    dtype_by_feature = {
        "mother_age": "float32",
        "gestation_weeks": "float32",
        "is_male": "string",
        "plurality": "string",
        "mother_race": "string",
    }

    return {
        feature: tf.keras.layers.Input(name=feature, shape=(), dtype=dtype)
        for feature, dtype in dtype_by_feature.items()
    }

#### ***Feature Column Set-Up***

In [5]:
def categorical_fc(name, values):
    """Builds an indicator column over a fixed vocabulary.

    Args:
        name: str, key of the feature the column reads.
        values: list of vocabulary entries for that feature.
    Returns:
        Indicator feature column wrapping a vocabulary-list categorical
        column for `name`.
    """
    return fc.indicator_column(
        categorical_column=fc.categorical_column_with_vocabulary_list(
            key=name, vocabulary_list=values))


def create_feature_columns():
    """Assembles the feature columns consumed by the DNN.

    Returns:
        Dictionary mapping a column name to its feature column: numeric
        columns for "mother_age" and "gestation_weeks", vocabulary-based
        indicator columns for "is_male" and "plurality", a hashed indicator
        column for "mother_race", and a 2-dimensional embedding of the
        is_male x plurality cross under "gender_x_plurality".
    """
    plurality_vocab = ["Single(1)", "Twins(2)", "Triplets(3)",
                       "Quadruplets(4)", "Quintuplets(5)", "Multiple(2+)"]

    columns = {}

    # Numeric features are passed through as-is.
    for key in ["mother_age", "gestation_weeks"]:
        columns[key] = fc.numeric_column(key=key)

    # Categorical features with a known vocabulary.
    columns["is_male"] = categorical_fc(
        "is_male", ["True", "False", "Unknown"])
    columns["plurality"] = categorical_fc("plurality", plurality_vocab)

    # mother_race has no listed vocabulary here, so it is hashed into buckets.
    race_buckets = fc.categorical_column_with_hash_bucket(
        "mother_race", hash_bucket_size=17, dtype=tf.dtypes.string)
    columns["mother_race"] = fc.indicator_column(race_buckets)

    # Feature cross of is_male and plurality, embedded into two dimensions.
    gender_plurality_cross = fc.crossed_column(
        ["is_male", "plurality"], hash_bucket_size=18)
    columns["gender_x_plurality"] = fc.embedding_column(
        gender_plurality_cross, dimension=2)

    return columns

In [6]:
def get_model_outputs(inputs):
    """Stacks the hidden layers and the regression head on top of `inputs`.

    Args:
        inputs: Tensor fed into the first hidden layer.
    Returns:
        Output tensor of the final single-unit linear layer named "weight".
    """
    hidden = inputs
    # Two ReLU hidden layers: "h1" with 64 units, then "h2" with 32 units.
    for layer_name, width in (("h1", 64), ("h2", 32)):
        hidden = layers.Dense(width, activation="relu", name=layer_name)(hidden)

    return layers.Dense(units=1, activation="linear", name="weight")(hidden)

In [7]:
def rmse(y_true, y_pred):
    """Root-mean-squared error between predictions and targets.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predicted values.
    Returns:
        Scalar tensor: the square root of the mean squared difference.
    """
    squared_error = (y_pred - y_true) ** 2
    mean_squared_error = tf.reduce_mean(squared_error)
    return tf.sqrt(mean_squared_error)

## **Model Build & Distribution Strategy Set-Up**

In [8]:
def build_dnn_model():
    """Builds and compiles a simple DNN with the Keras Functional API.

    The model takes the input layers from `create_input_layers`, turns them
    into a dense tensor through the columns from `create_feature_columns`,
    and passes that through `get_model_outputs`.

    Returns:
        Compiled `tf.keras.models.Model` (Adam optimizer, MSE loss, with
        `rmse` and "mse" as metrics).
    """
    # Keras inputs, one per raw feature.
    feature_inputs = create_input_layers()

    # Feature columns transform the raw inputs into one dense tensor.
    columns = create_feature_columns().values()
    dense_features = layers.DenseFeatures(
        feature_columns=columns)(feature_inputs)

    # Hidden layers and regression head.
    prediction = get_model_outputs(dense_features)

    # Assemble and compile.
    dnn = tf.keras.models.Model(inputs=feature_inputs, outputs=prediction)
    dnn.compile(optimizer="adam", loss="mse", metrics=[rmse, "mse"])

    return dnn

# Distribution Strategy
mirrored_strategy = tf.distribute.MirroredStrategy()

# Build (and compile) the model inside the strategy scope so that it is
# created under the distribution strategy.
with mirrored_strategy.scope():
    model = build_dnn_model()

print("Here is our DNN architecture so far:\n")
# `summary()` prints the table itself and returns None; wrapping it in
# print() would append a stray "None" line to the output.
model.summary()

Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.
Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.
Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.keras.layers` for feature preprocessing when training a Keras model.
Instructions for updating:
Use Keras preprocessing layers instead, either directly or via the `tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has a functional equivalent in `tf.k

Here is our DNN architecture so far:

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 gestation_weeks (InputLaye  [(None,)]                    0         []                            
 r)                                                                                               
                                                                                                  
 is_male (InputLayer)        [(None,)]                    0         []                            
                                                                                                  
 mother_age (InputLayer)     [(None,)]                    0         []                            
                                                                                                  
 mother_race (InputLayer)    [(None,)]                  

In [9]:
# Report how many replicas the mirrored strategy keeps in sync.
print(f"Number of devices: {mirrored_strategy.num_replicas_in_sync}")

Number of devices: 1
