# Model-building pipeline using the easyflow `feature_encoders` module

This module is a fusion of Keras layers and TensorFlow feature columns.

In [1]:
import pandas as pd
import tensorflow as tf

In [2]:
try:
    import easyflow
except:
    ! pip install easy-tensorflow

In [2]:
# local imports
from easyflow.data import TensorflowDataMapper
from easyflow.feature_encoders import FeatureColumnTransformer, FeatureUnionTransformer
from easyflow.feature_encoders import NumericalFeatureEncoder, EmbeddingFeatureEncoder, CategoricalFeatureEncoder

In [3]:
# Column names for the raw file, in file order. They are passed as `names=`
# when the CSV is read with `header=None`; the last column is the label.
CSV_HEADER = [
    "age", "workclass", "fnlwgt", "education", "education_num",
    "marital_status", "occupation", "relationship", "race", "gender",
    "capital_gain", "capital_loss", "hours_per_week", "native_country",
    "income_bracket",
]

# Location of the raw data file that is downloaded when no local copy exists.
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"

# Re-use the locally cached CSV copies when they exist; otherwise download the
# raw file once, derive the binary label and write both pieces to disk.
try:
    data_frame = pd.read_csv('adult_features.csv')
    # The label cache is a one-column CSV, which read_csv returns as a
    # DataFrame. Squeeze it back to a Series so that both branches give
    # `labels_binary` the same type.
    labels_binary = pd.read_csv('adult_labels.csv').squeeze("columns")
except FileNotFoundError:
    # Only a missing cache file triggers the download; any other read error
    # (corrupt file, permissions, ...) is raised instead of being swallowed.
    data_frame = pd.read_csv(data_url, header=None, names=CSV_HEADER)
    labels = data_frame.pop("income_bracket")
    # 1.0 where the raw label equals " >50K" (leading space included), else 0.0.
    labels_binary = 1.0 * (labels == " >50K")
    data_frame.to_csv('adult_features.csv', index=False)
    labels_binary.to_csv('adult_labels.csv', index=False)

print(f"Train dataset shape: {data_frame.shape}")

Train dataset shape: (32561, 14)


In [4]:
# Wrap the features and labels into a dataset, split it into a training and a
# validation part, and batch both parts with the same batch size.
batch_size = 256
dataset_mapper = TensorflowDataMapper()
dataset = dataset_mapper.map(data_frame, labels_binary)

train_data_set, val_data_set = (
    subset.batch(batch_size)
    for subset in dataset_mapper.split_data_set(dataset)
)

## Set up the feature encoding list

In [5]:
# Columns handed to NumericalFeatureEncoder.
NUMERIC_FEATURE_NAMES = [
    "age", "education_num", "capital_gain", "capital_loss", "hours_per_week",
]

# Columns handed to CategoricalFeatureEncoder.
CATEGORICAL_FEATURES_NAMES = [
    "workclass", "marital_status", "relationship", "race", "gender",
]

# Columns handed to EmbeddingFeatureEncoder (and, a second time, to
# CategoricalFeatureEncoder for the wide branch).
EMBEDDING_FEATURES_NAMES = ["education", "occupation", "native_country"]

In [6]:
# One (group name, encoder instance, column names) triple per feature group.
# The group name is the key under which the encoded output is looked up later.
# The embedding columns appear twice: once embedded (dimension 10) for the
# deep branch and once through CategoricalFeatureEncoder for the wide branch.
feature_encoder_list = [
    ('numerical_features', NumericalFeatureEncoder(), NUMERIC_FEATURE_NAMES),
    ('categorical_features', CategoricalFeatureEncoder(), CATEGORICAL_FEATURES_NAMES),
    ('embedding_features_deep', EmbeddingFeatureEncoder(dimension=10), EMBEDDING_FEATURES_NAMES),
    ('embedding_features_wide', CategoricalFeatureEncoder(), EMBEDDING_FEATURES_NAMES),
]

## Setting up feature layer and feature encoders

There are two main column transformer classes, namely `FeatureColumnTransformer` and `FeatureUnionTransformer`. For this example we are going to build a Wide and Deep model architecture, so we will use `FeatureColumnTransformer`, since it gives us more flexibility: each encoded feature group stays addressable by its name. `FeatureUnionTransformer`, by contrast, concatenates all the features in the input layer.

In [7]:
# Build the Keras inputs and the encoded feature groups from the training data.
# `feature_layer` is indexed by the group names from `feature_encoder_list`;
# the values of `feature_layer_inputs` become the model inputs further down.
feature_layer_inputs, feature_layer = FeatureColumnTransformer(
    feature_encoder_list
).transform(train_data_set)

In [9]:
# Deep branch: numerical, categorical and embedded features joined together.
deep = tf.keras.layers.concatenate(
    [
        feature_layer[group]
        for group in (
            'numerical_features',
            'categorical_features',
            'embedding_features_deep',
        )
    ]
)

# Wide branch: the embedding columns as encoded by CategoricalFeatureEncoder.
wide = feature_layer['embedding_features_wide']

In [10]:
# NOTE: `deep` is rebound in place below, so re-running this cell on its own
# stacks the layers on top of the previous result. Re-run the cell that
# defines `deep` and `wide` first.
deep = tf.keras.layers.BatchNormalization()(deep)

# Three shrinking fully connected layers, each followed by 50% dropout.
for nodes in (128, 64, 32):
    deep = tf.keras.layers.Dense(units=nodes, activation='relu')(deep)
    deep = tf.keras.layers.Dropout(rate=0.5)(deep)

# Join the wide branch with the output of the deep tower and map the result to
# a single sigmoid unit for the binary target.
wide_and_deep = tf.keras.layers.concatenate([deep, wide])
output = tf.keras.layers.Dense(units=1, activation='sigmoid')(wide_and_deep)

model = tf.keras.Model(
    inputs=list(feature_layer_inputs.values()),
    outputs=output,
)
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=0.0),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=[
        tf.keras.metrics.BinaryAccuracy(name='accuracy'),
        tf.keras.metrics.AUC(name='auc'),
    ],
)

In [17]:
# Train for 10 epochs, evaluating on the validation split after each epoch.
model.fit(
    train_data_set,
    validation_data=val_data_set,
    epochs=10,
)

## Save and load model

In [19]:
# Write the trained model to disk, then drop the in-memory reference so the
# cells below can only work with the copy restored from disk.
model.save('tfcolumn_model_example')
del model

In [13]:
# Restore the model from the path it was saved to above.
loaded_model = tf.keras.models.load_model(
    filepath="tfcolumn_model_example"
)

In [15]:
# Sanity check: score a single validation batch with the restored model.
sample_batch = val_data_set.take(1)
loaded_model.predict(sample_batch)