In [2]:
import random
import os

import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.neuron as tfn
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import VarianceScaling
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import Callback
from sklearn.metrics import roc_auc_score

2024-03-18 08:40:26.549851: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-03-18 08:40:26.661484: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/lib:/usr/lib
2024-03-18 08:40:26.661509: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2024-03-18 08:40:26.688308: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-18 08:40:2

In [3]:
from mmoe import MMoE

SEED = 1

# Fix numpy seed for reproducibility
np.random.seed(SEED)

# Fix random seed for reproducibility
random.seed(SEED)

# Fix TensorFlow graph-level seed for reproducibility
tf.random.set_seed(SEED)

In [4]:
# Simple callback to print out ROC-AUC
class ROCCallback(Callback):
    def __init__(self, training_data, validation_data, test_data):
        self.train_X = training_data[0]
        self.train_Y = training_data[1]
        self.validation_X = validation_data[0]
        self.validation_Y = validation_data[1]
        self.test_X = test_data[0]
        self.test_Y = test_data[1]

    def on_train_begin(self, logs={}):
        return

    def on_train_end(self, logs={}):
        return

    def on_epoch_begin(self, epoch, logs={}):
        return

    def on_epoch_end(self, epoch, logs={}):
        train_prediction = self.model.predict(self.train_X)
        validation_prediction = self.model.predict(self.validation_X)
        test_prediction = self.model.predict(self.test_X)

        # Iterate through each task and output their ROC-AUC across different datasets
        for index, output_name in enumerate(self.model.output_names):
            train_roc_auc = roc_auc_score(self.train_Y[index], train_prediction[index])
            validation_roc_auc = roc_auc_score(self.validation_Y[index], validation_prediction[index])
            test_roc_auc = roc_auc_score(self.test_Y[index], test_prediction[index])
            print(
                'ROC-AUC-{}-Train: {} ROC-AUC-{}-Validation: {} ROC-AUC-{}-Test: {}'.format(
                    output_name, round(train_roc_auc, 4),
                    output_name, round(validation_roc_auc, 4),
                    output_name, round(test_roc_auc, 4)
                )
            )

        return

    def on_batch_begin(self, batch, logs={}):
        return

    def on_batch_end(self, batch, logs={}):
        return


def data_preparation():
    # The column names are from
    # https://www2.1010data.com/documentationcenter/prod/Tutorials/MachineLearningExamples/CensusIncomeDataSet.html
    column_names = ['age', 'class_worker', 'det_ind_code', 'det_occ_code', 'education', 'wage_per_hour', 'hs_college',
                    'marital_stat', 'major_ind_code', 'major_occ_code', 'race', 'hisp_origin', 'sex', 'union_member',
                    'unemp_reason', 'full_or_part_emp', 'capital_gains', 'capital_losses', 'stock_dividends',
                    'tax_filer_stat', 'region_prev_res', 'state_prev_res', 'det_hh_fam_stat', 'det_hh_summ',
                    'instance_weight', 'mig_chg_msa', 'mig_chg_reg', 'mig_move_reg', 'mig_same', 'mig_prev_sunbelt',
                    'num_emp', 'fam_under_18', 'country_father', 'country_mother', 'country_self', 'citizenship',
                    'own_or_self', 'vet_question', 'vet_benefits', 'weeks_worked', 'year', 'income_50k']

    # Load the dataset in Pandas
    train_df = pd.read_csv(
        'data/census-income.data.gz',
        delimiter=',',
        header=None,
        index_col=None,
        names=column_names
    )
    other_df = pd.read_csv(
        'data/census-income.test.gz',
        delimiter=',',
        header=None,
        index_col=None,
        names=column_names
    )

    # First group of tasks according to the paper
    label_columns = ['income_50k', 'marital_stat']

    # One-hot encoding categorical columns
    categorical_columns = ['class_worker', 'det_ind_code', 'det_occ_code', 'education', 'hs_college', 'major_ind_code',
                           'major_occ_code', 'race', 'hisp_origin', 'sex', 'union_member', 'unemp_reason',
                           'full_or_part_emp', 'tax_filer_stat', 'region_prev_res', 'state_prev_res', 'det_hh_fam_stat',
                           'det_hh_summ', 'mig_chg_msa', 'mig_chg_reg', 'mig_move_reg', 'mig_same', 'mig_prev_sunbelt',
                           'fam_under_18', 'country_father', 'country_mother', 'country_self', 'citizenship',
                           'vet_question']
    train_raw_labels = train_df[label_columns]
    other_raw_labels = other_df[label_columns]
    transformed_train = pd.get_dummies(train_df.drop(label_columns, axis=1), columns=categorical_columns)
    transformed_other = pd.get_dummies(other_df.drop(label_columns, axis=1), columns=categorical_columns)

    # Filling the missing column in the other set
    transformed_other['det_hh_fam_stat_ Grandchild <18 ever marr not in subfamily'] = 0

    # One-hot encoding categorical labels
    train_income = to_categorical((train_raw_labels.income_50k == ' 50000+.').astype(int), num_classes=2)
    train_marital = to_categorical((train_raw_labels.marital_stat == ' Never married').astype(int), num_classes=2)
    other_income = to_categorical((other_raw_labels.income_50k == ' 50000+.').astype(int), num_classes=2)
    other_marital = to_categorical((other_raw_labels.marital_stat == ' Never married').astype(int), num_classes=2)

    dict_outputs = {
        'income': train_income.shape[1],
        'marital': train_marital.shape[1]
    }
    dict_train_labels = {
        'income': train_income,
        'marital': train_marital
    }
    dict_other_labels = {
        'income': other_income,
        'marital': other_marital
    }
    output_info = [(dict_outputs[key], key) for key in sorted(dict_outputs.keys())]

    # Split the other dataset into 1:1 validation to test according to the paper
    validation_indices = transformed_other.sample(frac=0.5, replace=False, random_state=SEED).index
    test_indices = list(set(transformed_other.index) - set(validation_indices))
    validation_data = transformed_other.iloc[validation_indices]
    validation_label = [dict_other_labels[key][validation_indices] for key in sorted(dict_other_labels.keys())]
    test_data = transformed_other.iloc[test_indices]
    test_label = [dict_other_labels[key][test_indices] for key in sorted(dict_other_labels.keys())]
    train_data = transformed_train
    train_label = [dict_train_labels[key] for key in sorted(dict_train_labels.keys())]

    return train_data, train_label, validation_data, validation_label, test_data, test_label, output_info


In [5]:
# Load the data
train_data, train_label, validation_data, validation_label, test_data, test_label, output_info = data_preparation()
num_features = train_data.shape[1]

print('Training data shape = {}'.format(train_data.shape))
print('Validation data shape = {}'.format(validation_data.shape))
print('Test data shape = {}'.format(test_data.shape))

# Set up the input layer
input_layer = Input(shape=(num_features,))

# Set up MMoE layer
mmoe_layers = MMoE(
    units=4,
    num_experts=8,
    num_tasks=2
)(input_layer)

output_layers = []

# Build tower layer from MMoE layer
for index, task_layer in enumerate(mmoe_layers):
    tower_layer = Dense(
        units=8,
        activation='relu',
        kernel_initializer=VarianceScaling())(task_layer)
    output_layer = Dense(
        units=output_info[index][0],
        name=output_info[index][1],
        activation='softmax',
        kernel_initializer=VarianceScaling())(tower_layer)
    output_layers.append(output_layer)

# Compile model
model = Model(inputs=[input_layer], outputs=output_layers)
adam_optimizer = Adam()
model.compile(
    loss={'income': 'binary_crossentropy', 'marital': 'binary_crossentropy'},
    optimizer=adam_optimizer,
    metrics=['accuracy']
)

Training data shape = (199523, 499)
Validation data shape = (49881, 499)
Test data shape = (49881, 499)


2024-03-18 08:40:31.372846: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
train_data = train_data.astype(float)
validation_data = validation_data.astype(float)
test_data = test_data.astype(float)

In [7]:
model.fit(
        x=train_data,
        y=train_label,
        epochs=1
    )



<keras.callbacks.History at 0x7fed024989d0>

In [8]:
model_dir = './model/mmoe'   
model.save(model_dir)

INFO:tensorflow:Assets written to: ./model/mmoe/assets


In [9]:
loaded_model = tf.keras.models.load_model(model_dir)
example_inputs = tf.random.uniform([0, 499])
tfn.analyze_model(loaded_model, example_inputs)

The following operations are currently supported in tensorflow-neuron for this model:
Softmax
Relu
MatMul
BiasAdd
The following operations are currently not supported in tensorflow-neuron for this model:
StatefulPartitionedCall
92.31% of all operations (12 of 13) are supported


{'percent_supported': 92.3076923076923,
 'supported_count': 12,
 'total_count': 13,
 'supported_operators': {'BiasAdd', 'MatMul', 'Relu', 'Softmax'},
 'unsupported_operators': ['StatefulPartitionedCall'],
 'operators': ['BiasAdd',
  'MatMul',
  'Relu',
  'Softmax',
  'StatefulPartitionedCall'],
 'operator_count': {'StatefulPartitionedCall': 1,
  'MatMul': 4,
  'BiasAdd': 4,
  'Relu': 2,
  'Softmax': 2}}

In [10]:
output = loaded_model.predict(test_data)
output



[array([[0.9956274 , 0.00437255],
        [0.9956274 , 0.00437255],
        [0.9956274 , 0.00437255],
        ...,
        [0.9956274 , 0.00437255],
        [0.9956274 , 0.00437255],
        [0.9956274 , 0.00437255]], dtype=float32),
 array([[9.9177831e-01, 8.2216049e-03],
        [5.1579947e-05, 9.9994832e-01],
        [1.2167198e-04, 9.9987823e-01],
        ...,
        [9.9181008e-01, 8.1898617e-03],
        [9.4658452e-01, 5.3415503e-02],
        [9.9388486e-01, 6.1151166e-03]], dtype=float32)]

In [11]:
neuron_model = tfn.trace(loaded_model, test_data)  # trace

2024-03-18 08:40:47.145479: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-03-18 08:40:47.145635: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2024-03-18 08:40:47.344537: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-03-18 08:40:47.344630: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2024-03-18 08:40:47.571062: I tensorflow/neuron/grappler/convert/segment.cc:456] There are 5 ops of 2 different types in the graph that are not compiled by neuron-cc: Placeholder, NoOp, (For more information see https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/neuron-cc-ops/neuron-cc-ops-tensorflow.html).
2024-03-18 08:40:47.586813: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-03-18 08:40:47.586919: I 

.complexity:  884

TensorContractLayout 0 block=j_0=[0:4:1] load_par=k=[0:499:1] store_par=j_1=[0:8:1] free=i=[0:49881:1]
layout_profile(block_axes_tripcount=784, parallel_axes_tripcount=1024)
	l 91 input0   [49881, 499] [F,P] ['i', 'k']
	l 89 input4   [499, 4, 8] [P,B,F] ['k', 'j_0', 'j_1']
	r           float32 $309[i,j_0,j_1] = tensor_contract_multiply_add(float32 $91[i,j_0,j_1,k], float32 $89[i,j_0,j_1,k], contract={k=[0:499:1]}, lhs_free={i=[0:49881:1]}, rhs_free={j_0=[0:4:1],j_1=[0:8:1]}) # dl = model/m_mo_e/StatefulPartitionedCall/StatefulPartitionedCall/Tensordot/MatMul_dot.81
	l 100 input3   [4, 8] [B,P] ['j_0', 'j_1']
	s 109 mhlo_maximum_5   [49881, 4, 8] [F,B,P] ['i', 'j_0', 'j_1']



TensorContractLayout 1 block= load_par=k=[0:499:1] store_par=j=[0:8:1] free=i=[0:49881:1]
layout_profile(block_axes_tripcount=196, parallel_axes_tripcount=1024)
	l 115 input5   [8] [P] ['j']
	l 110 input0   [49881, 499] [F,P] ['i', 'k']
	l 111 input1   [499, 8] [P,F] ['k', 'j']
	r         float3

In [12]:
print(neuron_model.on_neuron_ratio)

0.0714285714285714


In [13]:
#neuron_model.build(input_shape=test_data.shape)

In [14]:
test_data[:1]

Unnamed: 0,age,wage_per_hour,capital_gains,capital_losses,stock_dividends,instance_weight,num_emp,own_or_self,vet_benefits,weeks_worked,...,country_self_ Yugoslavia,citizenship_ Foreign born- Not a citizen of U S,citizenship_ Foreign born- U S citizen by naturalization,citizenship_ Native- Born abroad of American Parent(s),citizenship_ Native- Born in Puerto Rico or U S Outlying,citizenship_ Native- Born in the United States,vet_question_ No,vet_question_ Not in universe,vet_question_ Yes,det_hh_fam_stat_ Grandchild <18 ever marr not in subfamily
0,38.0,0.0,0.0,0.0,0.0,1032.38,4.0,0.0,2.0,12.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [15]:
neuron_model.predict(test_data[:10])

[array([[0.995628  , 0.00437197],
        [0.995628  , 0.00437197],
        [0.995628  , 0.00437197],
        ...,
        [0.9955972 , 0.00440288],
        [0.9955972 , 0.00440288],
        [0.9955972 , 0.00440288]], dtype=float32),
 array([[9.91760373e-01, 8.23958777e-03],
        [5.16981381e-05, 9.99948263e-01],
        [1.21341494e-04, 9.99878645e-01],
        ...,
        [9.97491360e-01, 2.50861933e-03],
        [9.97491360e-01, 2.50861933e-03],
        [9.97491360e-01, 2.50861933e-03]], dtype=float32)]

In [16]:
neuron_dir = "./model/model_neuron"
neuron_model.save(neuron_dir)

INFO:tensorflow:Assets written to: ./model/model_neuron/assets


In [17]:
model_neuron_reloaded = tf.keras.models.load_model(neuron_dir)



In [19]:
model_neuron_reloaded(test_data[:1])

[<tf.Tensor: shape=(49881, 2), dtype=float32, numpy=
 array([[0.995628  , 0.00437197],
        [0.9955972 , 0.00440288],
        [0.9955972 , 0.00440288],
        ...,
        [0.9955972 , 0.00440288],
        [0.9955972 , 0.00440288],
        [0.9955972 , 0.00440288]], dtype=float32)>,
 <tf.Tensor: shape=(49881, 2), dtype=float32, numpy=
 array([[0.9917604 , 0.00823959],
        [0.99749136, 0.00250862],
        [0.99749136, 0.00250862],
        ...,
        [0.99749136, 0.00250862],
        [0.99749136, 0.00250862],
        [0.99749136, 0.00250862]], dtype=float32)>]