In [None]:
from tensorflow import random as tf_random
from random import seed
from numpy.random import seed as np_seed
def random_seed(seed_value):
    """Seed every random number generator the notebook relies on.

    Parameters:
    seed_value: seed handed unchanged to NumPy, Python's `random` module and TensorFlow
    """
    # order: NumPy, Python, TensorFlow
    for set_seed in (np_seed, seed, tf_random.set_seed):
        set_seed(seed_value)

In [None]:
# Seed the NumPy, Python and TensorFlow generators before anything else runs
random_seed(123)

In [None]:
# Central experiment settings, read by the preprocessing, optimizer and model cells
params = {'sample_perc':1,  # handed to preprocess_data; values below 1 switch to a subsample
          'bs': 512,  # presumably the batch size (fit_model_test takes a `bs` argument) - TODO confirm
          'epochs': 50, 
          'n_folds': 5,
          'optimizer': 'sgd',  # get_optimizer accepts 'sgd', 'adam' or 'rmsprop'
          'lr': 1e-01,
          'momentum': 0.9,
          'wd': 0.01,  # get_optimizer passes this to the `decay` argument of SGD
          'emb_szs': {'hips': 5, 'cup_size': 5, 'user_name': 50, 'item_id': 50, 'category': 5, 'length': 5},  # embedding dimension per categorical feature
          'trainable': False  # `trainable` flag of the TensorFlow Hub sentence encoder layer
         }

In [None]:
%matplotlib notebook
%matplotlib inline

import pandas as pd
import numpy as np
import torch
import os
import glob
import sys
import json
import copy
import re
import pprint
import missingno as msno
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, roc_auc_score, confusion_matrix
import tensorflow as tf
from tensorflow import keras
from tensorflow import feature_column
import tensorflow_hub as hub
from tensorflow.keras.callbacks import Callback
import matplotlib.pyplot as plt
import seaborn as sns
# Global matplotlib defaults: large figures, larger font, no top/right spines,
# no grid and a white axes background
plt.rc('figure', figsize = (20, 8))
plt.rc('font', size = 14)
plt.rc('axes.spines', top = False, right = False)
plt.rc('axes', grid = False)
plt.rc('axes', facecolor = 'white')

In [None]:
# Directory (with trailing slash) that holds modcloth_data.json and the
# train_indexes.csv / test_indexes.csv files read by preprocess_data
data_dir = 'clothing_fit_data/'

In [None]:
def normalize_bust_values(x):
    """Convert a raw bust entry into a single number.

    Parameters:
    x: raw bust value; a string holding one integer ("36") or a two-sided
       range ("34-36")

    Returns the integer for a single value, the mean of both ends for a range,
    and None when the entry is missing or cannot be parsed (this includes
    non-string input and ranges with more than two parts).
    """
    try:
        if pd.isnull(x):
            return None
        parts = x.split("-")
        # Only single values and two-sided ranges are accepted. The rule is an
        # explicit check rather than an `assert`, so it also holds under `python -O`.
        if len(parts) > 2:
            return None
        if len(parts) == 2:
            return np.mean([int(num) for num in parts])
        return int(x)  # raises ValueError for non-numeric text
    except (AttributeError, TypeError, ValueError):
        # Non-string input or text that is not a number: treat as missing
        return None
  
def normalize_height_values(x):
    """Convert a height string such as "5ft 6in" into centimetres.

    Parameters:
    x: raw height value; expected to start with the number of feet, optionally
       followed by a number of inches marked with "in"

    Returns the height in centimetres (feet * 30.48 + inches * 2.54). When no
    inches are given only the feet are converted. Returns None for missing
    values, non-string input and text that does not start with a number.
    """
    if not isinstance(x, str):
        # covers None / NaN as well as unexpected numeric input
        return None
    feet_match = re.match(r"\s*(\d+)", x)
    if feet_match is None:
        return None
    centimetres = int(feet_match.group(1)) * 30.48
    # look for the inches only behind the feet so the same digits are never read twice
    inch_match = re.search(r"(\d+)\s*in", x[feet_match.end():])
    if inch_match is not None:
        centimetres += int(inch_match.group(1)) * 2.54
    return centimetres

def preprocess_data(sample_perc=1): 
    """Load the ModCloth json file, clean it and return it with train/test row labels.

    Steps, in order: spaces in column names become underscores, bust and height
    are converted to numbers, rows outside 1.5 * IQR are removed, sparse columns
    are dropped, missing values are imputed and hips is binned into size labels.

    Parameters:
    sample_perc: 1 keeps all rows and reads the train/test indexes from the CSV
                 files in data_dir; a value below 1 keeps only rows with a review
                 text, subsamples them and draws a stratified 80/20 split on 'fit'

    Returns a tuple (modcloth_df, train_indexes, test_indexes)
    """
    modcloth_df = pd.read_json((data_dir + 'modcloth_data.json'), lines=True)

    # Changing column names and removing unnecessary spaces
    modcloth_df.columns = [x.replace(" ","_") for x in modcloth_df.columns]

    #normalize bust and height
    modcloth_df["bust"] = modcloth_df["bust"].apply(lambda x: normalize_bust_values(x))
    modcloth_df["height"] = modcloth_df["height"].apply(lambda x: normalize_height_values(x))

    #remove outliers
    # NOTE(review): quantile() is called on the whole frame, text columns included;
    # newer pandas releases may need numeric_only=True here - confirm against the
    # pandas version in use
    Q1 = modcloth_df.quantile(0.25)
    Q3 = modcloth_df.quantile(0.75)
    # the id columns are excluded from the outlier test
    Q1.drop(["item_id","user_id"], inplace =True)
    Q3.drop(["item_id","user_id"], inplace = True)
    IQR = Q3 - Q1
    # drop every row that has at least one value outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR]
    modcloth_df = modcloth_df[~((modcloth_df < (Q1 - 1.5 * IQR)) |(modcloth_df > (Q3 + 1.5 * IQR))).any(axis=1)]
    modcloth_df.reset_index(drop=True, inplace=True)

    # handling missing values
    # bust is dropped here although it was normalized above; it only took part in the outlier filter
    modcloth_df.drop(["waist", "bust", "shoe_width", "shoe_size"], axis= 1, inplace=True)
    # lookup tables: most frequent cup size per bra size, median bra size per cup size
    bra_size_to_cup_size = {x:y for x,y in modcloth_df.groupby("bra_size")["cup_size"].agg(pd.Series.mode).reset_index().values}
    cup_size_to_bra_size = {x:y for x,y in modcloth_df.groupby("cup_size")["bra_size"].median().reset_index().values}
    bra_size_med = modcloth_df.bra_size.median() 
    cup_size_mod = bra_size_to_cup_size[bra_size_med] # not matching with cup size mode
    imputed_value = []
    # fill whichever of the two sizes is missing from the other one, or use the
    # overall defaults when both are missing
    for x,y in zip(modcloth_df["bra_size"],modcloth_df["cup_size"]):
        if pd.isnull(x) and pd.isnull(y):
            imputed_value.append([bra_size_med, cup_size_mod])
        elif pd.isnull(x) and pd.notnull(y):
            imputed_value.append([cup_size_to_bra_size[y], y])
        elif pd.notnull(x) and pd.isnull(y):
            imputed_value.append([x, bra_size_to_cup_size[x]])
        else:
            imputed_value.append([x,y])
    modcloth_df[["bra_size","cup_size"]] = imputed_value
    # NOTE(review): the inplace fillna calls below act on a column accessed as an
    # attribute; verify they still modify modcloth_df on the pandas version in use
    modcloth_df.review_summary.fillna("Unknown", inplace=True)
    modcloth_df.review_text.fillna("Unknown", inplace=True)
    # missing length gets the most frequent length value
    modcloth_df["length"] = modcloth_df.length.fillna(modcloth_df['length'].value_counts().index[0])
    # -1.0 marks a missing hips value and falls into the 'Unknown' bin (-2, 0]
    modcloth_df.hips.fillna(-1.0, inplace = True)
    bins = [-2,0,31,37,40,44,75]
    labels = ['Unknown','XS','S','M', 'L','XL']
    modcloth_df.hips = pd.cut(modcloth_df.hips, bins, labels=labels)

    #drop unreliable or double included variables
    modcloth_df.drop('review_summary', axis=1, inplace = True)
    modcloth_df.drop('user_id', axis=1, inplace = True)
    modcloth_df["user_name"] = modcloth_df.user_name.apply(lambda x: x.lower())
    
    #impute missing values for height and quality variables
    from sklearn.impute import SimpleImputer
    median_imputer = SimpleImputer(strategy='median')
    median_imputer = median_imputer.fit(modcloth_df[['height','quality']])
    modcloth_df[['height','quality']] = median_imputer.transform(modcloth_df[['height','quality']])
    
    if sample_perc < 1:
        #Use a subset of the data
        # only rows that have a review text are kept for the subset
        modcloth_df = modcloth_df[modcloth_df['review_text'] != 'Unknown'].reset_index(drop=True)
        np.random.seed(123)
        # NOTE(review): permutation(k) shuffles the labels 0..k-1, so the subset is
        # always the first k rows in random order - confirm that a random sample
        # over all rows was not intended
        sample = np.random.permutation(int(sample_perc * len(modcloth_df.index)))
        modcloth_df = modcloth_df.loc[sample].reset_index(drop=True)
        print(f'Using {sample_perc*100}% of the data')
        
        from sklearn.model_selection import StratifiedShuffleSplit
        random_seed(123)
        # single stratified split on 'fit': 80% train, 20% test
        strat_sampling = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=123)
        train_indexes, test_indexes = next(strat_sampling.split(modcloth_df.loc[:,modcloth_df.columns!='fit'], modcloth_df['fit']))
    else:
        #use complete data with fixed train and test set (self splitted)
        from numpy import genfromtxt
        train_indexes = genfromtxt(data_dir + 'train_indexes.csv', delimiter=',', dtype=int)
        test_indexes = genfromtxt(data_dir + 'test_indexes.csv', delimiter=',', dtype=int)
        
    return (modcloth_df, train_indexes, test_indexes)

In [None]:
tf.__version__ # shows the installed version; the notebook was written with TensorFlow 2.1.0

In [None]:
# Run the preprocessing and display the shapes of the frame and both index arrays
modcloth_df, train_indexes, test_indexes = preprocess_data(params['sample_perc'])
modcloth_df.shape, train_indexes.shape, test_indexes.shape

In [None]:
#split into train and test set
# .copy() makes both splits independent frames, so the column assignments done
# when scaling and casting them later do not trigger pandas' SettingWithCopyWarning
train = modcloth_df.loc[train_indexes].copy()
test = modcloth_df.loc[test_indexes].copy()

In [None]:
#allocate variables to data types and input pathway
# customer (user) pathway
user_categorical_features = ["user_name","hips","cup_size"]
user_numerical_features = ["height","bra_size"]
# item pathway
item_categorical_features = ["item_id", "category", "length"]
item_numerical_features = ["size","quality"]
# target column
dep_name = ['fit']

# free-text column fed to the language model
nlp_features = ['review_text']

all_features = user_categorical_features + user_numerical_features + item_categorical_features + item_numerical_features + nlp_features

# the position of each label in this array is its position in the one-hot target vector
CLASS_LABELS =  np.array(["fit","small","large"])

In [None]:
# display the label order used for the one-hot targets
CLASS_LABELS

In [None]:
# Scaling the numerical features
# The min-max scaler is fitted on the training rows only and then applied to
# both splits with the same parameters
from sklearn import preprocessing
scaler = preprocessing.MinMaxScaler().fit(train[user_numerical_features + item_numerical_features])
train[user_numerical_features + item_numerical_features] = scaler.transform(train[user_numerical_features + item_numerical_features])
test[user_numerical_features + item_numerical_features] = scaler.transform(test[user_numerical_features + item_numerical_features])

# Categorify categorical features
# All categorical columns are cast to str in the splits and in the full frame
# (the Keras inputs for these columns are declared with dtype 'string')
for col in user_categorical_features + item_categorical_features:
    train[col] = train[col].astype(str)
    test[col] = test[col].astype(str)
    modcloth_df[col] = modcloth_df[col].astype(str)

In [None]:
def df_to_dataset(dataframe, shuffle:bool=True, batch_size=32):
    '''Create tf.data dataset from pandas DataFrame
    
    Parameters:
    dataframe: pandas DataFrame holding the feature columns and the 'fit' label column
    shuffle: Boolean indicating whether to shuffle the data
    batch_size: batch size; None puts the complete data into one batch
    
    Returns a tf.data dataset of (feature dict, one-hot label) batches
    '''
    
    # work on a copy so that popping the label leaves the caller's frame untouched
    dataframe = dataframe.copy()
    labels = dataframe.pop('fit')
    #create one-hot encoded label vector
    # Broadcasting each label against CLASS_LABELS gives one boolean (n_rows, n_classes)
    # array, a plain 2-D array that TensorFlow can slice row by row
    labels = labels.to_numpy()[:, None] == CLASS_LABELS
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    if batch_size is None:
        # complete data in 1 batch
        ds = ds.batch(len(labels))
    else:
        ds = ds.batch(batch_size)
    return ds

In [None]:
#Create feature columns and the final input for the neural network with separated input pathways

# Numeric Columns
# one numeric feature column per numerical customer feature, keyed by column name
numeric_users = {
    col : feature_column.numeric_column(col) \
          for col in user_numerical_features
}

# the same for the numerical item features
numeric_items = {
    col : feature_column.numeric_column(col) \
          for col in item_numerical_features
}


# Categorical Columns
# the vocabulary of every feature is the list of distinct values in the complete modcloth_df
hips = feature_column.categorical_column_with_vocabulary_list(
      'hips', modcloth_df.hips.unique().tolist())
cup_size = feature_column.categorical_column_with_vocabulary_list(
      'cup_size', modcloth_df.cup_size.unique().tolist())
user_name = feature_column.categorical_column_with_vocabulary_list(
      'user_name', modcloth_df.user_name.unique().tolist())

item_id = feature_column.categorical_column_with_vocabulary_list(
      'item_id', modcloth_df.item_id.unique().tolist())
category = feature_column.categorical_column_with_vocabulary_list(
      'category', modcloth_df.category.unique().tolist())
length = feature_column.categorical_column_with_vocabulary_list(
      'length', modcloth_df.length.unique().tolist())


# re-seed all generators before the embedding columns are created
random_seed(123)

#uncomment this section (and comment out the section below) to calculate the dimensions with the fastai rule; fastai_emb_limit is not defined in the cells shown here and has to be set first
'''
#dimensions calculated by fastai rule
print('Entity Embedding Dimensions:')
emb_szs = {}
for column in modcloth_df[user_categorical_features + item_categorical_features]:
    n_cat = modcloth_df[column].nunique()
    emb_sz = min(fastai_emb_limit, round(1.6 * n_cat**0.56))
    emb_szs[column] = emb_sz
    print(f'{column}: {n_cat} / {emb_sz}')
params['emb_szs'] = emb_szs
    
hips_embedding = feature_column.embedding_column(hips, dimension=emb_szs['hips'])
cup_size_embedding = feature_column.embedding_column(cup_size, dimension=emb_szs['cup_size'])
user_name_embedding = feature_column.embedding_column(user_name, dimension=emb_szs['user_name'])

item_id_embedding = feature_column.embedding_column(item_id, dimension=emb_szs['item_id'])
category_embedding = feature_column.embedding_column(category, dimension=emb_szs['category'])
length_embedding = feature_column.embedding_column(length, dimension=emb_szs['length'])
'''

#uncomment this section (and comment out the section above) to use the dimensions chosen by the practitioner

#Practitioner embeddings
# The dimensions are read from params['emb_szs'] (5 for hips, cup_size, category
# and length, 50 for user_name and item_id), so the settings dict is the single
# place where the embedding sizes are defined
emb_szs = params['emb_szs']
hips_embedding = feature_column.embedding_column(hips, dimension=emb_szs['hips'])
cup_size_embedding = feature_column.embedding_column(cup_size, dimension=emb_szs['cup_size'])
user_name_embedding = feature_column.embedding_column(user_name, dimension=emb_szs['user_name'])

item_id_embedding = feature_column.embedding_column(item_id, dimension=emb_szs['item_id'])
category_embedding = feature_column.embedding_column(category, dimension=emb_szs['category'])
length_embedding = feature_column.embedding_column(length, dimension=emb_szs['length'])



# embedding columns of the customer pathway, keyed by feature name
cat_users = {
    'hips' : hips_embedding,
    'cup_size' : cup_size_embedding,
    'user_name': user_name_embedding
}

# embedding columns of the item pathway, keyed by feature name
cat_items = {
    'item_id' : item_id_embedding,
    'category' : category_embedding,
    'length': length_embedding
}


#prepare final inputs
# one scalar Keras Input per feature, named after the column:
# float32 for numerical features, string for categorical ones
input_user = {
    colname : tf.keras.layers.Input(name=colname, shape=(), dtype='float32') \
          for colname in numeric_users.keys()
}
input_user.update({
    colname : tf.keras.layers.Input(name=colname, shape=(),  dtype='string') \
          for colname in cat_users.keys()
})

input_items = {
    colname : tf.keras.layers.Input(name=colname, shape=(), dtype = 'float32') \
          for colname in numeric_items.keys()
}

input_items.update({
    colname : tf.keras.layers.Input(name=colname, shape=(),  dtype='string') \
          for colname in cat_items.keys()
})

# scalar string input carrying the review text for the language model
input_nlp = tf.keras.layers.Input(name='review_text', shape=(), dtype = tf.string)

#capture all feature columns of the respective pathway in one vector
feat_cols_user = list(numeric_users.values()) + list(cat_users.values())
feat_cols_item = list(numeric_items.values()) + list(cat_items.values())

# Create feature layers
# DenseFeatures turns each input dict into one dense tensor per pathway; the
# separated model builders start from these two tensors
feature_layer_users = keras.layers.DenseFeatures(feat_cols_user)(input_user)
feature_layer_items = keras.layers.DenseFeatures(feat_cols_item)(input_items)

In [None]:
#Create feature columns and the final input for the neural network with NOT separated input pathways

# Numeric Columns
# all numerical features (customer and item) in one dict, keyed by column name
numeric_columns = {
    col : feature_column.numeric_column(col) \
          for col in user_numerical_features + item_numerical_features
}

# Categorical Columns
# NOTE: these assignments rebind the names hips, cup_size, user_name, item_id,
# category and length that the separated-pathway cell also defines
hips = feature_column.categorical_column_with_vocabulary_list(
      'hips', modcloth_df.hips.unique().tolist())
cup_size = feature_column.categorical_column_with_vocabulary_list(
      'cup_size', modcloth_df.cup_size.unique().tolist())
user_name = feature_column.categorical_column_with_vocabulary_list(
      'user_name', modcloth_df.user_name.unique().tolist())
item_id = feature_column.categorical_column_with_vocabulary_list(
      'item_id', modcloth_df.item_id.unique().tolist())
category = feature_column.categorical_column_with_vocabulary_list(
      'category', modcloth_df.category.unique().tolist())
length = feature_column.categorical_column_with_vocabulary_list(
      'length', modcloth_df.length.unique().tolist())


# re-seed all generators before the encoded columns are created
random_seed(123)
#one-hot encoded: uncomment this section (and comment out the section below) to use one-hot encoding
'''
hips_one_hot = feature_column.indicator_column(hips)
cup_size_one_hot = feature_column.indicator_column(cup_size)
user_name_one_hot = feature_column.indicator_column(user_name)
item_id_one_hot = feature_column.indicator_column(item_id)
category_one_hot = feature_column.indicator_column(category)
length_one_hot = feature_column.indicator_column(length)

cat_columns = {
    'hips' : hips_one_hot,
    'cup_size' : cup_size_one_hot,
    'user_name': user_name_one_hot,
    'item_id' : item_id_one_hot,
    'category' : category_one_hot,
    'length': length_one_hot
}
'''

#embeddings: uncomment this section (and comment out the section above) to use entity embeddings
# The dimensions are read from params['emb_szs'] (5 for hips, cup_size, category
# and length, 50 for user_name and item_id), so the settings dict is the single
# place where the embedding sizes are defined
emb_szs = params['emb_szs']
hips_embedding = feature_column.embedding_column(hips, dimension=emb_szs['hips'])
cup_size_embedding = feature_column.embedding_column(cup_size, dimension=emb_szs['cup_size'])
user_name_embedding = feature_column.embedding_column(user_name, dimension=emb_szs['user_name'])
item_id_embedding = feature_column.embedding_column(item_id, dimension=emb_szs['item_id'])
category_embedding = feature_column.embedding_column(category, dimension=emb_szs['category'])
length_embedding = feature_column.embedding_column(length, dimension=emb_szs['length'])

# encoded categorical columns keyed by feature name
cat_columns = {
    'hips' : hips_embedding,
    'cup_size' : cup_size_embedding,
    'user_name': user_name_embedding,
    'item_id' : item_id_embedding,
    'category' : category_embedding,
    'length': length_embedding
}


#prepare final inputs
# one scalar Keras Input per feature, named after the column:
# float32 for numerical features, string for categorical ones
input_tab = {
    colname : tf.keras.layers.Input(name=colname, shape=(), dtype='float32') \
          for colname in numeric_columns.keys()
}
input_tab.update({
    colname : tf.keras.layers.Input(name=colname, shape=(),  dtype='string') \
          for colname in cat_columns.keys()
})

#capture all feature columns in one vector
feat_cols = list(numeric_columns.values()) + list(cat_columns.values())

# Create a feature layer
# single dense tensor with all tabular features; the not-separated model
# builders start from it
feature_layer_tab = keras.layers.DenseFeatures(feat_cols)(input_tab)

In [None]:
class SkipCon(keras.layers.Layer):
    def __init__(self, size, reduce = True, deep = 3, skip_when=0, activation="relu", **kwargs):
        """
        Block of cascaded Dense + BatchNormalization layers with an optional skip connection
        
        @Params
        size = number of units of the first dense layer
        reduce = if True, every following dense layer has half the units of the previous one
        deep = number of Dense + BatchNormalization pairs in one SkipCon block call
        skip_when =  if a skip connection is required, pass 1
        activation = by default using relu
        """    
        super().__init__(**kwargs)
        # constructor arguments are kept so that get_config() can report them
        self.size = size
        self.reduce = reduce
        self.deep = deep
        self.activation = keras.activations.get(activation) # used to combine skip connections and cascaded dense layers
        self.main_layers = []
        self.skip_when = skip_when #to be used in call as a control

        # Unit counts stay integers: halving uses floor division, which gives the
        # same widths as truncating size / 2**i
        units = int(size)
        last_units = units
        for _ in range(deep):
            self.main_layers.extend([
                keras.layers.Dense(units, activation=activation, use_bias=True),
                keras.layers.BatchNormalization()])
            last_units = units
            if reduce:
                # Reduce the layer size by two each time, if the
                # network is to be designed deeper and narrow
                units = units // 2

        self.skip_layers = []
        if skip_when > 0:
            # the skip branch must produce the width of the last cascaded dense
            # layer, otherwise both branches could not be added in call()
            self.skip_layers = [
                keras.layers.Dense(last_units, activation=activation, use_bias=True),
                keras.layers.BatchNormalization()]

    def call(self, inputs):
        """Run the cascaded layers and, if enabled, add the skip branch before the final activation."""
        Z = inputs
        for layer in self.main_layers:
            Z = layer(Z)
        if not self.skip_when:
            return self.activation(Z)
        skip_Z = inputs
        for layer in self.skip_layers:
            skip_Z = layer(skip_Z)
        return self.activation(Z + skip_Z)

    def get_config(self):
        """Return the constructor arguments so Keras can serialise and re-create the layer."""
        config = super().get_config()
        config.update({
            "size": self.size,
            "reduce": self.reduce,
            "deep": self.deep,
            "skip_when": self.skip_when,
            "activation": keras.activations.serialize(self.activation),
        })
        return config

In [None]:
#Prepare evaluation metrics
# Keras metric objects handed to model.compile by every model builder. The
# `name` of each metric is the key under which it appears in the training
# history (plot_kfold_results reads 'binary_accuracy', for example).
METRICS = [keras.metrics.TruePositives(name='tp'),
           keras.metrics.FalsePositives(name='fp'),
           keras.metrics.TrueNegatives(name='tn'),
           keras.metrics.FalseNegatives(name='fn'), 
           keras.metrics.BinaryAccuracy(name='binary_accuracy'),
           keras.metrics.CategoricalAccuracy(name='cat_accuracy'),
           keras.metrics.Precision(name='precision'),
           keras.metrics.Recall(name='recall'),
           keras.metrics.CategoricalCrossentropy(name='categorical_crossentropy'),
    ]

def get_optimizer():
    '''Function that returns an optimizer based on the parameters for the model

    Reads params['optimizer'] ('sgd', 'adam' or 'rmsprop') together with
    params['lr'], params['momentum'] and params['wd'].

    Raises ValueError when params['optimizer'] holds any other value.
    '''
    
    # `learning_rate` is the current keyword of the tf.keras optimizers; `lr` is
    # only a deprecated alias
    if params['optimizer'] == 'sgd':
        # NOTE(review): params['wd'] is passed as `decay`, which tf.keras documents
        # as learning-rate decay, not weight decay - confirm this is intended
        optimizer = keras.optimizers.SGD(learning_rate=params['lr'], momentum=params['momentum'], decay=params['wd'])
    elif params['optimizer'] == 'adam':
        optimizer = keras.optimizers.Adam(learning_rate=params['lr'])
    elif params['optimizer'] == 'rmsprop':
        optimizer = keras.optimizers.RMSprop(learning_rate=params['lr'], momentum=params['momentum'])
    else:
        # ValueError is still caught by callers that catch Exception
        raise ValueError('Wrong input for optimizer parameter given.')
    return optimizer

#early stopping callback
# Stops training when val_loss has not decreased for 5 consecutive epochs and
# puts the weights of the best epoch back into the model
early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', 
        verbose=1,
        patience=5,
        mode='min',
        restore_best_weights=True)

**Separated neural networks:**

In [None]:
def create_paper_model():
    '''Build and compile the baseline network with one pathway for the customer features and one for the item features (categorical + numeric inputs)'''

    def tabular_pathway(features):
        # dense stem, two skip-connected blocks each followed by dropout, then a narrowing block
        hidden = keras.layers.Dense(256, activation='relu', use_bias=True)(features)
        for halve in (False, True):
            hidden = SkipCon(size=256, deep=2, reduce=halve, skip_when=1, activation="relu")(hidden)
            hidden = keras.layers.Dropout(0.5)(hidden)
        return SkipCon(size=64, deep=2, reduce=True, skip_when=0, activation="relu")(hidden)

    # the customer pathway is built first, the item pathway second
    user_branch = tabular_pathway(feature_layer_users)
    item_branch = tabular_pathway(feature_layer_items)

    # join both pathways along the feature axis
    merged = tf.concat([user_branch, item_branch], axis=-1)

    # joint feed-forward head
    head = SkipCon(size=64, deep=2, reduce=False, skip_when=1, activation="relu")(merged)
    head = keras.layers.Dropout(0.5)(head)
    head = SkipCon(size=16, deep=2, reduce=False, skip_when=0, activation="relu")(head)

    # three-way softmax, one unit per class
    class_probs = keras.layers.Dense(3, activation="softmax")(head)

    random_seed(123)
    network = keras.Model(inputs=[input_user, input_items], outputs=class_probs)

    chosen_optimizer = get_optimizer()

    random_seed(123)
    network.compile(
        optimizer=chosen_optimizer,
        loss='categorical_crossentropy',
        metrics=METRICS,
    )
    return network

In [None]:
def create_nlp_model():
    '''Build and compile the text-only network on top of the Universal Sentence Encoder'''

    print('loading language model')
    # sentence encoder fetched from TensorFlow Hub; whether it is fine-tuned is set by params['trainable']
    encoder = hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder/4", trainable=params['trainable'] , dtype=tf.string, input_shape=[], output_shape=[512])
    hidden = encoder(input_nlp)

    # plain feed-forward stack on the sentence embedding
    for width in (256, 128, 128, 64, 16):
        hidden = keras.layers.Dense(width, activation='relu')(hidden)

    # three-way softmax, one unit per class
    class_probs = keras.layers.Dense(3, activation="softmax")(hidden)

    random_seed(123)
    network = keras.Model(inputs=[input_nlp], outputs=class_probs)

    chosen_optimizer = get_optimizer()

    random_seed(123)
    network.compile(
        optimizer=chosen_optimizer,
        loss='categorical_crossentropy',
        metrics=METRICS,
    )
    return network

In [None]:
def create_combined_model():
    '''Build and compile the combined network with customer, item and review-text pathways (categorical, numeric and text features)'''

    def tabular_pathway(features):
        # dense stem, two skip-connected blocks each followed by dropout, then a narrowing block
        hidden = keras.layers.Dense(256, activation='relu', use_bias=True)(features)
        for halve in (False, True):
            hidden = SkipCon(size=256, deep=2, reduce=halve, skip_when=1, activation="relu")(hidden)
            hidden = keras.layers.Dropout(0.5)(hidden)  # dropout against overfitting
        return SkipCon(size=64, deep=2, reduce=True, skip_when=0, activation="relu")(hidden)

    # the customer pathway is built first, the item pathway second
    user_branch = tabular_pathway(feature_layer_users)
    item_branch = tabular_pathway(feature_layer_items)

    # text pathway: sentence encoder from TensorFlow Hub followed by dense layers
    print('loading language model')
    encoder = hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder/4", trainable=params['trainable'] , dtype=tf.string, input_shape=[], output_shape=[512])
    text_branch = encoder(input_nlp)
    for width in (256, 128, 128, 64):
        text_branch = keras.layers.Dense(width, activation='relu')(text_branch)

    # join the three pathways along the feature axis
    merged = tf.concat([user_branch, item_branch, text_branch], axis=-1)

    # joint feed-forward head
    head = SkipCon(size=64, deep=2, reduce=False, skip_when=1, activation="relu")(merged)
    head = keras.layers.Dropout(0.5)(head)
    head = SkipCon(size=16, deep=2, reduce=False, skip_when=0, activation="relu")(head)

    # three-way softmax, one unit per class
    class_probs = keras.layers.Dense(3, activation="softmax")(head)

    random_seed(123)
    network = keras.Model(inputs=[input_user, input_items, input_nlp], outputs=class_probs)

    chosen_optimizer = get_optimizer()

    random_seed(123)
    network.compile(
        optimizer=chosen_optimizer,
        loss='categorical_crossentropy',
        metrics=METRICS,
    )
    return network

**Not separated neural networks:**

In [None]:
def create_not_separated_embedding_model():
    '''Build and compile the single-pathway network that starts from feature_layer_tab (512-unit stem)'''
    # (size, reduce, skip_when, dropout afterwards) for each SkipCon block, in order
    block_specs = (
        (512, False, 1, True),
        (512, True, 1, True),
        (128, True, 0, False),
        (64, False, 1, True),
        (16, False, 0, False),
    )

    hidden = keras.layers.Dense(512, activation='relu', use_bias=True)(feature_layer_tab)
    for width, halve, with_skip, add_dropout in block_specs:
        hidden = SkipCon(size=width, deep=2, reduce=halve, skip_when=with_skip, activation="relu")(hidden)
        if add_dropout:
            hidden = keras.layers.Dropout(0.5)(hidden)

    # three-way softmax, one unit per class
    class_probs = keras.layers.Dense(3, activation="softmax")(hidden)

    random_seed(123)
    network = keras.Model(inputs=[input_tab], outputs=class_probs)

    chosen_optimizer = get_optimizer()

    random_seed(123)
    network.compile(
        optimizer=chosen_optimizer,
        loss='categorical_crossentropy',
        metrics=METRICS,
    )
    return network

In [None]:
def create_not_separated_one_hot_model():
    '''Build and compile the single-pathway network that starts from feature_layer_tab (256-unit stem)

    NOTE(review): the name suggests feature_layer_tab should be built from the
    one-hot columns before this is called - confirm which section of the
    feature-column cell has to be active.
    '''
    # (size, reduce, skip_when, dropout afterwards) for each SkipCon block, in order
    block_specs = (
        (256, False, 1, True),
        (256, True, 1, True),
        (64, True, 0, False),
        (32, False, 1, True),
        (8, False, 0, False),
    )

    hidden = keras.layers.Dense(256, activation='relu', use_bias=True)(feature_layer_tab)
    for width, halve, with_skip, add_dropout in block_specs:
        hidden = SkipCon(size=width, deep=2, reduce=halve, skip_when=with_skip, activation="relu")(hidden)
        if add_dropout:
            hidden = keras.layers.Dropout(0.5)(hidden)

    # three-way softmax, one unit per class
    class_probs = keras.layers.Dense(3, activation="softmax")(hidden)

    random_seed(123)
    network = keras.Model(inputs=[input_tab], outputs=class_probs)

    chosen_optimizer = get_optimizer()

    random_seed(123)
    network.compile(
        optimizer=chosen_optimizer,
        loss='categorical_crossentropy',
        metrics=METRICS,
    )
    return network

**Helper Functions**

In [None]:
def order_history(history):
    """Return a new dict with the training entries of `history` first and the validation entries last.

    A key is a validation entry when the text before its first underscore is
    'val'. The relative order inside each group is kept.
    """
    def is_validation(name):
        return name.split('_')[0] == 'val'

    train_part = {name: series for name, series in history.items() if not is_validation(name)}
    val_part = {name: series for name, series in history.items() if is_validation(name)}
    return {**train_part, **val_part}

def add_fold_to_dict(history, dt):
    """Append the per-epoch values of one fold to the values collected so far.

    Parameters:
    history: dict mapping a metric name to the list of per-epoch values of the fold
    dt: values collected from earlier folds; {} for the first fold

    Returns the collection including the new fold. For the first fold a new dict
    is returned; afterwards `dt` itself is extended and returned.
    """
    if dt == {}:
        # Copy every list: sharing them with `history` would make the extends of
        # later folds silently grow the first fold's own history as well
        return {key: list(values) for key, values in history.items()}
    for key in dt.keys():
        dt[key].extend(history[key])
    return dt

def get_avg_column_val(df):
    """Return the mean of every column of `df` as a list, in column order."""
    # positional access, so the result does not depend on the columns being labelled 0..n-1
    return [df.iloc[:, position].mean() for position in range(len(df.columns))]

def kfold_results(dt, n_folds, epochs):
    """Average every collected metric over the folds, epoch by epoch.

    Parameters:
    dt: dict mapping a metric name to the concatenated per-epoch values of all folds
    n_folds: number of folds contained in every list
    epochs: number of epochs per fold

    Returns a DataFrame with an 'epochs' column (0..epochs-1) and one column of
    fold-averaged values per metric. Every list must hold exactly
    n_folds * epochs values, otherwise the reshape fails.
    """
    table = {'epochs': np.arange(epochs)}
    for metric_name in dt.keys():
        # one row per fold, one column per epoch
        per_fold = pd.DataFrame(pd.Series(dt[metric_name]).values.reshape(n_folds, epochs))
        table[metric_name] = get_avg_column_val(per_fold)
    return pd.DataFrame(table)

def plot_kfold_results(results):
    """Plot training/validation loss (top) and binary accuracy (bottom) per epoch.

    Parameters:
    results: DataFrame with one row per epoch and the columns 'loss', 'val_loss',
             'binary_accuracy' and 'val_binary_accuracy'
    """
    nb_epochs = results.shape[0]
    epoch_axis = list(range(nb_epochs))
    fig, ax = plt.subplots(2, 1, figsize=(8, 12))

    # (axes, training column, validation column, training label, validation label, y label)
    panels = (
        (ax[0], 'loss', 'val_loss', 'Training loss', 'Validation loss', 'Loss'),
        (ax[1], 'binary_accuracy', 'val_binary_accuracy', 'Training Accuracy', 'Validation Accuracy', 'Accuracy / %'),
    )
    for axis, train_col, val_col, train_label, val_label, y_label in panels:
        axis.plot(epoch_axis, results[train_col], label=train_label)
        axis.plot(epoch_axis, results[val_col], label=val_label)
        axis.set_xlabel('Epoch')
        axis.xaxis.set_ticks(np.arange(0, nb_epochs, 1))  # one tick per epoch
        axis.set_ylabel(y_label)
        axis.legend(loc='best')

def print_results(cv_results, test_results=None):
    """Print the test-set metrics and the fold-averaged results, then plot the latter.

    Parameters:
    cv_results: DataFrame of fold-averaged metrics per epoch (also passed to plot_kfold_results)
    test_results: metrics of the test set; pretty-printed as given (None by default)
    """
    def banner(title):
        # blank lines, a separator rule, the title and blank lines again
        print('\n')
        print('-'*15)
        print(title)
        print('\n')

    banner('Test Set Results:')
    pprint.pprint(test_results)

    banner('Cross-Validation Results (averaged over folds):')
    print(cv_results)
    plot_kfold_results(cv_results)

In [None]:
def get_labels(labels):
    '''Return one-hot encoded labels (as 0/1 integers) in the order given by CLASS_LABELS'''
    matches = labels.apply(lambda label: label == CLASS_LABELS)
    # multiplying by 1 turns the boolean comparison results into integers
    return matches * 1

def calc_metrics(logs:dict, predict_probs, target, validation=False, test=False):
    '''
    Compute F1 and ROC-AUC (micro and macro averaged) and store them in `logs`
    
    Parameters:
    logs: dict that receives the calculated metrics
    predict_probs: prediction probabilities, one row per sample
    target: one-hot encoded target labels, one row per sample
    validation: True to prefix the metric names with 'val_'
    test: True to prefix the metric names with 'test_' (only used when validation is False)
    
    Returns the updated logs and a list with one ' - name: value' string per metric
    '''
    
    # class index with the highest probability / the hot position of the target
    predicted = np.argmax(predict_probs, axis=1)
    actual = np.argmax(target, axis=1)
    
    prefix = 'val_' if validation else ('test_' if test else '')

    metrics = {
        prefix + 'f1_micro': f1_score(actual, predicted, average='micro'),
        prefix + 'f1_macro': f1_score(actual, predicted, average='macro'),
        prefix + 'auc_micro': roc_auc_score(target, predict_probs, average='micro'),
        prefix + 'auc_macro': roc_auc_score(target, predict_probs, average='macro', multi_class='ovr'),
    }

    # one printable fragment per metric
    message = [f' - {name}: {value}' for name, value in metrics.items()]

    # expose the metrics to the caller through logs
    logs.update(metrics)

    return logs, message


class Metrics(Callback):
    '''Callback that adds F1 and ROC-AUC for the training and validation data to the logs after every epoch'''
    
    def __init__(self, training_data, train_targ, validation_data, val_targ, verbose=1):
        '''
        Parameters:
        training_data: input passed to model.predict for the training metrics
        train_targ: one-hot targets belonging to training_data
        validation_data: input passed to model.predict for the validation metrics
        val_targ: one-hot targets belonging to validation_data
        verbose: 1 prints the calculated metrics after every epoch
        '''
        # run Callback.__init__ itself so the base-class state is initialised
        # before the attributes below are set
        super().__init__()
        self.training_data = training_data
        self.train_targ = train_targ
        self.validation_data = validation_data
        self.val_targ = val_targ
        self.verbose = verbose
        
    def on_epoch_end(self, epoch, logs=None):
        # logs may be omitted by the caller; use a throw-away dict in that case
        if logs is None:
            logs = {}

        #predict on train set
        train_predict_probs = np.asarray(self.model.predict(self.training_data))
        #targets
        train_target = list(self.train_targ)
        #calculate metrics
        logs, message_train = calc_metrics(logs, predict_probs=train_predict_probs, target=train_target)
        
        #metrics for validation set:
        val_predict_probs = np.asarray(self.model.predict(self.validation_data))
        val_target = list(self.val_targ)
        logs, message_val = calc_metrics(logs, predict_probs=val_predict_probs, target=val_target, validation=True)
        
        if self.verbose==1: print(message_train + message_val)

class Test_Metrics(Callback):
    '''
    Callback to calculate metrics for test set

    Parameters:
    test_data: dataset passed to model.predict to obtain the test predictions
    test_targ: targets belonging to test_data

    After evaluation has finished, the calculated metrics are available in self.metrics
    '''
    def __init__(self, test_data, test_targ):
        #super(Callback, self) would skip Callback.__init__; initialise the Keras base class properly
        super().__init__()
        self.test_data = test_data
        self.test_targ = test_targ
        #filled by on_test_end
        self.metrics = dict()
        
    def on_test_end(self, logs=None):
        '''Predict on the test data and store the metrics computed by calc_metrics in self.metrics'''
        #logs is accepted to match the Keras hook signature but is not used:
        #the results are collected in self.metrics instead
        test_predict_probs = np.asarray(self.model.predict(self.test_data))
        test_target = list(self.test_targ)
        #the message returned by calc_metrics is not needed here
        self.metrics, _ = calc_metrics(self.metrics, predict_probs=test_predict_probs, target=test_target, test=True)
        return

In [None]:
def fit_model_test(train, test, model_fn, epochs, bs): 
    '''
    Function to fit model on complete training data and evaluate on test data
    
    Parameters:
    train: training data as pandas DataFrame
    test: testing data as pandas DataFrame (must contain the target column 'fit')
    model_fn: function name that creates the neural network
    epochs: number of epochs to train
    bs: batch size used for the training dataset
    
    Returns a tuple (test_results, history):
    test_results: dict with the values returned by model.evaluate (keys prefixed with 'test_')
                  plus the metrics collected by the Test_Metrics callback
    history: object returned by model.fit
    '''
    
    #work on copies so that the DataFrames of the caller are never modified
    train = train.copy()
    test = test.copy()
    
    #get TF datasets from pandas DataFrame
    random_seed(123)
    train_ds = df_to_dataset(train, shuffle=False, batch_size=bs)
    test_ds = df_to_dataset(test, shuffle=False, batch_size=None)
    
    #create model (re-seed so that the initialisation is reproducible)
    random_seed(123)
    model = model_fn()
    
    #fit model on complete training data
    #NOTE(review): the test set is passed as validation data together with the global early_stopping
    #callback - if that callback monitors a 'val_*' quantity, the stopping epoch is selected on the
    #test data and the reported test metrics are optimistic. TODO confirm / use a separate hold-out set.
    random_seed(123)
    history = model.fit(train_ds, validation_data=test_ds, epochs=epochs, callbacks=[early_stopping])
    
    #evaluate the network with the test set:
    #get one-hot encoded targets
    test_targ = get_labels(test['fit'])
    #prepare callback
    test_metrics = Test_Metrics(test_ds, test_targ)
    #evaluate 
    evaluation = model.evaluate(test_ds, callbacks=[test_metrics])
    #model.evaluate returns a bare scalar when the model only has a loss and no further metrics;
    #wrap it so that the indexing below works in both cases
    if not isinstance(evaluation, (list, tuple)):
        evaluation = [evaluation]

    #add metrics to test results
    test_results = dict()
    for idx, metric in enumerate(model.metrics_names):
        test_results['test_' + metric] = evaluation[idx]
    test_results.update(test_metrics.metrics)
    print(test_results)
    
    return (test_results, history)

In [None]:
def fit_model_cv(train, model_fn, epochs, n_folds, bs, test=None):
    '''
    Function to fit model on training data with stratified cross-validation
    
    Parameters:
    train: training data as pandas DataFrame (must contain the target column 'fit')
    model_fn: function name that creates the neural network
    epochs: number of epochs to train
    n_folds: number of folds into which the training data should be split
    bs: batch size
    test: optional - if given, additionally, a neural network is trained on the complete training data
          (with the same model_fn, epochs and bs) and evaluated on the test data
    
    Returns a list with the cross-validation results as first element and,
    if test is given, the test results as second element
    '''
    
    #work on copies so that the DataFrames of the caller are never modified
    train = train.copy()
    if test is not None:
        test = test.copy()
        
    #tracking variable
    folds = dict()
    
    #prepare cross validation
    random_seed(123)
    stratified_k_fold = StratifiedKFold(n_folds, shuffle=True, random_state=1)
    features = train.loc[:, train.columns != dep_name[0]]
    
    #iterate over folds (fold numbering starts at 1)
    for fold_idx, (train_idxs, valid_idxs) in enumerate(stratified_k_fold.split(features, train[dep_name]), start=1):
        print('-'*20, '\n', f'> Fold: {fold_idx}'); print('-'*20)
        
        #get training and validation sets
        train_df = train.iloc[train_idxs]
        val_df = train.iloc[valid_idxs]
        #transform DataFrames into TF datasets
        train_ds = df_to_dataset(train_df, shuffle=False, batch_size=bs)
        train_ds_metric = df_to_dataset(train_df, shuffle=False, batch_size=None)
        val_ds = df_to_dataset(val_df, shuffle=False, batch_size=None)
        
        #add additional metrics (callback)
        metrics = Metrics(training_data=train_ds_metric,
                          train_targ=get_labels(train_df['fit']),
                          validation_data=val_ds,
                          val_targ=get_labels(val_df['fit']),
                          verbose=0)
    
        #create model (re-seed so that every fold starts from the same initialisation)
        random_seed(123)
        model = model_fn()
        
        #model fitting
        random_seed(123)
        history = model.fit(train_ds, validation_data=val_ds, epochs=epochs, callbacks=[metrics], verbose=0)
        history.history = order_history(history.history)
        
        #add fold to dict
        folds = add_fold_to_dict(history.history, folds)
        
        #clear session
        keras.backend.clear_session()
    
    #get results    
    experiment_results = [kfold_results(folds, n_folds, epochs)]
    
    if test is not None:
        #train a network on complete training set and evaluate on test set
        print('-'*15)
        print('Test Set: \n')
        
        #create, fit and evaluate network; use the arguments of this function instead of the
        #global params dict so that the test run matches the cross-validated configuration
        test_results, _ = fit_model_test(train, test, model_fn, epochs=epochs, bs=bs)
        experiment_results.append(test_results)
      
    #show results (cross-validation results and, if available, test results)
    print_results(*experiment_results)
    return experiment_results

In [None]:
#run the paper model: cross-validation on the training data plus evaluation on the test data
params['model_fn'] = create_paper_model
_ = fit_model_cv(
    train,
    model_fn=params['model_fn'],
    epochs=params['epochs'],
    n_folds=params['n_folds'],
    bs=params['bs'],
    test=test,
)

In [None]:
#NLP model example: same experiment with create_nlp_model as model function and 10 epochs.
#The code is disabled by keeping it inside a string literal, because the pre-trained model
#can cause memory issues when not enough memory is available. Remove the quotes to run it.
'''
params['model_fn'] = create_nlp_model
params['epochs'] = 10
_=fit_model_cv(train, test=test, model_fn=params['model_fn'], epochs=params['epochs'], n_folds=params['n_folds'], bs=params['bs'])
'''

In [None]:
#Not-separated network example: same experiment with create_not_separated_embedding_model
#as model function and 50 epochs. The code is disabled by keeping it inside a string literal.
#Before running it, do not forget to uncomment the matching encoding section (input preparation)
#and to use the respective model function.
'''
params['model_fn'] = create_not_separated_embedding_model
params['epochs'] = 50
_=fit_model_cv(train, test=test, model_fn=params['model_fn'], epochs=params['epochs'], n_folds=params['n_folds'], bs=params['bs'])
'''