In [1]:
import pandas as pd
import os
import sys
import tensorflow as tf
import numpy as np
import tensorflow_hub as hub

In [2]:
import wandb
from wandb.keras import WandbCallback

In [3]:
# Put the parent of the current working directory on the import path
# (once) so modules that live one level above the notebook can be imported.
nb_dir = os.path.dirname(os.getcwd())
if sys.path.count(nb_dir) == 0:
    sys.path.append(nb_dir)

In [4]:
# Columns read from the complaints CSV; this includes the label column
# `consumer_disputed` as well as the model inputs.
feature_names = [
    "product",
    "sub_product",
    "issue",
    "sub_issue",
    "state",
    "zip_code",
    "company",
    "company_response",
    "timely_response",
    "consumer_disputed",
    "consumer_complaint_narrative",
]
# Categorical columns that are integer-coded and then one-hot encoded.
one_hot_features = ["product", "sub_product", "company_response", "state", "issue"]
# Columns passed to the model as a single numeric value.
numeric_features = ["zip_code"]
# Free-text columns passed to the model as raw strings.
text_features = ["consumer_complaint_narrative"]

In [5]:
# Load only the columns named in feature_names from the complaints CSV.
df = pd.read_csv(
    filepath_or_buffer='../data/6Mar/consumer_complaints_with_narrative.csv',
    usecols=feature_names,
)

In [6]:
# Preview the first rows of the loaded frame as a sanity check.
df.head()

Unnamed: 0,product,sub_product,issue,sub_issue,consumer_complaint_narrative,company,state,zip_code,company_response,timely_response,consumer_disputed
0,Mortgage,Conventional fixed mortgage,"Loan servicing, payments, escrow account",,My mortgage servicing provider ( XXXX ) transf...,"SunTrust Banks, Inc.",TX,770XX,Closed with non-monetary relief,Yes,No
1,Debt collection,"Other (i.e. phone, health club, etc.)",Cont'd attempts collect debt not owed,Debt was paid,I HAVE NEVER RECEIVED ANY FORM OF NOTIFICATION...,ERC,CA,913XX,Closed with non-monetary relief,Yes,No
2,Debt collection,Credit card,Disclosure verification of debt,Not given enough info to verify debt,i contacted walmart and the manager there said...,Synchrony Financial,MA,010XX,Closed with non-monetary relief,Yes,No
3,Credit reporting,,Credit reporting company's investigation,No notice of investigation status/result,I have filed multiple complaints XXXX on this ...,"TransUnion Intermediate Holdings, Inc.",NY,141XX,Closed with explanation,Yes,Yes
4,Bank account or service,Other bank product/service,"Account opening, closing, or management",,Sofi has ignored my request to stop sending me...,"Social Finance, Inc.",TX,785XX,Closed with explanation,Yes,No


In [7]:
# Print each categorical column's name followed by its number of distinct
# non-null values; these counts are the one-hot widths the model expects.
for col, distinct_count in df[one_hot_features].nunique().items():
    print(col)
    print(distinct_count)

product
11
sub_product
45
company_response
5
state
60
issue
90


In [7]:
# Turn the Yes/No label into 1/0. Values not present in the mapping become NaN,
# so this cell must only be run once on the freshly loaded frame.
disputed_to_int = {'Yes': 1, 'No': 0}
df['consumer_disputed'] = df['consumer_disputed'].map(disputed_to_int)

In [8]:
# Replace every categorical column with its integer category code.
# Note: pandas assigns the code -1 to missing values.
for feature in one_hot_features:
    categorical_column = df[feature].astype("category")
    df[feature] = categorical_column.cat.codes

In [9]:
# Build one float one-hot matrix per categorical feature, in the order of
# one_hot_features (the same order the model's inputs are declared in).
# NumPy is used directly: the `pd.np` alias was deprecated in pandas 1.0 and
# removed in pandas 2.0.
one_hot_x = []
for feature_name in one_hot_features:
    category_codes = np.asarray(df[feature_name].values)
    # Missing values carry the category code -1. Handing -1 to to_categorical
    # acts as a negative index and marks the LAST column, silently merging
    # "missing" with the highest-coded category. Encode those rows with a
    # placeholder code and then clear them so they end up as all-zero rows.
    # The matrix width (max code + 1) is unchanged.
    is_missing = category_codes < 0
    encoded = np.asarray(
        tf.keras.utils.to_categorical(np.where(is_missing, 0, category_codes)))
    encoded[is_missing] = 0
    one_hot_x.append(encoded)

In [10]:
# One flat array of raw strings per text feature; the model embeds them itself.
# NumPy is used directly: the `pd.np` alias was deprecated in pandas 1.0 and
# removed in pandas 2.0.
embedding_x = [np.asarray(df[feature_name].values).reshape(-1) for feature_name in text_features]

In [11]:
# Zip codes are partially masked with 'X' (e.g. 770XX); turn the mask into zeros.
df['zip_code'] = df['zip_code'].str.replace(pat='X', repl='0', regex=True)

In [12]:
# Replace each stray punctuation/space character listed in the pattern with '0'
# so that the remaining string can be parsed as an integer.
df['zip_code'] = df['zip_code'].str.replace(
    pat=r'\[|\*|\+|\-|`|\.|\ |\$|\/|!|\(',
    repl='0',
    regex=True,
)

In [13]:
# Rows without a zip code get 0.
df['zip_code'] = df['zip_code'].fillna(value=0)

In [14]:
# Parse the cleaned zip-code values as 32-bit integers.
df['zip_code'] = df['zip_code'].astype(dtype='int32')

In [15]:
def _floor_div_10000(zip_value):
    """Drop the last four digits of a zip code via integer floor division."""
    return zip_value // 10000


# Coarsen the zip code by keeping only what is left after removing the last
# four digits. Not idempotent: re-running this cell divides the column again.
df['zip_code'] = df['zip_code'].apply(_floor_div_10000)

In [16]:
# Single-element list holding the zip-code column as a NumPy array.
numeric_x = [df['zip_code'].to_numpy()]

In [17]:
# Model inputs as one flat list: one-hot matrices, then the numeric array, then
# the text array. The order must match the input list used to build the model.
X = [*one_hot_x, *numeric_x, *embedding_x]

In [18]:
# Label vector: the 0/1 `consumer_disputed` column as a flat uint8 array.
disputed_labels = df["consumer_disputed"]
y = np.asarray(disputed_labels, dtype=np.uint8).ravel()

In [19]:
def get_model(show_summary=True, learning_rate=0.001):
    """
    Build and compile the Keras classifier, and start a Weights & Biases run.

    The network has two branches that are concatenated before a single
    sigmoid output unit:

    * deep branch: the complaint narrative is embedded with the Universal
      Sentence Encoder from TF-Hub and passed through Dense(256/64/16);
    * wide branch: the one-hot categorical inputs and the zip-code value are
      concatenated and passed through Dense(16).

    Side effects: calls ``wandb.init`` and records the run configuration,
    reading the module-level ``one_hot_features``, ``numeric_features`` and
    ``text_features`` lists.

    Args:
        show_summary: when true, print ``keras_model.summary()``.
        learning_rate: Adam learning rate. The same value is logged to W&B and
            given to the optimizer, so the logged configuration cannot drift
            from what the model is actually compiled with.

    Returns:
        The compiled ``tf.keras`` model. Inputs are expected in this order:
        product, sub_product, company_response, state, issue, zip_code,
        narrative.
    """
    wandb.init(project="consumer-complaints")
    config = wandb.config
    config.name = 'final_features_wide'
    config.optimizer = 'adam'
    config.learning_rate = learning_rate
    config.data_version = 'cc_imbalanced_narrative'
    config.one_hot_features = one_hot_features
    config.numeric_features = numeric_features
    config.text_features = text_features

    # One-hot widths; each must equal the number of columns of the matching
    # one-hot matrix that is fed to the model.
    num_products = 11
    num_sub_products = 45
    num_company_responses = 5
    num_states = 60
    num_issues = 90

    # one-hot categorical features
    input_product = tf.keras.Input(shape=(num_products,), name="product_xf")
    input_sub_product = tf.keras.Input(shape=(num_sub_products,), name="sub_product_xf")
    input_company_response = tf.keras.Input(shape=(num_company_responses,), name="company_response_xf")
    input_state = tf.keras.Input(shape=(num_states,), name="state_xf")
    input_issue = tf.keras.Input(shape=(num_issues,), name="issue_xf")

    # numeric features
    input_zip_code = tf.keras.Input(shape=(1,), name="zip_code_xf")

    # text features
    input_narrative = tf.keras.Input(shape=(1,), name="narrative_xf", dtype=tf.string)

    # embed text features: the input is flattened to a 1-D batch of strings
    # before it is handed to the sentence encoder
    module_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
    embed = hub.KerasLayer(module_url)
    reshaped_narrative = tf.reshape(input_narrative, [-1])
    embed_narrative = embed(reshaped_narrative)
    deep_ff = tf.keras.layers.Reshape((512, ), input_shape=(1, 512))(embed_narrative)

    deep = tf.keras.layers.Dense(256, activation='relu')(deep_ff)
    deep = tf.keras.layers.Dense(64, activation='relu')(deep)
    deep = tf.keras.layers.Dense(16, activation='relu')(deep)

    wide_ff = tf.keras.layers.concatenate(
        [input_product, input_sub_product, input_company_response,
         input_state, input_issue, input_zip_code])
    wide = tf.keras.layers.Dense(16, activation='relu')(wide_ff)

    both = tf.keras.layers.concatenate([deep, wide])

    output = tf.keras.layers.Dense(1, activation='sigmoid')(both)

    _inputs = [input_product, input_sub_product, input_company_response,
               input_state, input_issue, input_zip_code, input_narrative]

    keras_model = tf.keras.models.Model(_inputs, output)
    keras_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                     loss='binary_crossentropy',
                     metrics=[
                         tf.keras.metrics.BinaryAccuracy(),
                         tf.keras.metrics.TruePositives()
                         ])
    if show_summary:
        keras_model.summary()

    return keras_model

In [21]:
# Build the compiled model without printing its layer summary.
model = get_model(show_summary=False)

wandb: Wandb version 0.8.29 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


In [22]:
# Train for 5 epochs in batches of 32, holding out 20% of the data for
# validation and streaming metrics to Weights & Biases.
wandb_callbacks = [WandbCallback()]
model.fit(
    x=X,
    y=y,
    batch_size=32,
    epochs=5,
    validation_split=0.2,
    callbacks=wandb_callbacks,
)

wandb: Wandb version 0.8.29 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x144aeae10>

In [7]:
# Write a diagram of the model graph to a PNG file. plot_model requires the
# pydot and graphviz packages; without them it only reports an import failure.
# To show the saved image inline afterwards:
#   from IPython.display import Image
#   Image(filename=file_name)
file_name = 'model.png'
tf.keras.utils.plot_model(model=model, to_file=file_name)

Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.


In [None]:
def get_model(show_summary=True):
    """
    Build and compile the two-branch Keras classifier (no experiment tracking).

    NOTE: this definition reuses the name ``get_model``; running this cell
    replaces any earlier ``get_model`` defined in the notebook.

    The narrative text is embedded with the Universal Sentence Encoder and
    passed through three Dense layers (deep branch); the one-hot categorical
    inputs plus the zip-code value go through one Dense layer (wide branch).
    Both branches are concatenated and feed a single sigmoid unit.

    Args:
        show_summary: when true, print ``keras_model.summary()``.

    Returns:
        The compiled ``tf.keras`` model. Inputs are expected in this order:
        product, sub_product, company_response, state, issue, zip_code,
        narrative.
    """
    layers = tf.keras.layers

    # (input name, one-hot width), listed in the order the model takes its inputs
    one_hot_specs = [
        ("product_xf", 11),
        ("sub_product_xf", 45),
        ("company_response_xf", 5),
        ("state_xf", 60),
        ("issue_xf", 90),
    ]
    categorical_inputs = [
        tf.keras.Input(shape=(width,), name=input_name)
        for input_name, width in one_hot_specs
    ]

    # single numeric value and raw narrative string
    zip_code_input = tf.keras.Input(shape=(1,), name="zip_code_xf")
    narrative_input = tf.keras.Input(shape=(1,), name="narrative_xf", dtype=tf.string)

    # deep branch: flatten the strings to 1-D, embed them, then narrow with Dense layers
    sentence_encoder = hub.KerasLayer("https://tfhub.dev/google/universal-sentence-encoder/4")
    flat_narrative = tf.reshape(narrative_input, [-1])
    narrative_embedding = sentence_encoder(flat_narrative)
    deep_branch = layers.Reshape((512, ), input_shape=(1, 512))(narrative_embedding)
    for units in (256, 64, 16):
        deep_branch = layers.Dense(units, activation='relu')(deep_branch)

    # wide branch: all categorical one-hots plus the zip-code value
    wide_features = layers.concatenate([*categorical_inputs, zip_code_input])
    wide_branch = layers.Dense(16, activation='relu')(wide_features)

    merged = layers.concatenate([deep_branch, wide_branch])
    prediction = layers.Dense(1, activation='sigmoid')(merged)

    model_inputs = [*categorical_inputs, zip_code_input, narrative_input]
    keras_model = tf.keras.models.Model(model_inputs, prediction)

    tracked_metrics = [
        tf.keras.metrics.BinaryAccuracy(),
        tf.keras.metrics.TruePositives(),
    ]
    keras_model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=tracked_metrics,
    )

    if show_summary:
        keras_model.summary()
    return keras_model