In [None]:
!pip3 -q install sklearn pandas

import pandas as pd
import sklearn
print("pandas version {} installed".format(pd.__version__))
print("scikit-learn version {} installed".format(sklearn.__version__))

In [None]:
# %load model_definition.py
"""Create Keras model"""

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.constraints import maxnorm

def create_model(input_dim, output_dim):
    """Build and compile a feed-forward softmax classifier.

    The network is two 100-unit ReLU layers (each with a max-norm weight
    constraint of 3 and followed by 20% dropout) and a softmax output layer.

    Args:
        input_dim: number of input features fed to the first layer.
        output_dim: number of units in the softmax output layer.

    Returns:
        The Sequential model, compiled with categorical cross-entropy loss,
        the Adam optimizer and the accuracy metric.
    """
    # Layers are instantiated in the order they are stacked.
    layer_stack = [
        # input layer
        Dense(100, input_dim=input_dim, activation='relu', W_constraint=maxnorm(3)),
        Dropout(0.2),
        # hidden layer
        Dense(100, activation='relu', W_constraint=maxnorm(3)),
        Dropout(0.2),
        # output layer
        Dense(output_dim, activation='softmax'),
    ]

    network = Sequential()
    for layer in layer_stack:
        network.add(layer)

    network.compile(loss='categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])
    return network

In [None]:
# Import the project's training helper module
import train_util as util

# Helper object that holds the loaded data (and, later, the splits and model).
helper = util.LendingClubModelHelper()

# Read in lending club data, passing the applicant, credit and label column lists
helper.read_csv("lc-2015-loans.csv",
                util.APPLICANT_NUMERIC +
                util.APPLICANT_CATEGORICAL +
                util.CREDIT_NUMERIC +
                util.LABEL)

# Summarise the loaded columns. Assuming helper.lcdata is a pandas DataFrame,
# info() writes its report to stdout itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
helper.lcdata.info()


In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Show a correlation matrix of the features in our data set
# NOTE(review): `plots` is not imported in any visible cell, so on a fresh
# kernel this line raises NameError unless the import lives elsewhere — confirm
# which module provides plot_correlation_matrix and import it explicitly.
plots.plot_correlation_matrix(helper.lcdata)

In [None]:
import os

# Column groups handed to the splitter: numeric features, categorical features, label
numeric_columns = util.APPLICANT_NUMERIC + util.CREDIT_NUMERIC
categorical_columns = util.APPLICANT_CATEGORICAL
label_columns = util.LABEL

# Optional row limit taken from the "sample" environment variable (None when unset).
# NOTE(review): environment values are strings — confirm split_data converts
# row_limit to a number itself.
sample_rows = os.environ.get("sample")

# Divide the data set into training and test sets
helper.split_data(numeric_columns,
                  categorical_columns,
                  label_columns,
                  test_size=0.2,
                  row_limit=sample_rows)

# Inspect a random sample of the training features
print(helper.x_train.sample())

# ...and of the training labels (loan grade has been one-hot encoded)
print(helper.y_train.sample())

In [None]:
from model_definition import create_model

# Train via the helper, passing the model factory function itself (not a built
# model); the returned history object is kept for the cells that follow.
history = helper.train_model(create_model)

# List the keys recorded in the training history
print(history.history.keys())

In [None]:
# Plot the training history returned by helper.train_model.
# NOTE(review): `plots` is not imported in any visible cell, so on a fresh
# kernel this raises NameError unless the import lives elsewhere — confirm.
plots.plot_history(history)

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, f1_score

# Predict class scores for the test set.
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values returns the same NumPy array on every pandas version.
y_pred = helper.model.predict(helper.x_test.values)

# Revert one-hot encoding to classes: for each row, flag the column holding the
# highest predicted score, reusing the test labels' columns and index.
y_pred_classes = pd.DataFrame((y_pred.argmax(1)[:, None] == np.arange(y_pred.shape[1])),
                              columns=helper.y_test.columns,
                              index=helper.y_test.index)

# Use idxmax() to convert back from one-hot encoding to a single label per row
y_test_vals = helper.y_test.idxmax(1)
y_pred_vals = y_pred_classes.idxmax(1)

# F1 score (weighted by class support). The value printed is this F1 score,
# not plain accuracy, so the message is labelled accordingly.
f1 = f1_score(y_test_vals, y_pred_vals, average='weighted')
print("Test Set Weighted F1 Score: {:.0%}".format(f1))

# Confusion matrix
cfn_matrix = confusion_matrix(y_test_vals, y_pred_vals)
# NOTE(review): other cells call plotting helpers through `plots.`, this one
# through `util.` — confirm which module actually provides plot_confusion_matrix.
util.plot_confusion_matrix(cfn_matrix, classes=helper.y_test.columns)