In [1]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
import tabpy_client

In [2]:
train = pd.read_csv('LoanStatsFilter.csv')
train = train[train.inactive_loans == 1]
print('Loans have the following purposes:\n',train['purpose'].unique())

Loans have the following purposes:
 ['debt_consolidation' 'other' 'car' 'moving' 'credit_card' 'vacation'
 'major_purchase' 'small_business' 'house' 'wedding' 'medical'
 'home_improvement']


In [3]:
test = train.iloc[:,1:6]

In [4]:
enc = preprocessing.LabelEncoder()
enc2 = preprocessing.LabelEncoder()
train['grade'] = enc.fit_transform(train['grade'])
train['purpose'] = enc2.fit_transform(train['purpose'])


In [5]:
print(train.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 122603 entries, 0 to 122602
Data columns (total 8 columns):
id                122603 non-null int64
grade             122603 non-null int64
annual_inc        122603 non-null int64
sub_grade_num     122603 non-null float64
purpose           122603 non-null int64
dti               122603 non-null float64
bad_loans         122603 non-null int64
inactive_loans    122603 non-null int64
dtypes: float64(2), int64(6)
memory usage: 8.4 MB
None


In [6]:
# Separate the data into the class labels y and the feature variables X.
targets = train['bad_loans']
y = np.array(train['bad_loans']).astype(int)
X = train.ix[:,1:6]
print(X.head())

   grade  annual_inc  sub_grade_num  purpose    dti
0      6        1896            0.6        2  18.99
1      1        2000            0.4        8   0.00
2      4        3000            0.4        0  10.40
3      0        3300            0.6        8   0.00
4      0        3500            0.4        8   5.14


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  after removing the cwd from sys.path.


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=None, stratify=y)

In [8]:
scaler = MinMaxScaler(feature_range=(0.0, 1.0))
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
model = MLPClassifier(hidden_layer_sizes=(500,500,100), activation='relu', solver='adam', alpha=1e-5,
                                         batch_size='auto', learning_rate='constant', learning_rate_init=0.0001,
                                         power_t=0.5, max_iter=10, shuffle=True, random_state=None,
                                         tol=0.00001, verbose=True, warm_start=False, momentum=0.9,
                                         nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1,
                                         beta_1=0.9, beta_2=0.999, epsilon=1e-08)

In [None]:
model.fit(X_train, y_train)

In [None]:
predictions = model.predict(X_test)
print(predictions[:10])
probs = model.predict_proba(X_test)
print(probs[:10])

In [None]:
threshold = 0.2
threshold_preds = []
for i in range(len(probs)):
    if probs[i][1] >= threshold:
        threshold_preds.append(1)
    else:
        threshold_preds.append(0)

In [None]:
accuracy = metrics.accuracy_score(y_test, threshold_preds)
print("The model produced {0}% accurate predictions.".format(accuracy*100))

In [None]:
print(metrics.classification_report(y_test, threshold_preds))

In [None]:
metrics.confusion_matrix(y_test, threshold_preds)

In [None]:
def loanclassifierfull(_arg1, _arg2, _arg3, _arg4, _arg5):
    from pandas import DataFrame

    # Load data from tableau (brought in as lists) into a dictionary
    # The columns get sorted alphabetically in this constructor
    # Adding the numbers sorts them correctly
    d = {'1-grade': _arg1, '2-income': _arg2, '3-sub_grade_num': _arg3, '4-purpose': _arg4, '5-dti': _arg5}
    
    # Convert the dictionary to a Pandas Dataframe
    df = DataFrame(data=d)

    # Transform categorical variables into numerical/continuous features
    df['1-grade'] = enc.transform(df['1-grade'])
    df['4-purpose'] = enc2.transform(df['4-purpose'])
    print(df.head())

    # We need to scale the inputs to the Model or it will be off
    # The scaler, since it's saved in the code, will be pickled automatically by TabPy and available for reuse
    # This should also be the case for the feature encoder above
    df = scaler.transform(df)

    # Use the loaded model to develop predictions for the new data from Tableau
    probs = model.predict_proba(df)
    return [loan[1] for loan in probs]

In [None]:
func_probs =loanclassifierfull(test.iloc[:,0],test.iloc[:,1],test.iloc[:,2],test.iloc[:,3],test.iloc[:,4])
print('Calc Results Come After This')
print(func_probs[:10])

In [None]:
client = tabpy_client.Client('http://localhost:9004')

In [None]:
client.deploy('loanclassifierfull', loanclassifierfull,
              'Returns the probablility that a loan will result in a bad loan based on its Grade, Income, '
              'SubGradeNum, Purpose, and DTI', override=True)