   # CREDIT RISK MODEL USING TENSORFLOW

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.cross_validation import train_test_split

In [96]:
data = pd.read_csv('/Users/Shared/TensorFlow/tensorflow_tryouts/lending-club-data.csv', low_memory=False)

In [97]:
print data.columns

Index([u'id', u'member_id', u'loan_amnt', u'funded_amnt', u'funded_amnt_inv',
       u'term', u'int_rate', u'installment', u'grade', u'sub_grade',
       u'emp_title', u'emp_length', u'home_ownership', u'annual_inc',
       u'is_inc_v', u'issue_d', u'loan_status', u'pymnt_plan', u'url', u'desc',
       u'purpose', u'title', u'zip_code', u'addr_state', u'dti',
       u'delinq_2yrs', u'earliest_cr_line', u'inq_last_6mths',
       u'mths_since_last_delinq', u'mths_since_last_record', u'open_acc',
       u'pub_rec', u'revol_bal', u'revol_util', u'total_acc',
       u'initial_list_status', u'out_prncp', u'out_prncp_inv', u'total_pymnt',
       u'total_pymnt_inv', u'total_rec_prncp', u'total_rec_int',
       u'total_rec_late_fee', u'recoveries', u'collection_recovery_fee',
       u'last_pymnt_d', u'last_pymnt_amnt', u'next_pymnt_d',
       u'last_credit_pull_d', u'collections_12_mths_ex_med',
       u'mths_since_last_major_derog', u'policy_code', u'not_compliant',
       u'status', u'inactiv

In [80]:
features = ['grade',                     # grade of the loan
            'sub_grade',                 # sub-grade of the loan
            'short_emp',                 # one year or less of employment
            'emp_length_num',            # number of years of employment
            'home_ownership',            # home_ownership status: own, mortgage or rent
            'dti',                       # debt to income ratio
            'purpose',                   # the purpose of the loan
            'term',                      # the term of the loan
            'last_delinq_none',          # has borrower had a delinquincy
            'last_major_derog_none',     # has borrower had 90 day or worse rating
            'revol_util',                # percent of available credit being used
            'total_rec_late_fee',        # total late fees received to day
           ]

target = 'safe_loans'                   # prediction target (y) (+1 means safe, -1 is risky)

In [98]:
data[target] = data['bad_loans'].apply(lambda x : +1 if x==0 else 0)

In [63]:
print data.dtypes

id                               int64
member_id                        int64
loan_amnt                        int64
funded_amnt                      int64
funded_amnt_inv                  int64
term                            object
int_rate                       float64
installment                    float64
grade                           object
sub_grade                       object
emp_title                       object
emp_length                      object
home_ownership                  object
annual_inc                     float64
is_inc_v                        object
issue_d                         object
loan_status                     object
pymnt_plan                      object
url                             object
desc                            object
purpose                         object
title                           object
zip_code                        object
addr_state                      object
dti                            float64
delinq_2yrs              

In [64]:
data[features]

Unnamed: 0,grade,sub_grade,short_emp,emp_length_num,home_ownership,dti,purpose,term,last_delinq_none,last_major_derog_none,revol_util,total_rec_late_fee
0,B,B2,0,11,RENT,27.65,credit_card,36 months,1,1,83.70,0.00
1,C,C4,1,1,RENT,1.00,car,60 months,1,1,9.40,0.00
2,C,C5,0,11,RENT,8.72,small_business,36 months,1,1,98.50,0.00
3,C,C1,0,11,RENT,20.00,other,36 months,0,1,21.00,16.97
4,A,A4,0,4,RENT,11.20,wedding,36 months,1,1,28.30,0.00
5,E,E1,0,10,RENT,5.35,car,36 months,1,1,87.50,0.00
6,F,F2,0,5,OWN,5.55,small_business,60 months,1,1,32.60,0.00
7,B,B5,1,1,RENT,18.08,other,60 months,1,1,36.50,0.00
8,C,C3,0,6,OWN,16.12,debt_consolidation,60 months,1,1,20.60,0.00
9,B,B5,0,11,OWN,10.78,debt_consolidation,36 months,1,1,67.10,0.00


In [142]:
CATEGORICAL_COLUMNS = ["grade", "sub_grade", "home_ownership",
                       "purpose", "term"]
CONTINUOUS_COLUMNS = ["emp_length_num", "dti", "revol_util", "total_rec_late_fee", "short_emp", "last_delinq_none", "last_major_derog_none"]

LABEL_COLUMN = [target]

In [143]:
training_set, test_set = train_test_split(data, test_size = 0.2)

In [144]:
print len(training_set)
print len(test_set)

98085
24522


In [145]:
def input_fn(df):
    # Creates a dictionary mapping from each continuous feature column name (k) to
    # the values of that column stored in a constant Tensor.
    continuous_cols = {k: tf.constant(df[k].values)
                       for k in CONTINUOUS_COLUMNS}
    # Creates a dictionary mapping from each categorical feature column name (k)
    # to the values of that column stored in a tf.SparseTensor.
    categorical_cols = {k: tf.SparseTensor(
        indices=[[i, 0] for i in range(df[k].size)],
        values=df[k].values,
        shape=[df[k].size, 1])
                        for k in CATEGORICAL_COLUMNS}
    # Merges the two dictionaries into one.
    feature_cols = dict(continuous_cols.items() + categorical_cols.items())
    # Converts the label column into a constant Tensor.
    label = tf.constant(df[LABEL_COLUMN].values)
    # Returns the feature columns and the label.
    return feature_cols, label

def train_input_fn():
    return input_fn(training_set)

def eval_input_fn():
    return input_fn(test_set)

In [146]:
grade_keys = list(data['grade'].unique())
sub_grade_keys = list(data['sub_grade'].unique())
home_ownership_keys = list(data['home_ownership'].unique())
purpose_keys = list(data['purpose'].unique())
term_keys = list(data['term'].unique())

In [147]:
print "grade_keys: ",grade_keys
print "sub_grade_keys: ",sub_grade_keys
print "home_ownership_keys: ",home_ownership_keys
print "purpose_keys: ",purpose_keys
print "term_keys: ",term_keys

grade_keys:  ['B', 'C', 'A', 'E', 'F', 'D', 'G']
sub_grade_keys:  ['B2', 'C4', 'C5', 'C1', 'A4', 'E1', 'F2', 'B5', 'C3', 'B1', 'D1', 'A1', 'B3', 'B4', 'C2', 'D2', 'A3', 'A5', 'D5', 'A2', 'E4', 'D3', 'D4', 'F3', 'E3', 'F1', 'E5', 'G4', 'E2', 'G2', 'F5', 'F4', 'G5', 'G1', 'G3']
home_ownership_keys:  ['RENT', 'OWN', 'MORTGAGE', 'OTHER']
purpose_keys:  ['credit_card', 'car', 'small_business', 'other', 'wedding', 'debt_consolidation', 'home_improvement', 'major_purchase', 'medical', 'moving', 'vacation', 'house']
term_keys:  [' 36 months', ' 60 months']


In [148]:
grade = tf.contrib.layers.sparse_column_with_keys(
  column_name="grade", keys=grade_keys)
sub_grade = tf.contrib.layers.sparse_column_with_keys(
  column_name="sub_grade", keys=sub_grade_keys)
home_ownership = tf.contrib.layers.sparse_column_with_keys(
  column_name="home_ownership", keys=home_ownership_keys)
purpose = tf.contrib.layers.sparse_column_with_keys(
  column_name="purpose", keys=purpose_keys)
term = tf.contrib.layers.sparse_column_with_keys(
  column_name="term", keys=term_keys)

In [149]:
emp_length_num = tf.contrib.layers.real_valued_column("emp_length_num")
dti = tf.contrib.layers.real_valued_column("dti")
revol_util = tf.contrib.layers.real_valued_column("revol_util")
total_rec_late_fee = tf.contrib.layers.real_valued_column("total_rec_late_fee")
short_emp = tf.contrib.layers.real_valued_column("short_emp")
last_delinq_none = tf.contrib.layers.real_valued_column("last_delinq_none")
last_major_derog_none = tf.contrib.layers.real_valued_column("last_major_derog_none")

In [150]:
import tempfile

In [151]:
model_dir = tempfile.mkdtemp()
model = tf.contrib.learn.LinearClassifier(feature_columns=[grade, sub_grade, short_emp, emp_length_num, 
                                                           home_ownership, dti, purpose, term, last_delinq_none, 
                                                           last_major_derog_none, revol_util, total_rec_late_fee],
  model_dir=model_dir)

In [152]:
model.fit(input_fn=train_input_fn, steps=200)



LinearClassifier()

In [153]:
results = model.evaluate(input_fn=eval_input_fn, steps=1)
for key in sorted(results):
    print "%s: %s" % (key, results[key])



accuracy: 0.678737
eval_auc: 0.0
loss: 0.187504


In [154]:
model_reg_dir = tempfile.mkdtemp()
model_reg = tf.contrib.learn.LinearClassifier(feature_columns=[grade, sub_grade, short_emp, emp_length_num, 
                                                           home_ownership, dti, purpose, term, last_delinq_none, 
                                                           last_major_derog_none, revol_util, total_rec_late_fee],
                                          optimizer=tf.train.FtrlOptimizer(
                                                        learning_rate=0.1,
                                                        l1_regularization_strength=5,
                                                        l2_regularization_strength=5),
                                        model_dir=model_dir)

In [155]:
model_reg.fit(input_fn=train_input_fn, steps=200)



LinearClassifier()

In [156]:
results = model_reg.evaluate(input_fn=eval_input_fn, steps=1)
for key in sorted(results):
    print "%s: %s" % (key, results[key])



accuracy: 0.678737
eval_auc: 0.0
loss: 0.187504


In [157]:
model.linear_weights_

{'linear/dti_weight': array([[-0.03771699]], dtype=float32),
 'linear/emp_length_num_weight': array([[ 0.01967828]], dtype=float32),
 'linear/grade_weights': array([[ 0.43817055],
        [ 0.14012535],
        [ 1.01390278],
        [-0.35323787],
        [-0.65565318],
        [-0.1863524 ],
        [-0.22389808]], dtype=float32),
 'linear/home_ownership_weights': array([[-0.05679193],
        [ 0.06166209],
        [ 0.32435447],
        [-0.01863182]], dtype=float32),
 'linear/last_delinq_none_weight': array([[-0.08802346]], dtype=float32),
 'linear/last_major_derog_none_weight': array([[ 0.03810053]], dtype=float32),
 'linear/purpose_weights': array([[  3.84579808e-01],
        [  1.63014546e-01],
        [ -7.22101271e-01],
        [ -1.19044095e-01],
        [  1.18902937e-01],
        [  1.60974890e-01],
        [  5.58535941e-02],
        [  1.67711556e-01],
        [ -3.96581180e-02],
        [ -4.02224064e-03],
        [ -6.32864598e-04],
        [  2.22200863e-02]], dtype=f

# ADDING MORE FEATURES

In [109]:
data[features]

Unnamed: 0,grade,sub_grade_num,short_emp,emp_length_num,home_ownership,dti,purpose,payment_inc_ratio,delinq_2yrs,delinq_2yrs_zero,...,pub_rec_zero,revol_util,total_rec_late_fee,int_rate,total_rec_int,annual_inc,funded_amnt,funded_amnt_inv,installment,safe_loans
0,B,0.4,0,11,RENT,27.65,credit_card,8.143500,0.0,1.0,...,1.0,83.70,0.00,10.65,861.07,24000.0,5000,4975,162.87,1
1,C,0.8,1,1,RENT,1.00,car,2.393200,0.0,1.0,...,1.0,9.40,0.00,15.27,435.17,30000.0,2500,2500,59.83,-1
2,C,1.0,0,11,RENT,8.72,small_business,8.259550,0.0,1.0,...,1.0,98.50,0.00,15.96,603.65,12252.0,2400,2400,84.33,1
3,C,0.2,0,11,RENT,20.00,other,8.275850,0.0,1.0,...,1.0,21.00,16.97,13.49,2209.33,49200.0,10000,10000,339.31,1
4,A,0.8,0,4,RENT,11.20,wedding,5.215330,0.0,1.0,...,1.0,28.30,0.00,7.90,631.38,36000.0,5000,5000,156.46,1
5,E,0.2,0,10,RENT,5.35,car,2.735750,0.0,1.0,...,1.0,87.50,0.00,18.64,938.14,48000.0,3000,3000,109.43,1
6,F,0.4,0,5,OWN,5.55,small_business,4.571700,0.0,1.0,...,1.0,32.60,0.00,21.28,294.94,40000.0,5600,5600,152.39,-1
7,B,1.0,1,1,RENT,18.08,other,9.716000,0.0,1.0,...,1.0,36.50,0.00,12.69,533.42,15000.0,5375,5350,121.45,-1
8,C,0.6,0,6,OWN,16.12,debt_consolidation,2.557500,0.0,1.0,...,1.0,20.60,0.00,14.65,1177.52,72000.0,6500,6500,153.45,1
9,B,1.0,0,11,OWN,10.78,debt_consolidation,6.440640,0.0,1.0,...,1.0,67.10,0.00,12.69,1943.08,75000.0,12000,12000,402.54,1


In [113]:
features = ['grade',                     # grade of the loan (categorical)
            'sub_grade_num',             # sub-grade of the loan as a number from 0 to 1
            'short_emp',                 # one year or less of employment
            'emp_length_num',            # number of years of employment
            'home_ownership',            # home_ownership status: own, mortgage or rent
            'dti',                       # debt to income ratio
            'purpose',                   # the purpose of the loan
            'payment_inc_ratio',         # ratio of the monthly payment to income
            'delinq_2yrs',               # number of delinquincies 
            'delinq_2yrs_zero',          # no delinquincies in last 2 years
            'inq_last_6mths',            # number of creditor inquiries in last 6 months
            'last_delinq_none',          # has borrower had a delinquincy
            'last_major_derog_none',     # has borrower had 90 day or worse rating
            'open_acc',                  # number of open credit accounts
            'pub_rec',                   # number of derogatory public records
            'pub_rec_zero',              # no derogatory public records
            'revol_util',                # percent of available credit being used
            'total_rec_late_fee',        # total late fees received to day
            'int_rate',                  # interest rate of the loan
            'total_rec_int',             # interest received to date
            'annual_inc',                # annual income of borrower
            'funded_amnt',               # amount committed to the loan
            'funded_amnt_inv',           # amount committed by investors for the loan
            'installment',               # monthly payment owed by the borrower
           ]

In [111]:
data_clean  = data[features].dropna()

In [112]:
len(data_clean)

122578

In [92]:
data_clean = data_temp.dropna()

In [93]:
len(data_clean)

122607

In [115]:
training_set, test_set = train_test_split(data_clean, test_size = 0.2)
print len(training_set)
print len(test_set)

98062
24516


In [116]:
tensor_features = {}
CATEGORICAL_COLUMNS = []
CONTINUOUS_COLUMNS = []

for i in features:
    if (data[i].dtype == 'int64') or (data[i].dtype == 'float64'):
        CONTINUOUS_COLUMNS.append(i) 
        tensor_features[i] = tf.contrib.layers.real_valued_column(i)
    else:
        CATEGORICAL_COLUMNS.append(i)
        tensor_features[i] = tf.contrib.layers.sparse_column_with_keys(
                              column_name=i, keys=list(data[i].unique()))

In [117]:
print tensor_features['grade']

_SparseColumn(column_name='grade', is_integerized=False, bucket_size=None, lookup_config=_SparseIdLookupConfig(vocabulary_file=None, keys=('B', 'C', 'A', 'E', 'F', 'D', 'G'), num_oov_buckets=0, vocab_size=7, default_value=-1), weight_column=None, combiner='sum', dtype=tf.string)


In [118]:
print tensor_features['annual_inc']

_RealValuedColumn(column_name='annual_inc', dimension=1, default_value=None, dtype=tf.float32)


In [119]:
CATEGORICAL_COLUMNS

['grade', 'home_ownership', 'purpose']

In [120]:
CONTINUOUS_COLUMNS

['sub_grade_num',
 'short_emp',
 'emp_length_num',
 'dti',
 'payment_inc_ratio',
 'delinq_2yrs',
 'delinq_2yrs_zero',
 'inq_last_6mths',
 'last_delinq_none',
 'last_major_derog_none',
 'open_acc',
 'pub_rec',
 'pub_rec_zero',
 'revol_util',
 'total_rec_late_fee',
 'int_rate',
 'total_rec_int',
 'annual_inc',
 'funded_amnt',
 'funded_amnt_inv',
 'installment']

In [134]:
model_dir = tempfile.mkdtemp()
model = tf.contrib.learn.LinearClassifier(feature_columns=[tensor_features['grade'],                     # grade of the loan (categorical)
            tensor_features['sub_grade_num'],             # sub-grade of the loan as a number from 0 to 1
            tensor_features['short_emp'],                 # one year or less of employment
            tensor_features['emp_length_num'],            # number of years of employment
            tensor_features['home_ownership'],            # home_ownership status: own, mortgage or rent
            tensor_features['dti'],                       # debt to income ratio
            tensor_features['purpose'],                   # the purpose of the loan
           tensor_features['payment_inc_ratio'],         # ratio of the monthly payment to income
#            tensor_features['delinq_2yrs'],               # number of delinquincies 
#            tensor_features['delinq_2yrs_zero'],          # no delinquincies in last 2 years
#            tensor_features['inq_last_6mths'],            # number of creditor inquiries in last 6 months
            tensor_features['last_delinq_none'],          # has borrower had a delinquincy
#            tensor_features['last_major_derog_none'],     # has borrower had 90 day or worse rating
#            tensor_features['open_acc'],                  # number of open credit accounts
#            tensor_features['pub_rec'],                   # number of derogatory public records
#            tensor_features['pub_rec_zero'],              # no derogatory public records
            tensor_features['revol_util'],                # percent of available credit being used
            tensor_features['total_rec_late_fee'],        # total late fees received to day
#            tensor_features['int_rate'],                  # interest rate of the loan
#            tensor_features['total_rec_int'],             # interest received to date
            tensor_features['annual_inc']],                # annual income of borrower
#           tensor_features['funded_amnt'],               # amount committed to the loan
#            tensor_features['funded_amnt_inv'],           # amount committed by investors for the loan
#            tensor_features['installment']],               # monthly payment owed by the borrower
  model_dir=model_dir)

In [None]:
'grade',                     # grade of the loan
            'sub_grade',                 # sub-grade of the loan
            'short_emp',                 # one year or less of employment
            'emp_length_num',            # number of years of employment
            'home_ownership',            # home_ownership status: own, mortgage or rent
            'dti',                       # debt to income ratio
            'purpose',                   # the purpose of the loan
            'term',                      # the term of the loan
            'last_delinq_none',          # has borrower had a delinquincy
            'last_major_derog_none',     # has borrower had 90 day or worse rating
            'revol_util',                # percent of available credit being used
            'total_rec_late_fee',        # total late fees received to day
           ]

In [139]:
model.fit(input_fn=train_input_fn, steps=200)



LinearClassifier()

In [140]:
results = model.evaluate(input_fn=eval_input_fn, steps=1)
for key in sorted(results):
    print "%s: %s" % (key, results[key])



accuracy: 0.0
eval_auc: 0.0
loss: 444.698


In [141]:
model.weights_

{'linear/annual_inc_weight': array([[-0.00838672]], dtype=float32),
 'linear/dti_weight': array([[-1.29763913]], dtype=float32),
 'linear/emp_length_num_weight': array([[-0.62183005]], dtype=float32),
 'linear/grade_weights': array([[ 0.68759507],
        [-1.14004457],
        [ 2.54913735],
        [-2.71312141],
        [-2.34279203],
        [-2.51851296],
        [-0.69874597]], dtype=float32),
 'linear/home_ownership_weights': array([[-1.32648158],
        [-0.48917532],
        [-0.09501822],
        [-0.02884799]], dtype=float32),
 'linear/last_delinq_none_weight': array([[-0.69723344]], dtype=float32),
 'linear/purpose_weights': array([[ 0.28210583],
        [ 0.27676743],
        [-1.41750586],
        [-1.05768549],
        [ 0.07574984],
        [-0.9525066 ],
        [ 0.14560367],
        [ 0.31382683],
        [-0.2215791 ],
        [-0.15563148],
        [-0.0113038 ],
        [-0.03260896]], dtype=float32),
 'linear/revol_util_weight': array([[-1.31407404]], dtype=floa