In [1]:
import pandas as pd

Data from the Kaggle Lending Club loan data set:<br>
https://www.kaggle.com/wendykan/lending-club-loan-data#

In [2]:
# Load the raw Lending Club export. 'loan.csv' is a relative path, so the file must be present in the
# notebook's working directory (the recorded run of this cell failed with FileNotFoundError).
# low_memory=False has pandas parse the file in one pass instead of chunk-wise, avoiding mixed-dtype columns.
loan_data = pd.read_csv('loan.csv', low_memory=False)

FileNotFoundError: [Errno 2] No such file or directory: 'loan.csv'

In [None]:
# Preview the first rows of the raw frame.
loan_data.head()

In [None]:
# (rows, columns) of the raw frame, before any filtering.
loan_data.shape

## Simple cleaning

In [None]:
# Keep only 60-month loans (the term value carries a leading space in the data).
loan_data_clean = loan_data[
    loan_data['term'] == ' 60 months'
]

In [None]:
# Discard loans that are still running ('Current' / 'In Grace Period').
loan_data_clean = loan_data_clean[
    ~loan_data_clean['loan_status'].isin(['Current', 'In Grace Period'])
]

In [None]:
# Keep annual incomes within [1,000 ; 1,000,000], both bounds inclusive.
loan_data_clean = loan_data_clean[
    loan_data_clean['annual_inc'].between(1000.0, 1000000.0)
]

In [None]:
# Remove rows whose home ownership is recorded as 'ANY'.
loan_data_clean = loan_data_clean[
    loan_data_clean['home_ownership'] != 'ANY'
]

In [None]:
# Restrict the frame to the model inputs plus the target.
# 'loan_status' must stay last: later cells take the last column as the label.
loan_data_clean = loan_data_clean[[
    'loan_amnt',
    'int_rate',
    'installment',
    'grade',
    'emp_length',
    'home_ownership',
    'annual_inc',
    'purpose',
    'inq_last_12m',
    'delinq_2yrs',
    'loan_status',
]]

In [None]:
# Drop every row with a missing value, then renumber the rows from 0.
loan_data_clean = (
    loan_data_clean
    .dropna()
    .reset_index(drop=True)
)

## Prepare data for NN

In [None]:
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler

### Labels encoding

In [None]:
# Work on a copy so the encoded columns do not overwrite the cleaned (still textual) frame.
loan_data_clean_prepared = loan_data_clean.copy()

In [None]:
# Encoder that will map each distinct loan grade to an integer code.
grade_encoder = preprocessing.LabelEncoder()

In [None]:
# Learn the set of grade labels present in the cleaned data.
grade_encoder.fit(loan_data_clean['grade'].unique())

In [None]:
# Replace the textual grade with its integer code (source is the untouched cleaned frame).
loan_data_clean_prepared['grade'] = grade_encoder.transform(loan_data_clean['grade'])

In [None]:
# Encoder that will map each distinct home-ownership category to an integer code.
ownership_encoder = preprocessing.LabelEncoder()

In [None]:
# Learn the set of home-ownership categories present in the cleaned data.
ownership_encoder.fit(loan_data_clean['home_ownership'].unique())

In [None]:
# Replace the textual home-ownership category with its integer code.
loan_data_clean_prepared['home_ownership'] = ownership_encoder.transform(loan_data_clean['home_ownership'])

In [None]:
# Encoder that will map each distinct loan purpose to an integer code.
purpose_encoder = preprocessing.LabelEncoder()

In [None]:
# Learn the set of loan purposes present in the cleaned data.
purpose_encoder.fit(loan_data_clean['purpose'].unique())

In [None]:
# Replace the textual loan purpose with its integer code.
loan_data_clean_prepared['purpose'] = purpose_encoder.transform(loan_data_clean['purpose'])

In [None]:
def _emp_length_to_years(label):
    """Convert an employment-length label such as '3 years' into a float number of years.

    '< 1 year' becomes 0.0 and '10+ years' becomes 10.0; any other label is
    expected to start with the number of years followed by a space.
    """
    if label == '< 1 year':
        return 0.0
    if label == '10+ years':
        return 10.0
    leading_token = label.split(' ')[0]
    return float(leading_token)


# Build a plain list so the assignment is positional, exactly like the source column order.
loan_data_clean_prepared['emp_length'] = [
    _emp_length_to_years(label) for label in loan_data_clean['emp_length']
]

In [None]:
# Binary target: 0 for a bad loan ('Default' or 'Charged Off'), 1 for every other remaining status.
# The raw labels are read from loan_data_clean (as the other encoding cells do) rather than from the
# column being overwritten: otherwise re-running this cell would see only 0/1 values and label every row 1.
loan_data_clean_prepared['loan_status'] = [0 if stat in ('Default', 'Charged Off') else 1
                                               for stat in loan_data_clean['loan_status']]

In [None]:
# Sanity check: all columns should now be numeric.
loan_data_clean_prepared.head()

### Normalization

In [None]:
# Features: every column except the last one (the last column is the 'loan_status' target).
loan_data_clean_prepared_X = loan_data_clean_prepared.iloc[:, :-1]

In [None]:
# Target: the last column ('loan_status', already encoded as 0/1).
loan_data_clean_prepared_Y = loan_data_clean_prepared.iloc[:, -1]

In [None]:
# Min-max scaler with default settings (rescales each feature to the [0, 1] range).
scaler = MinMaxScaler()

In [None]:
# Learn the per-feature min/max from the feature matrix.
# NOTE(review): this fits on the full data set, before the train/test split performed later in the
# notebook, so the test rows influence the scaling — consider fitting on the training rows only.
scaler.fit(loan_data_clean_prepared_X)

In [None]:
# Scale the features with the fitted scaler (result is a NumPy array).
# The input is taken from the feature columns of loan_data_clean_prepared (all but the last column)
# rather than from loan_data_clean_prepared_X itself, so re-running this cell does not scale
# already-scaled values a second time.
loan_data_clean_prepared_X = scaler.transform(loan_data_clean_prepared.iloc[:, :-1])

In [None]:
# Target as a NumPy array. It is taken from the last column of loan_data_clean_prepared rather than
# from loan_data_clean_prepared_Y itself, so re-running this cell does not fail with
# AttributeError (an ndarray has no `.values`).
loan_data_clean_prepared_Y = loan_data_clean_prepared.iloc[:, -1].values

## NN

In [None]:
from keras import Sequential
from keras.layers import Dense, Dropout
from keras.models import model_from_json
from keras import backend

In [None]:
# Reset Keras' global state so that re-running the model cells starts from a clean session.
backend.clear_session()

In [None]:
# Feed-forward binary classifier: 10 -> 50 -> 100 (+dropout) -> 20 -> 1 (sigmoid).
# The input width is derived from the feature matrix instead of being hard-coded, so the network
# stays consistent with the list of selected feature columns.
n_features = loan_data_clean_prepared_X.shape[1]

model = Sequential()
#First hidden layer (also declares the input size)
model.add(Dense(10, activation='relu', kernel_initializer='random_normal', input_dim=n_features))
#Second hidden layer
model.add(Dense(50, activation='tanh', kernel_initializer='random_normal'))
#Third hidden layer, followed by 20% dropout for regularisation
model.add(Dense(100, activation='tanh', kernel_initializer='random_normal'))
model.add(Dropout(0.2))
#Fourth hidden layer
model.add(Dense(20, activation='tanh', kernel_initializer='random_normal'))
#Output layer: a single sigmoid unit giving the probability of class 1
model.add(Dense(1, activation='sigmoid', kernel_initializer='random_normal'))
model.summary()

In [None]:
# Configure training: Adam optimiser, binary cross-entropy loss, accuracy reported as metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 33% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    loan_data_clean_prepared_X,
    loan_data_clean_prepared_Y,
    test_size=0.33,
    random_state=15,
)

In [None]:
#Train the network: 100 epochs with mini-batches of 100 rows.
#No validation data is passed, so only training loss/accuracy are reported per epoch.
model.fit(X_train, y_train, batch_size=100, epochs=100)


In [None]:
#Evaluate on the held-out test data.
#Given the loss and metric set in model.compile, score is expected to be [loss, accuracy] — confirm.
score = model.evaluate(X_test, y_test)
print(score)


### Save model, scaler and encoders

In [None]:
#Serialize the model architecture to JSON (written to model.json in the working directory)
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
#Serialize the trained weights to HDF5 (stored separately from the architecture, in model.h5)
model.save_weights("model.h5")

In [None]:
from pickle import dump

In [None]:
# Persist the fitted scaler and label encoders so the same preprocessing can be re-applied later.
# Each file is opened in a `with` block so the handle is flushed and closed deterministically
# (the bare `open(...)` passed straight to dump() was never closed).
for artifact, pkl_path in (
    (scaler, 'scaler.pkl'),
    (grade_encoder, 'grade_encoder.pkl'),
    (ownership_encoder, 'ownership_encoder.pkl'),
    (purpose_encoder, 'purpose_encoder.pkl'),
):
    with open(pkl_path, 'wb') as pkl_file:
        dump(artifact, pkl_file)