In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import os
import pickle
import timeit

# numpy settings
import numpy as np
np.random.seed(42) # to make this notebook's output stable across runs

# pandas settings
import pandas as pd
pd.set_option('display.max_columns', None)

# matplotlib settings
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Datasets
DATASET_PATH = os.path.join(".", "datasets")

# Figures
IMAGE_PATH = os.path.join(".", "images")
if not os.path.isdir(IMAGE_PATH):
        os.makedirs(IMAGE_PATH)

def save_fig(fig_id, tight_layout=True):
    path = os.path.join(IMAGE_PATH, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

# Models
MODEL_PATH = os.path.join(".", "models")
if not os.path.isdir(MODEL_PATH):
        os.makedirs(MODEL_PATH)
        
# Features
FEATURE_PATH = os.path.join(".", "features")
if not os.path.isdir(FEATURE_PATH):
        os.makedirs(FEATURE_PATH)

def save_features(feature_score, model_name):
    path = os.path.join(FEATURE_PATH, model_name + ".csv")
    with open(path, 'w') as f:
        for idx in range(len(feature_score)):
            f.write("{0},{1}\n".format(feature_score[idx][0], feature_score[idx][1]))

# Color for print
class color:
   PURPLE = '\033[95m'
   CYAN = '\033[96m'
   DARKCYAN = '\033[36m'
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   YELLOW = '\033[93m'
   RED = '\033[91m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'

    
# Print a line
def print_lines(num, length):
    for i in range(num):
        print('-'*length)

***
## Data Preprocessing

This section preprocesses the raw data.

In [2]:
# Import packages
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_selection import SelectKBest

In [3]:
# Load data
with open(os.path.join(DATASET_PATH, 'new_theorem_data.p'), 'rb') as f:
    u = pickle._Unpickler(f)
    u.encoding = 'latin1'
    loan_data = u.load()

Labels are extracted and removed from the dataset.

In [4]:
# Lables processing
loan_labels = loan_data['EnumListingStatus']
loan_labels.replace(to_replace=7, value=0, inplace=True) # cancelled loans
loan_labels.replace(to_replace=6, value=1, inplace=True) # originated loans
# loan_labels.shape

In [5]:
# Correct data type
bool_list = ['BoolIsLender']
loan_data[bool_list] = loan_data[bool_list].astype(bool)

cat_list = ['EnumListingCategory', 'EnumLoanFractionalType']
for cat in cat_list:
    loan_data[cat] = loan_data[cat].astype('object')

The feature selection needs careful treatment. 
Some features are identified as unnecessary either manually or by through model-based approach and are removed from the features. 
Speficically, features with low scores (given by the classfier) are removed. 

In [6]:
# Drop unnecessary features
feature_drop_list = ['EnumListingStatus',
                     'ListingID',
                     'DateWholeLoanEnd',
                     'NumPublicRecords12',
                     'NumOpenTradesDelinqOrPastDue6',
                     'BoolInGroup',
                     'BoolOwnsHome',
                     'BoolIsLender',
                     'BoolPartialFundingApproved',
                     'BoolEverWholeLoan',
                     'BoolIsFractionalLoan',
                     'NumTradesCurr30DPDOrDerog6',
                     'NumTradesDelinqOrPastDue6',
                     'EnumLoanFractionalType',
                    ]
loan_data.drop(feature_drop_list, axis=1, inplace=True)
    
prosper_list = ['NumPriorProsperLoans61dpd',
                'NumPriorProsperLoans31dpd',
                'NumPriorProsperLoansEarliestPayOff',
                'NumPriorProsperLoansOnTimePayments',
                'DolPriorProsperLoansPrincipalBorrowed',
                'DolPriorProsperLoansPrincipalOutstanding',
                'DolPriorProsperLoansBalanceOutstanding',
                'NumPriorProsperLoansLatePayments',
                'NumPriorProsperLoansCyclesBilled',
                'NumPriorProsperLoansLateCycles',
                'DolMaxPriorProsperLoan',
                'DolMinPriorProsperLoan']

The following code defines a class that returns a sub-dataframe that consists of specified data types from a dataframe.

In [7]:
# Data selection

class DataFrameSelector(BaseEstimator, TransformerMixin):
    def __init__(self, dtypes):
        self.dtypes = dtypes
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return X.select_dtypes(self.dtypes).copy()


# Test
# DataFrameSelector(["int64"]).transform(loan_data).head()

The following code handles numerical features. `NAN` values are filled with 0 expcet for `FracDebtToIncomeRatio` and `ProsperScore`, which are filled with `mean` values. 

I tried to add some new features such as `IncomePaymentRatio` and `IncomeAmountRatio`. However, it turns out these new features do not contribute to the classification accuracy and thus are not adopted.

In fact, when time permits, it is recommended to generate various combinations of new features using both heuristics and pure techniques such as `PolynomialFeatures()` from `sklearn`. A good understanding of the loan market will definitely help.

In [8]:
# Numerical Feature Handling

class NumericalTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass
    def fit(self, X, y=None):
        return self
    def transform(self, X, y=None):
        # Replace NA values
        for col in ['FracDebtToIncomeRatio', 'ProsperScore']:
            X[col].fillna(X[col].mean(), inplace=True)
        X.fillna(0, inplace=True)
        
        # Drop some features
#         if y is not None:
#             # Select K best features, X is 2-d array with each column representing a feature
#             kbest_selector = SelectKBest(lambda X, y: np.array(map(lambda x:mic(x, y), X.T)), k=2)            
#             kbest_selector.fit_transform(X.values, y)
#             selected_cols = kbest_selector.get_support(indices=True)
#             X = X[selected_cols]

        # Generate new features
#         X['IncomePaymentRatio'] = X['DolMonthlyIncome'] / X['DolMonthlyLoanPayment']
#         X['IncomeAmountRatio'] = X['DolMonthlyIncome'] / X['DolLoanAmountRequested']
        
        return X


# Test
# loan_data_num = NumericalTransformer().transform(DataFrameSelector(['float64', 'int64']).transform(loan_data))
# loan_data_num.head()

The following code handles categorical features. `NAN` values are filled with `-`. 
Categorical features are encoded as integers. Another popular encoding, `One-hot Enconding`, is also tested. 
Yet, the benefit of using `One-hot Enconding` is not obvious here and thus is not used.

In [9]:
# Categorical features handling

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

class CategoricalTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, encoding='integer'):
        self.encoding = encoding
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        # Replace NAN values
        X.fillna('-', inplace=True)
    
        # Encode
        le = LabelEncoder()
        X = X.apply(le.fit_transform)        
        if self.encoding == 'onehot':
            ohe = OneHotEncoder()
            X = ohe.fit_transform(X.values)
        
        return X


# Test
# loan_data_cat = CategoricalTransformer().transform(DataFrameSelector(['object']).transform(loan_data))
# loan_data_cat

The following code handles Datetime features. NAN values are filled with `2017-12-31 23:59:59`.
All Datetime values are converted to days from `1900-01-01 00:00:00`.

Two new features are added, i.e.,  `CreditLength` and `ListingTime`. 

In [10]:
# Datetime features handling

class DatetimeTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        # Replace NA values
        X.fillna(pd.Timestamp('2017-12-31 23:59:59'), inplace=True)
          
        # Convert to days
        X['TimeBase'] = pd.Timestamp('1900-01-01 00:00:00')
        for col in X.columns:
            if col == 'TimeBase':
                pass
            else:
                X[col] = (X[col] - X['TimeBase']).dt.days
        X.drop(['TimeBase'], axis=1, inplace=True)
        
        # Generate new features
        X['CreditLength'] = X["DateCreditPulled"] - X["DateFirstCredit"]
        X['ListingTime'] = X["DateListingStart"] - X["DateListingCreation"]
        
        return X


# Test
# loan_data_dt = DatetimeTransformer().transform(DataFrameSelector(['datetime64']).transform(loan_data))
# loan_data_dt.head()

The following code handles bool features. All values are converted to 0 or 1.

In [11]:
# Bool feature handling

class BoolTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        X = X.astype(int)
        return X


# Test
# loan_data_bool = BoolTransformer().transform(DataFrameSelector(['bool']).transform(loan_data))
# loan_data_bool.head()

In [12]:
# Scaler
from sklearn.preprocessing import StandardScaler

class Scaler(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        X = pd.DataFrame(StandardScaler().fit_transform(X.values),
                                index=X.index, columns=X.columns)
        return X

    
# Test
# loan_data_scaled = Scaler().fit_transform(DataFrameSelector(['int64']).transform(loan_data))
# loan_data_scaled.head()

In [13]:
# Transformation pipelines

from sklearn.pipeline import Pipeline 

num_pipeline = Pipeline([
        ('selector', DataFrameSelector(['float64', 'int64'])),
        ('transformer', NumericalTransformer()),
        ('scaler', Scaler()),
    ])

cat_pipeline = Pipeline([
        ('selector', DataFrameSelector(['object'])),
        ('transformer', CategoricalTransformer()),
    ])
                                                     
dt_pipeline = Pipeline([
        ('selector', DataFrameSelector(['datetime64'])),
        ('transformer', DatetimeTransformer()),
        ('scaler', Scaler()),
    ])

bool_pipeline = Pipeline([
        ('selector', DataFrameSelector(['bool'])),
        ('transformer', BoolTransformer()),
    ])

# Pandas dataframe returned
loan_data_clean = num_pipeline.fit_transform(loan_data)
loan_data_clean = loan_data_clean.join(cat_pipeline.fit_transform(loan_data))
loan_data_clean = loan_data_clean.join(dt_pipeline.fit_transform(loan_data))
loan_data_clean = loan_data_clean.join(bool_pipeline.fit_transform(loan_data))
# loan_data_clean.head()



In [14]:
# # Prepare data using FeatureUnion

# from sklearn.pipeline import FeatureUnion

# data_pipeline = FeatureUnion(transformer_list=[
#     ("num_pipeline", num_pipeline),
#     ("cat_pipeline", cat_pipeline),
#     ("dt_pipeline", dt_pipeline),
#     ("bool_pipeline", bool_pipeline)
# ])

# # Numpy array returned
# loan_data_clean = data_pipeline.fit_transform(loan_data)

In [15]:
# Drop unnecessary features
null_cols = [
             'DolPriorProsperLoansPrincipalBorrowed',
             'DolPriorProsperLoansPrincipalOutstanding',
             'DolPriorProsperLoansBalanceOutstanding',
             'NumPriorProsperLoansCyclesBilled',
             'NumPriorProsperLoansOnTimePayments',
             'NumPriorProsperLoansLateCycles',
             'NumPriorProsperLoansLatePayments',
             'DolMaxPriorProsperLoan',
             'DolMinPriorProsperLoan',
             'NumPriorProsperLoansEarliestPayOff',
             'NumPriorProsperLoans31dpd',
             'NumPriorProsperLoans61dpd',
             'DolMonthlyIncome'
             ]
loan_data_clean.drop(null_cols, axis=1, inplace=True)

In [16]:
def drop_date_columns(data, col):
    data.drop(col, axis=1, inplace=True)
    return None

In [17]:
drop_date_columns(loan_data_clean, 'DateCreditPulled')
drop_date_columns(loan_data_clean, 'DateListingStart')
drop_date_columns(loan_data_clean, 'DateListingCreation')
drop_date_columns(loan_data_clean, 'DateFirstCredit')
drop_date_columns(loan_data_clean, 'DateWholeLoanStart')

In [18]:
# Split the data into a training set and a test set

# Regular sampling
from sklearn.model_selection import train_test_split

# Full set for mode tunning
X_train, X_test, y_train, y_test = train_test_split(loan_data_clean, loan_labels, test_size=0.2, random_state=42)
print([X_train.shape, y_train.shape, X_test.shape, y_test.shape])

[(201975, 56), (201975,), (50494, 56), (50494,)]


In [19]:
loan_data_clean.head()

Unnamed: 0_level_0,DolLoanAmountRequested,LenderYield,BorrowerRate,NumMonthsTerm,DolMonthlyLoanPayment,FICOScore,ProsperScore,FracDebtToIncomeRatio,NumMonthsEmployed,NumPriorProsperLoansActive,NumPriorProsperLoans,NumTrades,NumTradesOpened6,NumTradesEver90DPDOrDerog,DolTotalBalanceAllOpenTrades6,DolTotalBalanceOnPublicRecords,DolTotalPaymentAllOpenTrades6,AgeOldestTrade,NumInquiries6MinusLastTwoWeeks,PctTradesNeverDelinquent,NumBankcardTradesOpened12,DolTotalAvailBankcardCredit6,DolTotalBalanceInstallTradesReptd6,NumRealEstateTrades,NumRealPropertyTrades,DolTotalBalanceOpenRevolving6,NumTradesEverDerog,DolMonthlyDebt,NumCurrentDelinquencies,NumDelinquencies84,NumPublicRecordsLast10Years,NumCreditLines84,NumInquiries6,DolAmountDelinquent,NumCurrentCreditLines,NumOpenCreditLines,PctBankcardUtil,NumOpenRevolvingAccounts,DolRealEstateBalance,DolRevolvingBalance,DolRealEstatePayment,NumTotalInquiries,NumSatisfactoryAccounts,NumWasDelinquentDerog,NumDelinquenciesOver30Days,NumDelinquenciesOver60Days,CreditGrade,EnumListingCategory,StrEmploymentStatus,StrOccupation,StrState,StrBorrowerCity,EnumChannelCode,CreditLength,ListingTime,BoolIncomeVerifiable
ListingNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1
973605,0.165053,0.365676,0.310655,1.424106,-0.189834,-0.291163,-0.256238,0.083457,3.227856,-0.211541,-0.224207,0.38777,0.02451,-0.526781,-0.132137,-0.023076,-0.261019,1.993553,-0.797908,0.399033,-0.473268,-0.623496,-0.767534,0.337547,0.792132,0.496868,-0.489204,0.191178,-0.291167,-0.303363,-0.442038,0.549421,-0.79389,-0.07692,-0.094129,-0.544434,1.584098,-0.358332,-0.102032,0.937053,-0.140513,-0.052225,0.618965,-0.060657,0.738461,-0.177781,2,1,0,64,42,19886,2,1.965917,9.370667,1
981099,0.165053,0.304927,0.244141,1.424106,-0.201926,0.239388,0.547269,0.611062,-0.73554,-0.211541,-0.224207,2.236386,0.02451,-0.526781,3.232604,-0.023076,1.815838,2.104297,-0.797908,0.763537,0.521764,-0.196134,3.715439,0.337547,0.792132,-0.13366,-0.489204,1.558951,-0.291167,-0.399916,-0.442038,2.019315,-0.79389,-0.07692,0.668492,0.669502,-0.274887,0.289688,2.756163,-0.105565,1.239818,-0.511948,2.25005,-0.351604,-0.203094,-0.177781,2,1,3,0,4,75,2,2.076758,7.706455,1
1025766,-1.229641,1.172768,1.194343,-0.702195,-1.125911,-0.821713,-0.256238,1.798174,-1.004245,4.725382,7.340314,-0.844641,-0.825499,-0.156655,-0.661996,-0.023076,-0.526491,-1.218039,0.859226,-0.512227,-0.14159,-0.574142,-0.459356,-0.89933,-0.889088,0.06272,-0.105847,-0.241232,-0.291167,-0.20681,-0.442038,-0.84311,0.528544,-0.07692,-0.666094,-0.746757,1.432344,-0.790345,-0.653242,0.003064,-0.537325,-0.052225,-0.849012,-0.060657,-0.046168,0.094518,4,1,0,42,21,11603,3,-1.215775,6.781892,1
1003835,-0.468899,-0.191478,-0.299375,-0.702195,-0.317603,1.30049,-0.657991,-0.773901,-1.023438,-0.211541,-0.224207,-1.460846,0.02451,-0.526781,-0.82234,-0.023076,-0.929153,-1.620746,-0.797908,0.763537,-0.804945,-0.361999,-0.767534,-0.89933,-0.889088,-0.521536,-0.489204,-1.140017,-0.291167,-0.399916,-0.442038,-1.539376,-0.79389,-0.07692,-0.856749,-0.746757,-1.109533,-0.358332,-0.653242,-0.511372,-0.537325,-0.971672,-1.419891,-0.642552,-0.516945,-0.450081,0,10,0,30,33,13897,4,-1.608786,4.840311,1
1011335,0.799005,0.053253,-0.031418,1.424106,0.257227,-1.352264,1.752528,-0.641999,-0.178937,-0.211541,2.2973,0.475799,0.02451,-0.156655,-0.619685,0.006025,-0.325754,0.161234,-0.797908,-0.147723,-0.473268,-0.609514,0.250229,-0.280891,-0.048478,-0.441696,-0.105847,0.219918,0.756589,-0.013703,2.567408,1.168324,-0.79389,-0.07692,-0.094129,0.062534,0.977082,0.073681,-0.653242,-0.43994,-0.537325,2.706117,1.10829,0.23029,0.581535,0.911417,2,1,0,20,21,1092,3,0.153553,5.210136,1


## `Deep Learning`

#### Split data into 80/2 training/test

In [34]:
train_size = 80000
test_size = 2000
X_train_s, y_train_s = X_train[:train_size], y_train[:train_size]
X_test_s, y_test_s = X_test[:test_size], y_test[:test_size]

print([X_train_s.shape, y_train_s.shape, X_test_s.shape, y_test_s.shape])
DL_X_train, DL_X_test = X_train_s.values, X_test_s.values
DL_Y_train, DL_Y_test = y_train_s.values, y_test_s.values 
DL_Y_train = np.array([DL_Y_train, -(DL_Y_train-1)]).T
DL_Y_test = np.array([DL_Y_test, -(DL_Y_test-1)]).T

[(80000, 56), (80000,), (2000, 56), (2000,)]


In [35]:
import tensorflow as tf

In [36]:
# Parameters
lr_0 = 0.001
decay_rate = 9/1500
training_epochs = 1000
batch_size = 256
display_step = 100
drop_rate_1 = 0.25
drop_rate_2 = 0.5


# Network Parameters
n_classes = 2 # Number of classes to predict
n_inputs = X_train_s.shape[1] # Number of feature
n_hidden_1 = 1024 # 1st layer number of features
n_hidden_2 = 10 # 2st layer number of features
n_hidden_3 = 128 # 3nd layer number of features
n_hidden_4 = 128 # 4nd layer number of features
n_hidden_5 = 64 # 5nd layer number of features
n_hidden_6 = 64 # 6nd layer number of features
n_hidden_7 = 64 # 7nd layer number of features

#### Construct 7-layer CNN with regularization, drop-out, batch normalization and relu as activation function

In [37]:
def dense(inputs, num_outputs, activation_fn):
    regularizer = tf.contrib.layers.l2_regularizer(scale=0.2)
    return tf.contrib.layers.fully_connected(inputs, num_outputs, activation_fn=None, weights_regularizer=regularizer)

def convnn(inputs, filt, knsize, activation_fn):
    return tf.layers.conv1d(inputs=inputs, filters=filt, kernel_size=knsize, padding="same", activation=None)

def pooling(inputs):
    return tf.layers.max_pooling1d(inputs=inputs, pool_size=2, strides=2)
    
def dense_relu(inputs, num_outputs, is_training, scope):
    with tf.variable_scope(scope):
        h1 = dense(inputs, num_outputs, scope)
        return tf.nn.relu(h1, 'relu')

def dense_batch_relu(inputs, num_outputs, is_training, scope):
    with tf.variable_scope(scope):
        h1 = dense(inputs, num_outputs, scope)
        h2 = tf.contrib.layers.batch_norm(h1, center=True, scale=True, is_training=is_training, scope='bn')
        return tf.nn.relu(h2, 'relu')

def cnn_batch_relu(inputs, filt, knsize, is_training, scope):
    with tf.variable_scope(scope):
        h1 = convnn(inputs, filt, knsize, scope)
#         h2 = tf.contrib.layers.batch_norm(h1, center=True, scale=True, is_training=is_training, scope='bn')
        return tf.nn.relu(h1, 'relu')
    
def dense_dropout_relu(inputs, num_outputs, dropout_rate, is_training, scope):
    with tf.variable_scope(scope): 
        h1 = dense(inputs, num_outputs, scope)
        h2 = tf.layers.dropout(inputs=h1, rate=dropout_rate, training=is_training)
        return tf.nn.relu(h2, 'relu')


#### Implement CNN and output accuracy and loss

In [38]:
tf.reset_default_graph()
X = tf.placeholder('float32', [None, n_inputs, 1], name='X')
Y = tf.placeholder('float32', (None, n_classes), name='Y')
is_training = tf.placeholder(tf.bool, name='is_training')
learning_rate = tf.placeholder('float32', name='lr')

input_layer = tf.reshape(X, [-1, n_inputs, 1])
h1 = cnn_batch_relu(X, 32, 5, is_training=is_training, scope='cnn1')
h2 = pooling(h1)
h2_dr = tf.layers.dropout(inputs=h2, rate=drop_rate_1, training=is_training)
h3 = cnn_batch_relu(h2_dr, 64, 5, is_training=is_training, scope='cnn2')
h4 = pooling(h3)
h4_dr = tf.layers.dropout(inputs=h4, rate=drop_rate_1, training=is_training)
h4_flatten = tf.contrib.layers.flatten(h4_dr)
h5 = dense_dropout_relu(inputs=h4_flatten, dropout_rate=drop_rate_2, num_outputs=n_hidden_1, is_training=is_training, scope='layer5')
logits = dense(inputs=h5, num_outputs=2, activation_fn='logits')   

with tf.name_scope('accuracy'):
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(Y, 1), tf.argmax(logits, 1)), 'float32'))

with tf.name_scope('loss'):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits))

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    # Ensures that we execute the update_ops before performing the train_step
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
#     optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
#     gvs = optimizer.compute_gradients(loss)
#     capped_gvs = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gvs]
#     train_op = optimizer.apply_gradients(capped_gvs)

In [39]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        n_batches = int(len(DL_X_train)/batch_size)
        X_batches = np.array_split(DL_X_train, n_batches)
        Y_batches = np.array_split(DL_Y_train, n_batches)
        lr_value = 1/(1 + decay_rate * epoch) * lr_0
        # Loop over all batches
        for i in range(n_batches):
            batch_X, batch_Y = X_batches[i], Y_batches[i]
            batch_X = np.reshape(batch_X, (np.shape(batch_X)[0], n_inputs, 1))
            # Run optimization op (backprop) and cost op (to get loss value)
#             _, c = sess.run([optimizer, loss], feed_dict={X: batch_X, Y: batch_Y, is_training:1, learning_rate:lr_value})
            _, c = sess.run([optimizer, loss], feed_dict={X: batch_X, Y: batch_Y, is_training:1, learning_rate:lr_value})
            # Compute average loss
            avg_cost += c / n_batches
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Test model
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    DL_X_train_tr = np.reshape(DL_X_train, (np.shape(DL_X_train)[0], n_inputs, 1))
    DL_X_test_tr = np.reshape(DL_X_test, (np.shape(DL_X_test)[0], n_inputs, 1))
    print("Training Accuracy:", accuracy.eval({X: DL_X_train_tr, Y: DL_Y_train, is_training:1}))
    print("Test Accuracy:", accuracy.eval({X: DL_X_test_tr, Y: DL_Y_test, is_training:0}))
#     global result 
#     result = tf.argmax(pred, 1).eval({X: DL_X_test, Y: DL_Y_test})

#     # plot the cost
#     plt.plot(np.squeeze(loss))
#     plt.ylabel('cost')
#     plt.xlabel('iterations (per tens)')
#     plt.title("Learning rate =" + str(learning_rate))
#     plt.show()

Epoch: 0001 cost= 31.617279328
Epoch: 0101 cost= 0.561617220
Epoch: 0201 cost= 0.543271641
Epoch: 0301 cost= 0.521064070
Epoch: 0401 cost= 0.506781861
Epoch: 0501 cost= 0.497414144
Epoch: 0601 cost= 0.488500934
Epoch: 0701 cost= 0.480860585
Epoch: 0801 cost= 0.474410257
Epoch: 0901 cost= 0.471704989
Optimization Finished!
Training Accuracy: 0.771
Test Accuracy: 0.6925


In [40]:
with tf.Session() as sess:
    sess.run(init)
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    DL_X_train_tr = np.reshape(DL_X_train, (np.shape(DL_X_train)[0], n_inputs, 1))
    DL_X_test_tr = np.reshape(DL_X_test, (np.shape(DL_X_test)[0], n_inputs, 1))
    print("Training Accuracy:", accuracy.eval({X: DL_X_train_tr, Y: DL_Y_train, is_training:1}))
    print("Test Accuracy:", accuracy.eval({X: DL_X_test_tr, Y: DL_Y_test, is_training:0}))

Training Accuracy: 0.5652875
Test Accuracy: 0.6525
