# Dictionary stuff

In [None]:
'''
Notable Code:

Saved code for checking classes and inverse encoding the labels:
    y_pred = model.predict_classes(data_bundle)
    y_classes = label_encoder.inverse_transform(y_pred)

'''

In [177]:
# Loading data in from raw csv
import pandas as pd
df = pd.read_csv('datasets/Credit_Data_Raw.csv')
df

Unnamed: 0,CHK_ACCT,DURATION,HISTORY,NEW_CAR,USED_CAR,FURNITURE,RADIO_TV,EDUCATION,RETRAINING,AMOUNT,...,AGE,OTHER_INSTALL,RENT,OWN_RES,NUM_CREDITS,JOB,NUM_DEPENDENTS,TELEPHONE,FOREIGN,DEFAULT
0,0,6,4,0,0,0,1,0,0,1169,...,67,0,0,1,2,2,1,1,0,0
1,1,48,2,0,0,0,1,0,0,5951,...,22,0,0,1,1,2,1,0,0,1
2,3,12,4,0,0,0,0,1,0,2096,...,49,0,0,1,1,1,2,0,0,0
3,0,42,2,0,0,1,0,0,0,7882,...,45,0,0,0,1,2,2,0,0,0
4,0,24,3,1,0,0,0,0,0,4870,...,53,0,0,0,2,2,2,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,3,12,2,0,0,1,0,0,0,1736,...,31,0,0,1,1,1,1,0,0,0
996,0,30,2,0,1,0,0,0,0,3857,...,40,0,0,1,1,3,1,1,0,0
997,3,12,2,0,0,0,1,0,0,804,...,38,0,0,1,1,2,1,0,0,0
998,0,45,2,0,0,0,1,0,0,1845,...,23,0,0,0,1,2,1,1,0,1


# Model Function Testing

In [196]:
'''
Credit Crunch
Author: Andrew McKinney
Creation Date: 2020-04-28
'''




def credit_crunch(converted_data,  return_evaluation=False):
# Credit Crunch is a TensorFlow Neural Network that predicts an applicant's probablity to default on a loan. 
# If default is predicted, a loan denial is returned; otherwise, approved.
# The NN model is dynamically created everytime to match in input data that is imported as a key:value dictionary.
# return_evaluation should be a boolean value (true/false) on whether or not to return model evaluation metrics with function
# Generic NN model parameters can be set in the DEV TOOLS.


    ### DEV TOOLS ###
    return_model_evaluation = return_evaluation
    numpy_seed = 42
    number_inputs = len(converted_data)
    number_classes = 2
    number_hidden_layers = 1
    number_hidden_nodes = 60
    number_epochs = 30
    layer_activation = 'relu'
    classifier_activation = 'softmax'
    learn_metrics = ['accuracy']
    loss_type = 'categorical_crossentropy'
    optimizer_type = 'adam'

    # import dependencies
    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder, MinMaxScaler
    from tensorflow.keras.utils import to_categorical
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense



    # setting numpy seed for reproducible results
    np.random.seed(numpy_seed)


    # import train data
    raw_data = pd.read_csv('datasets/Credit_Data_Raw.csv')

    raw_data.dropna()
    
    # defining labels, input fields, and input form data
    X = raw_data.drop('DEFAULT', axis=1)[[item for item in converted_data]]
    y = np.array(raw_data['DEFAULT']).reshape(-1, 1)
    data_bundle = np.array(list([converted_data[item] for item in converted_data])).reshape(1, -1)
    
    # spliting data to test and training sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
    
    
    # scaling data 
    X_scaler = MinMaxScaler().fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)
    data_bundle_scaled = X_scaler.transform(data_bundle)
    
    
    # one-hot-encoding labels
    label_encoder = LabelEncoder()
    label_encoder.fit(y_train)
    encoded_y_train = label_encoder.transform(y_train)
    encoded_y_test = label_encoder.transform(y_test)
    y_train_categorical = to_categorical(encoded_y_train)
    y_test_categorical = to_categorical(encoded_y_test)
    
    # instantiating Neural Net Model
    model = Sequential()
    
    # adding input layer
    model.add(Dense(units=number_hidden_nodes, activation=layer_activation, input_dim=number_inputs))

    # adding hidden layers
    for layer in np.arange(0, number_hidden_layers):
        model.add(Dense(units=number_hidden_nodes, activation=layer_activation))

    # adding classifier layer
    model.add(Dense(units=number_classes, activation=classifier_activation))

    # compiling model
    model.compile(optimizer=optimizer_type, loss=loss_type, metrics=learn_metrics)

    # fitting model to training data
    model.fit(
        X_train_scaled,
        y_train_categorical,
        epochs=number_epochs,
        shuffle=True,
        verbose=0
    )


    # predicting approval for user (1st # is Approval Probability or 2nd # is Default Probability)
    crunchies = model.predict(data_bundle_scaled)

    
    # returning model evaluation if turned on
    if return_model_evaluation:
        model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=0)

        return crunchies, model_loss, model_accuracy

    else:

        return crunchies


In [181]:
def approval_check(crunchies, model_accuracy):
# approval_check determines the approval status of an applicant based on the approval probability and determining model accuracy
    
    # hard coding approval status variables
    sufficient_accuracy = 0.7
    pending_probability = 0.4
    approved_probability = 0.6

    # extracting approval probability from crunchies (class prediction probabilities); 
    # 1st value, [0], is approval probabilty
    # 2nd value, [1], is default probability
    approval_probability = crunchies[0][0]

    # determining approval status based on model accuracy and approval probability
    if model_accuracy > sufficient_accuracy:
        if approval_probability >= approved_probability:
            return("Approval")
        elif approval_probability >= pending_probability:
            return('Pending')
        else:
            return('Declined')
        
    else:
        if approval_probability >= approved_probability:
            return('Pending')
        else:
            return('Declined')
    


In [182]:
# This cell grabs the first # rows (dp_count) and packages them so they can be input into the function
import numpy as np

dp_count = 5
dp_list = []
for item in np.arange(0, dp_count):
    t1 = df.drop('DURATION', axis=1).iloc[item]
    dp = {}

    for index, item in enumerate(df.columns.drop(['DEFAULT'])):
        dp[item] = t1[index]
    dp_list.append(dp)

In [197]:
# This cell takes the dp_list from previous cell and loops through them, predicting 1 at a time (like our app will)
for index, item in enumerate(dp_list):
    print(f'Number of Inputs:{len(item)}')
    test, ml, ma= credit_crunch(dp, True)
    actual = df['DEFAULT'][index]
    results = approval_check(test, ma)
    print(actual, test, results, ma, ml)

Number of Inputs:30


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


0 [[1. 0.]] Approval 0.732 0.5350992822647095
Number of Inputs:30


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


1 [[1. 0.]] Approval 0.776 0.5077096948623657
Number of Inputs:30


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


0 [[0. 1.]] Declined 0.744 0.5206109430789948
Number of Inputs:30


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


0 [[0. 1.]] Declined 0.752 0.5254103331565857
Number of Inputs:30


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


1 [[1. 0.]] Approval 0.752 0.5256337494850158


# Model Variable Evaluation
Testing number of nodes, layers, and epochs to get best model

In [None]:
'''
Credit Crunch
Author: Andrew McKinney
Creation Date: 2020-04-28
'''

import numpy as np

max_layers = 2
max_nodes = 10
max_epochs = 10

for num_lays in np.arange(1, max_layers):

    for num_nodes in np.arange(5, max_nodes, 5):
        
        for num_epochs in np.arange(10, max_epochs, 10):
            ### DEV TOOLS ###
            return_model_evaluation = return_evaluation
            numpy_seed = 42
            number_inputs = len(converted_data)
            number_classes = 2
            number_hidden_layers = num_lays
            number_hidden_nodes = num_nodes
            number_epochs = num_epochs
            layer_activation = 'relu'
            classifier_activation = 'softmax'
            learn_metrics = ['accuracy']
            loss_type = 'categorical_crossentropy'
            optimizer_type = 'adam'

            # import dependencies
            import numpy as np
            import pandas as pd
            from sklearn.model_selection import train_test_split
            from sklearn.preprocessing import LabelEncoder, MinMaxScaler
            from tensorflow.keras.utils import to_categorical
            from tensorflow.keras.models import Sequential
            from tensorflow.keras.layers import Dense



            # setting numpy seed for reproducible results
            np.random.seed(numpy_seed)


            # import train data
            raw_data = pd.read_csv('datasets/Credit_Data_Raw.csv')

            raw_data.dropna()

            # defining labels, input fields, and input form data
            X = raw_data.drop('DEFAULT', axis=1)[[item for item in converted_data]]
            y = np.array(raw_data['DEFAULT']).reshape(-1, 1)

            # spliting data to test and training sets
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)


            # scaling data 
            X_scaler = MinMaxScaler().fit(X_train)
            X_train_scaled = X_scaler.transform(X_train)
            X_test_scaled = X_scaler.transform(X_test)


            # one-hot-encoding output labels
            label_encoder = LabelEncoder()
            label_encoder.fit(y_train)
            encoded_y_train = label_encoder.transform(y_train)
            encoded_y_test = label_encoder.transform(y_test)
            y_train_categorical = to_categorical(encoded_y_train)
            y_test_categorical = to_categorical(encoded_y_test)

            # instantiating Neural Net Model
            model = Sequential()

            # adding input layer
            model.add(Dense(units=number_hidden_nodes, activation=layer_activation, input_dim=number_inputs))

            # adding hidden layers
            for layer in np.arange(0, number_hidden_layers):
                model.add(Dense(units=number_hidden_nodes, activation=layer_activation))

            # adding classifier layer
            model.add(Dense(units=number_classes, activation=classifier_activation))

            # compiling model
            model.compile(optimizer=optimizer_type, loss=loss_type, metrics=learn_metrics)

            # fitting model to training data
            model.fit(
                X_train_scaled,
                y_train_categorical,
                epochs=number_epochs,
                shuffle=True,
                verbose=0
            )

            model.predict

            model_loss, model_accuracy = model.evaluate(X_test_scaled, y_test_categorical, verbose=2)
            
            print()


