# Logistic Regression Modeling

## Import packages and modules

In [1]:
import csv
import numpy as np
import pandas as pd

import seaborn as sns
from matplotlib import pyplot as plt

from imblearn.under_sampling import RandomUnderSampler 
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from keras import metrics
tf.get_logger().setLevel('INFO')

## Import data

In [3]:
# NOTE(review): absolute, machine-specific path - this cell only runs on a machine with this exact directory layout.
# Presumably model_prep.py defines X_train, Y_train, X_test and Y_test, which the cells below use - TODO confirm.
%run /Users/apassan/Documents/03_Professional/07_GitHubRepo/APassan_Portfolio/Classification_CreditCardFraud/scripts/model_prep.py

### Confirm X and Y Shapes

In [6]:
# Report the dimensions of every split, one per line, so that any mismatch
# between feature and label matrices is easy to spot.
print('\n'.join(
    '{} {}'.format(caption, split.shape)
    for caption, split in (
        ('X_train shape:', X_train),
        ('Y_train shape:', Y_train),
        ('X_test shape:', X_test),
        ('Y_test shape:', Y_test),
    )
))


X_train shape: (139844, 7)
Y_train shape: (139844,)
X_test shape: (34962, 7)
Y_test shape: (34962,)


## Modeling

We will first write a function setting up our logistic regression model.

In [57]:
def build_logreg_model(num_features, learning_rate):
    '''
    Goal: build a TensorFlow logistic regression model using Keras

    Arguments:
        num_features - number of input features (width of the input matrix)
        learning_rate - learning rate passed to the SGD optimizer

    Returns: model - a compiled tf.keras Sequential model made of a single
             sigmoid unit, using binary cross-entropy loss and reporting
             binary accuracy
    '''

    # Each time we build a model, tf will add new nodes instead of overwriting,
    # so reset the global Keras state first. The parentheses matter: without
    # them the function is only referenced and the reset never happens.
    tf.keras.backend.clear_session()

    # Set a random seed to ensure results are the same on each identical training run
    np.random.seed(0)
    tf.random.set_seed(0)

    # Build a model using keras.Sequential
    model = keras.Sequential()

    # Add the logistic layer (only layer in this model)
    model.add(keras.layers.Dense(
        units = 1, # output dimension
        input_shape = [num_features], # input dimension
        use_bias = True, # Use a bias parameter
        activation = 'sigmoid' # Apply sigmoid function for logistic regression
    ))

    # Use stochastic gradient descent optimizer
    optimizer = tf.keras.optimizers.SGD(learning_rate = learning_rate)

    # Compile model
    model.compile(loss = 'binary_crossentropy', # Use binary cross entropy loss function
                  optimizer = optimizer, # Use as defined above SGD
                  metrics = [metrics.binary_accuracy]) # Get tf to report on accuracy when evaluating the model
    return model

Then we will write a function that allows us to train a model and tune various hyperparameters.

In [64]:
def run_logreg_model(no_feat, features, learning_rates):
    '''
    Goal: train one logistic regression model per learning rate and display
    the per-epoch training history of every run.

    Arguments:
        no_feat - number of feature columns in `features`
        features - training feature matrix passed to model.fit as x
        learning_rates - iterable of SGD learning rates to try, in order

    Returns: model - the fitted model for the LAST learning rate in
             learning_rates, or None when learning_rates is empty.
             (Previously nothing was returned, so callers that stored the
             result always received None.)
    '''
    model = None  # stays None when no learning rate is supplied

    for learning_rate in learning_rates:
        model = build_logreg_model(no_feat, learning_rate)
        fit_result = model.fit(
            x = features,
            y = Y_train,  # labels are read from the notebook's global namespace
            epochs = 10,
            batch_size = 32,
            validation_split = 0.1,
            verbose = 0)

        # Convert the return value into a DataFrame so we can see the train loss
        # and binary accuracy after every epoch.
        history = pd.DataFrame(fit_result.history)
        display(history)

    return model

In [65]:
def plot_loss(model):
    '''
    Goal: Plot the loss after each training epoch.

    Arguments: model - a tf.keras model that has already been trained with
               fit(); the History of that training run is read from
               model.history

    Raises: ValueError if the model carries no training history
    '''
    # Read the training history from the fitted model. The previous version
    # read a local `history` before assigning it, which raised
    # UnboundLocalError on every call.
    fit_history = getattr(model, 'history', None)
    if fit_history is None or not fit_history.history:
        raise ValueError('plot_loss requires a model that has been trained with fit()')

    # Convert the history object into a DataFrame.
    history = pd.DataFrame(fit_history.history)
    epochs = range(len(history))

    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.plot(epochs, history['loss'], marker='.', color='black', label='train loss')
    # Validation loss only exists when fit() was given validation data.
    if 'val_loss' in history.columns:
        plt.plot(epochs, history['val_loss'], marker='.', color='red', label='validation loss')
    plt.legend()
    plt.show()

    # Show the final train loss value and the learned model weights.
    print('Final train loss:', list(history['loss'])[-1])
    print('Final weights:', model.layers[0].get_weights())

In [66]:
# Nested feature subsets: feat_k keeps every column of X_train from position
# (7 - k) onward, so feat_1 starts at column 6 and feat_7 starts at column 0.
feat_1, feat_2, feat_3, feat_4, feat_5, feat_6, feat_7 = (
    X_train.iloc[:, first_col:] for first_col in range(6, -1, -1)
)

- In the previous EDA, the norm ratio to median purchase price seemed to be the feature most correlated with fraud. We'll first train the model on this feature alone, using several learning rates, to decide which learning rate to use as we build the model up.

In [67]:
# Candidate learning rates for the single-feature model
rate1 = [1e-2, 1e-3, 1e-4]
model_1 = run_logreg_model(no_feat=1, features=feat_1, learning_rates=rate1)

2022-06-22 18:14:48.720204: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-06-22 18:15:01.118660: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Unnamed: 0,loss,binary_accuracy,val_loss,val_binary_accuracy
0,0.494582,0.797154,0.460077,0.829246
1,0.462604,0.825805,0.459892,0.828388
2,0.462567,0.82594,0.459969,0.827601
3,0.462567,0.825654,0.459964,0.82753
4,0.462551,0.82563,0.459913,0.828316
5,0.46257,0.825916,0.459926,0.828102
6,0.462566,0.825813,0.459932,0.827816
7,0.462563,0.825543,0.459882,0.828388
8,0.462571,0.825662,0.459873,0.828888
9,0.462569,0.825773,0.459894,0.828245


2022-06-22 18:17:01.198538: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-06-22 18:17:13.621119: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Unnamed: 0,loss,binary_accuracy,val_loss,val_binary_accuracy
0,0.683282,0.576304,0.544739,0.800286
1,0.516018,0.808063,0.494348,0.817233
2,0.485603,0.818638,0.476084,0.823239
3,0.473579,0.822087,0.468085,0.82567
4,0.46815,0.823803,0.464264,0.827172
5,0.465504,0.824701,0.462304,0.827601
6,0.464141,0.82532,0.461263,0.827887
7,0.463418,0.825448,0.460684,0.828173
8,0.463025,0.825495,0.460359,0.828245
9,0.46281,0.82567,0.460173,0.828245


2022-06-22 18:19:13.437027: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-06-22 18:19:26.228573: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Unnamed: 0,loss,binary_accuracy,val_loss,val_binary_accuracy
0,1.02933,0.218427,0.939041,0.21094
1,0.865121,0.204689,0.797296,0.191848
2,0.746663,0.190197,0.703593,0.364104
3,0.674925,0.540422,0.651231,0.686164
4,0.63431,0.731144,0.619136,0.757383
5,0.607646,0.765952,0.596552,0.776332
6,0.588109,0.779094,0.579348,0.785842
7,0.572862,0.786595,0.565584,0.791205
8,0.560467,0.792538,0.554217,0.796425
9,0.550117,0.796677,0.544615,0.800215


In [69]:
# Compare the two remaining candidate learning rates on the two-feature subset
rate2 = [1e-2, 1e-3]
model_2 = run_logreg_model(no_feat=2, features=feat_2, learning_rates=rate2)

2022-06-22 18:23:07.590288: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-06-22 18:23:21.541185: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Unnamed: 0,loss,binary_accuracy,val_loss,val_binary_accuracy
0,0.48227,0.810733,0.436058,0.850983
1,0.435817,0.852462,0.435403,0.851913
2,0.435647,0.853193,0.43544,0.851412
3,0.435604,0.853519,0.435486,0.851055
4,0.435636,0.852502,0.435405,0.852413
5,0.435612,0.852732,0.435456,0.852914
6,0.435657,0.853209,0.435447,0.852699
7,0.435645,0.853169,0.435402,0.852056
8,0.435645,0.853177,0.435402,0.85227
9,0.435633,0.852708,0.435431,0.852628


2022-06-22 18:25:22.422244: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-06-22 18:25:35.148051: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Unnamed: 0,loss,binary_accuracy,val_loss,val_binary_accuracy
0,0.745274,0.517841,0.551972,0.791634
1,0.512229,0.81554,0.48479,0.830819
2,0.471379,0.836468,0.460423,0.841902
3,0.454476,0.843539,0.449139,0.845334
4,0.446249,0.846121,0.443343,0.847193
5,0.44186,0.848005,0.440139,0.848695
6,0.439379,0.84922,0.438296,0.84941
7,0.437927,0.85019,0.437203,0.85034
8,0.437054,0.850674,0.43654,0.850697
9,0.436513,0.851024,0.436125,0.850912


- We will pick 0.01 as the learning rate: its loss plateaus within the first couple of epochs, whereas with 0.001 the loss is still decreasing after 10 epochs.

In [70]:
# Learning rate used for every remaining run
rate_fin = [1e-2]
model_3 = run_logreg_model(no_feat=3, features=feat_3, learning_rates=rate_fin)

2022-06-22 18:31:39.606352: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-06-22 18:31:52.183635: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Unnamed: 0,loss,binary_accuracy,val_loss,val_binary_accuracy
0,0.392874,0.886651,0.331522,0.91877
1,0.328817,0.91535,0.325058,0.917769
2,0.326166,0.915437,0.324267,0.918913
3,0.325706,0.915358,0.324243,0.918842
4,0.325689,0.915342,0.324102,0.917626
5,0.325619,0.915485,0.324148,0.918556
6,0.325693,0.915374,0.324136,0.918341
7,0.325676,0.915628,0.324091,0.917626
8,0.325674,0.915548,0.324099,0.917912
9,0.325668,0.915389,0.324104,0.917626


In [73]:
# Four-feature subset at the selected learning rate
model_4 = run_logreg_model(no_feat=4, features=feat_4, learning_rates=rate_fin)

2022-06-22 18:38:26.318096: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-06-22 18:38:38.805033: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Unnamed: 0,loss,binary_accuracy,val_loss,val_binary_accuracy
0,0.382981,0.895899,0.315709,0.922488
1,0.30369,0.922373,0.290975,0.926278
2,0.287373,0.925242,0.279058,0.929496
3,0.27866,0.927292,0.271954,0.930926
4,0.273405,0.926982,0.267057,0.933143
5,0.269848,0.928046,0.263811,0.932285
6,0.267482,0.928293,0.261364,0.933071
7,0.265727,0.928722,0.259519,0.933143
8,0.264425,0.928801,0.258114,0.933429
9,0.263447,0.928817,0.257001,0.9335


In [74]:
# Five-feature subset at the selected learning rate
model_5 = run_logreg_model(no_feat=5, features=feat_5, learning_rates=rate_fin)

2022-06-22 18:40:39.881896: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-06-22 18:40:52.488558: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Unnamed: 0,loss,binary_accuracy,val_loss,val_binary_accuracy
0,0.371512,0.900301,0.30617,0.92492
1,0.293327,0.926156,0.278732,0.929138
2,0.274751,0.929675,0.26493,0.933643
3,0.26439,0.931828,0.256376,0.935216
4,0.257838,0.931916,0.250316,0.937791
5,0.253224,0.933203,0.246082,0.937576
6,0.249952,0.933481,0.242774,0.938434
7,0.247412,0.934117,0.240164,0.93822
8,0.24541,0.93426,0.238082,0.938506
9,0.243812,0.934347,0.236356,0.939006


In [75]:
# Six-feature subset at the selected learning rate
model_6 = run_logreg_model(no_feat=6, features=feat_6, learning_rates=rate_fin)

2022-06-22 18:42:53.543450: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-06-22 18:43:06.028554: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Unnamed: 0,loss,binary_accuracy,val_loss,val_binary_accuracy
0,0.339237,0.917328,0.290733,0.926207
1,0.279687,0.928213,0.267778,0.929496
2,0.263931,0.931209,0.255841,0.933786
3,0.254812,0.933203,0.248237,0.935645
4,0.248869,0.934315,0.242662,0.937505
5,0.244583,0.93542,0.238705,0.938792
6,0.241484,0.936302,0.235561,0.939864
7,0.239048,0.936699,0.233055,0.94015
8,0.237103,0.937263,0.23105,0.940365
9,0.235541,0.937478,0.229362,0.940579


In [76]:
# All seven features at the selected learning rate
model_7 = run_logreg_model(no_feat=7, features=feat_7, learning_rates=rate_fin)

2022-06-22 18:45:07.314738: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-06-22 18:45:19.831040: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Unnamed: 0,loss,binary_accuracy,val_loss,val_binary_accuracy
0,0.341163,0.910582,0.285569,0.92778
1,0.274206,0.929882,0.263808,0.932714
2,0.259063,0.932996,0.252634,0.935645
3,0.250226,0.934744,0.245418,0.936503
4,0.244357,0.935174,0.240048,0.937147
5,0.240035,0.93596,0.23619,0.937075
6,0.236846,0.936723,0.233014,0.93729
7,0.234296,0.937065,0.230495,0.937576
8,0.232231,0.937358,0.228452,0.938434
9,0.230552,0.937613,0.226679,0.938434


In [None]:
# NOTE(review): this cell repeats the previous seven-feature run verbatim and
# rebinds model_7; it looks like a leftover copy.
model_7 = run_logreg_model(no_feat=7, features=feat_7, learning_rates=rate_fin)

2022-06-22 18:45:07.314738: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-06-22 18:45:19.831040: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Unnamed: 0,loss,binary_accuracy,val_loss,val_binary_accuracy
0,0.341163,0.910582,0.285569,0.92778
1,0.274206,0.929882,0.263808,0.932714
2,0.259063,0.932996,0.252634,0.935645
3,0.250226,0.934744,0.245418,0.936503
4,0.244357,0.935174,0.240048,0.937147
5,0.240035,0.93596,0.23619,0.937075
6,0.236846,0.936723,0.233014,0.93729
7,0.234296,0.937065,0.230495,0.937576
8,0.232231,0.937358,0.228452,0.938434
9,0.230552,0.937613,0.226679,0.938434
