# Neural Network

### 0. Read Data from the Previous Notebook

In [1]:
import pandas as pd

# Load the dataset produced by the previous notebook
df = pd.read_csv('data-stage2.csv')
# Bare last expression: render the frame as this cell's output
df

Unnamed: 0,category,amt,is_fraud,hour,trans_count_7d,trans_count_30d,time_diff
0,misc_net,4.97,0,1,0.0,0.0,0.000000
1,grocery_pos,107.23,0,1,0.0,0.0,0.000000
2,entertainment,220.11,0,1,0.0,0.0,0.000000
3,gas_transport,45.00,0,1,0.0,0.0,0.000000
4,misc_pos,41.96,0,1,0.0,0.0,0.000000
...,...,...,...,...,...,...,...
1852389,health_fitness,43.77,0,1,39.0,167.0,4.619444
1852390,kids_pets,111.84,0,1,62.0,272.0,2.706389
1852391,kids_pets,86.88,0,1,67.0,277.0,0.201111
1852392,travel,7.99,0,1,36.0,192.0,3.340278


### 1. Scaling and Encoding

In [2]:
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_selector
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn import set_config

# Columns are routed by dtype: float64 columns are min-max scaled,
# object (string) columns are one-hot encoded.
numeric_step = (
    "num",
    MinMaxScaler(),
    make_column_selector(dtype_include="float64"),
)
categorical_step = (
    "cat",
    OneHotEncoder(),
    make_column_selector(dtype_include="object"),
)

# Any column matched by neither selector is passed through unchanged.
preprocessor = ColumnTransformer(
    transformers=[numeric_step, categorical_step],
    remainder='passthrough',
)

# Render estimators as an HTML diagram when displayed in the notebook
set_config(display="diagram")

preprocessor

In [3]:
# Target: kept as a single-column DataFrame
y = df[['is_fraud']]

# Features: every other column, scaled / encoded by the preprocessor.
# NOTE(review): the preprocessor is fit on the full dataset here, i.e. before
# the train/test split below, so scaling statistics also see the test rows.
X = preprocessor.fit_transform(df.drop(columns=['is_fraud']))

### 2. Stratified Train-test Split

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. `stratify=y` keeps the fraud / non-fraud
# ratio the same in both splits; `random_state` pins the shuffle so the split
# (and everything derived from it) is reproducible across kernel restarts.
# 123 matches the seed already used for the Hyperband tuner.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=123
)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(1481915, 19) (370479, 19) (1481915, 1) (370479, 1)


### 3. Oversampling Training Data

In [5]:
from imblearn.over_sampling import SMOTE

# Oversample the minority class on the training split only, so the test split
# keeps its original class balance. `random_state` makes the synthetic samples
# reproducible (123 matches the seed used for the Hyperband tuner).
X_os, y_os = SMOTE(random_state=123, n_jobs=-1).fit_resample(X_train, y_train)

print(X_os.shape, y_os.shape)

(2948388, 19) (2948388, 1)


### 4. Hyper-parameter Tuning using Keras Tuner

Only run the cells in this section if you want to tune the model's hyper-parameters further. Tuning is time-consuming because the number of layers, the number of units per layer, and the learning rate are all optimized together. The search uses Keras Tuner's Hyperband tuner, which automatically allocates training epochs to each trial and runs tournament-style rounds of optimization.

In [6]:
!pip install keras-tuner -q
import keras_tuner as kt
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [7]:
class MyHyperModel(kt.HyperModel):
    """Search space for a fully connected binary classifier.

    Tunable hyper-parameters:
      * ``num_layers``    - number of hidden ReLU layers (1 to 4).
      * ``unitscale_<i>`` - width of hidden layer ``i`` as a fraction
        (0.5 to 1.0, step 0.05) of the previous layer's width; the first
        hidden layer is scaled from the input width.
      * ``learning_rate`` - SGD learning rate, chosen from a fixed list.

    Args:
        name: Forwarded unchanged to ``kt.HyperModel``.
        tunable: Forwarded unchanged to ``kt.HyperModel``.
        input_dim: Number of input features (keyword-only). Defaults to 19,
            the column count printed for ``X_train`` in this notebook.
    """

    def __init__(self, name=None, tunable=True, *, input_dim=19):
        super().__init__(name=name, tunable=tunable)
        self.input_dim = input_dim

    def build(self, hp):
        """Build and compile one candidate model from the values in ``hp``.

        Args:
            hp: The hyper-parameter container supplied by the tuner.

        Returns:
            A compiled ``keras.models.Sequential`` model with a single
            sigmoid output unit.
        """
        # `shape` must be a tuple: (n_features,)
        model = keras.models.Sequential([keras.Input(shape=(self.input_dim,))])

        # Start from the input width; every hidden layer is scaled down
        # (or kept equal) relative to the layer before it.
        prev_units = self.input_dim
        for i in range(hp.Int("num_layers", 1, 4)):
            scale = hp.Float(
                "unitscale_" + str(i),
                min_value=.5,
                max_value=1,
                step=.05,
            )

            # Ceil so a layer never shrinks to 0 units; cast to int because
            # np.ceil returns a float and a layer width must be an integer.
            units = int(np.ceil(scale * prev_units))
            prev_units = units
            model.add(layers.Dense(units=units, activation="relu"))

        # Constant single output layer
        model.add(layers.Dense(units=1, activation="sigmoid"))

        # Compile model, tune learning rate
        learning_rate = hp.Choice("learning_rate", [5e-3, 1e-2, 5e-2, 1e-1, 5e-1])
        model.compile(
            optimizer=keras.optimizers.SGD(learning_rate),
            loss=keras.losses.BinaryCrossentropy(),
            metrics=[
                keras.metrics.BinaryAccuracy(name="accuracy"),
                keras.metrics.Precision(name="precision"),
                keras.metrics.Recall(name="recall"),
            ],
        )

        return model

In [None]:
# Instantiate the hyper-parameter search space
hypermodel = MyHyperModel()

# We can let the Tuner select our HPs
#  within the allotted ranges
#  or set them explicitly below
# hp = HyperParameters()
# Override values like so if needed:
#  hp.Choice("learning_rate", values=[1e-2, 1e-3, 1e-4])

# Hyperband Tuner, fast to converge on high quality model.
# The objective is validation precision (maximized); `max_epochs` caps how
# long any single trial may train, and `overwrite=True` discards results of
# earlier searches stored in the same directory / project.
tuner = kt.Hyperband(
    hypermodel,
    metrics=[
        keras.metrics.BinaryAccuracy(name="accuracy"),
        keras.metrics.Precision(name="precision"),
        keras.metrics.Recall(name="recall"),
    ],
    objective=kt.Objective(name="val_precision", direction="max"),
    max_epochs=150,
    seed=123,
    directory='Hyperband_tuner_dir',
    project_name='Hyperband_tuner',
    overwrite=True,
)

# No `epochs` argument: Hyperband assigns the epoch budget of every trial
# itself, so a value passed here would be overridden. `use_multiprocessing`
# is dropped as well: it only concerns generator / Sequence inputs, not
# in-memory arrays.
# NOTE(review): the held-out test split is used as validation data, so the
# selected hyper-parameters are tuned against the same rows that are later
# used for the final evaluation. Consider a separate validation split.
tuner.search(
    X_os,
    y_os,
    validation_data=(X_test, y_test),
)

In [None]:
# Display the tuner's summary of the search results
tuner.results_summary()

### 5. Train Model using the Best Set of Hyper-parameters

In [None]:
# Sequential model fed with 19 input features; `shape` must be a tuple
model = keras.models.Sequential([keras.Input(shape=(19,))])

# Hidden-layer widths: each layer is a fixed fraction of the previous one
# (presumably the scales chosen by the tuner - confirm against its summary).
# np.ceil returns a float, so cast to int before using it as a layer width.
layer1_num_units = int(np.ceil(.9*19))
layer2_num_units = int(np.ceil(.8*layer1_num_units))
layer3_num_units = int(np.ceil(.6*layer2_num_units))
layer4_num_units = int(np.ceil(.7*layer3_num_units))

# Create hidden layers
for num_units in (
    layer1_num_units,
    layer2_num_units,
    layer3_num_units,
    layer4_num_units,
):
    model.add(
        layers.Dense(
            units=num_units,
            activation="relu",
        )
    )

# Single output layer
model.add(
    layers.Dense(
        units=1,
        activation="sigmoid"
    )
)

# Compile model with a fixed SGD learning rate
model.compile(
    optimizer=keras.optimizers.SGD(1e-2),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[
        keras.metrics.BinaryAccuracy(name="accuracy"),
        keras.metrics.Precision(name="precision"),
        keras.metrics.Recall(name="recall")
    ]
)

print("Fit model on training data")
# NOTE(review): the test split is used as validation data here; it only
# drives the per-epoch val_* metrics reported during fitting.
history = model.fit(
    X_os,
    y_os,
    # 50 epochs gives the full results; lower to 10 for faster testing
    epochs=50,
    validation_data=(X_test, y_test)
)


### 6. Evaluate on the Full Test Set

In [None]:
print("Evaluate on test data")
results = model.evaluate(X_test, y_test)

# evaluate() returns the loss followed by the metrics the model was compiled
# with (accuracy, precision, recall), so label each value rather than
# printing the whole list behind a "loss, acc" caption.
for metric_name, metric_value in zip(
    ["loss", "accuracy", "precision", "recall"], results
):
    print(f"test {metric_name}: {metric_value}")