# Dense Networks

This notebook shows how to apply dense networks to real-life data

### Import all the needed modules

In [None]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler 
from sklearn.model_selection import train_test_split

### Define the path of the data source for convenience

Source of the data is the `train.csv` of https://www.kaggle.com/c/microsoft-malware-prediction/data

A subset of the data with 10000 rows is also provided with the code in the `data` directory

In [None]:
# Path to the CSV data file, relative to the directory the notebook runs from.
CSV_PATH = '../data/malware.csv'

### Read the first 10,000 rows of the data

In [None]:
# Load at most the first 10,000 data rows of the CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer=CSV_PATH, nrows=10_000)

### View the loaded dataset

In [None]:
df.head()

In [None]:
df.tail()

### Extract the labels of the data

In [None]:
# Label vector: the `HasDetections` column pulled out as a NumPy array.
y = df.loc[:, 'HasDetections'].values

In [None]:
y

### Select a few columns as features

In [None]:
# Columns kept as model features.
cols_to_use = [
    'ProductName',
    'Platform',
    'OsBuild',
    'AVProductsInstalled',
]
# `.copy()` detaches the feature frame from `df`, so later in-place edits to
# `X_raw` do not write back into the full dataset.
X_raw = df.loc[:, cols_to_use].copy()
X_raw.head()

### Check the data for missing data

Here we see that `AVProductsInstalled` has missing values

In [None]:
# One boolean per column: True if that column holds at least one missing value.
X_raw.isna().any()

### Impute the missing data

Using a conservative approach, all missing values are set to 0

In [None]:
# Replace missing `AVProductsInstalled` entries with 0; present values are kept.
X_raw['AVProductsInstalled'] = X_raw['AVProductsInstalled'].fillna(0)

In [None]:
# Same per-column check again (axis=0 reduces over the rows of each column).
X_raw.isna().any(axis=0)

### Split the data into training and test sets

In [None]:
# 80/20 split. `stratify=y` keeps the label proportions the same in both
# parts, and the fixed `random_state` makes the split repeatable.
train_X_raw, test_X_raw, train_y, test_y = train_test_split(
    X_raw,
    y,
    test_size=0.2,
    random_state=0,
    stratify=y,
)

# Report the shape of each part of the split.
print(f'train_X_raw: {train_X_raw.shape}')
print(f'test_X_raw: {test_X_raw.shape}')
print(f'train_y: {train_y.shape}')
print(f'test_y: {test_y.shape}')

### Define encoders to convert the data to numeric

In [None]:
# Column-wise preprocessing:
#   * 'ohe'    - one-hot encodes ProductName, Platform and OsBuild, all treated
#                as categorical. With handle_unknown='ignore', a category not
#                seen during fit is encoded as all zeros instead of raising.
#   * 'scaler' - standardises AVProductsInstalled.
#
# Dense output is requested with ColumnTransformer's `sparse_threshold=0`
# instead of OneHotEncoder's `sparse=False`. That keyword was renamed to
# `sparse_output` in scikit-learn 1.2 and removed in 1.4, so passing it raises
# a TypeError on current releases, while `sparse_threshold=0` returns a dense
# array on both old and new versions.
encoders = ColumnTransformer(
    [
        ('ohe', OneHotEncoder(handle_unknown='ignore'), ['ProductName', 'Platform', 'OsBuild']),
        ('scaler', StandardScaler(), ['AVProductsInstalled']),
    ],
    sparse_threshold=0,
)

### Encode the training data. Note the use of the ```fit_transform``` for the training encoding

In [None]:
# fit_transform fits the encoders on the training split only, then encodes
# that same split.
train_X = encoders.fit_transform(train_X_raw)

In [None]:
train_X

In [None]:
train_X.shape

### Encode the test data. Note the use of ``transform`` only for the testing encoding

In [None]:
# transform only: the encoders fitted on the training split are reused as-is,
# so nothing is learned from the test split.
test_X = encoders.transform(test_X_raw)

In [None]:
test_X

In [None]:
test_X.shape

### Create a Keras model to train.

The commented-out code also shows how to add additional network elements for generalization such as:

*   Dropouts
*   Batch Normalization
*   Regularization

In [None]:
# Fully connected binary classifier built with the Keras functional API.
#
# The input width is read from the encoded training matrix instead of being
# hard-coded. The number of one-hot columns depends on which categories occur
# in the training split, so a fixed width stops matching as soon as the data,
# the split or the selected columns change.
input_ = tf.keras.layers.Input(shape=(train_X.shape[1],))
dense1 = tf.keras.layers.Dense(10, activation='relu')(input_)
# `reg1` / `reg2` are pass-through hooks: replace the plain assignment with one
# of the commented alternatives to insert Dropout or BatchNormalization.
reg1 = dense1
# reg1 = tf.keras.layers.Dropout(0.1)(dense1)
# reg1 = tf.keras.layers.BatchNormalization()(dense1)
dense2 = tf.keras.layers.Dense(256, activation='relu')(reg1)
reg2 = dense2
# reg2 = tf.keras.layers.Dropout(0.1)(dense2)
# reg2 = tf.keras.layers.BatchNormalization()(dense2)
# Alternative third layer with L1/L2 weight regularization:
# dense3 = tf.keras.layers.Dense(256, activation='relu', 
#                                kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.01, l2=0.01))(reg2)
dense3 = tf.keras.layers.Dense(256, activation='relu')(reg2)
# Single sigmoid unit: one value between 0 and 1 per input row.
output = tf.keras.layers.Dense(1, activation='sigmoid')(dense3)

In [None]:
# Wrap the layer graph running from `input_` to `output` into a Keras Model.
model = tf.keras.Model(inputs=input_, outputs=output)

In [None]:
model.summary()

In [None]:
# Draw the layer graph of the model.
# NOTE(review): plot_model relies on the optional pydot and Graphviz
# packages — confirm they are installed in the notebook environment.
tf.keras.utils.plot_model(model)

In [None]:
# Adam optimizer with binary cross-entropy loss; no additional metrics are set.
model.compile(optimizer='adam', loss='binary_crossentropy')

### Define an early stop

This allows the training to stop if the validation loss does not improve after ```patience``` number of epochs

In [None]:
# Stop once the validation loss has gone 20 epochs without improving, and roll
# the model back to the weights of its best epoch.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)

### Define a model checkpoint

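This will save the model based on the parameters passed. With the default settings used here, a checkpoint is written at the end of every epoch (the epoch number is part of the file name), not only for the best model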

In [None]:
# Checkpoint file name template; Keras fills in `{epoch:02d}` with the epoch
# number, so every epoch gets its own file.
# The `.keras` suffix replaces the earlier `.mdl`: Keras 3 (bundled with
# TensorFlow >= 2.16) raises a ValueError for checkpoint paths that do not end
# in `.keras` or `.h5`, while older tf.keras releases accept `.keras` as well.
MODEL_PATH = 'dense/checkpoints/model_at_{epoch:02d}.keras'
# Make sure the checkpoint directory exists before training starts.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
# Default settings: the full model is written at the end of every epoch.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(MODEL_PATH)

### Train the model

Add a validation split to check for overfitting and provide the early stop and model checkpoint as callbacks

In [None]:
# Train for up to 100 epochs in mini-batches of 32, holding out 20% of the
# training data for validation. The callbacks handle early stopping and
# per-epoch checkpointing.
training_history = model.fit(
    x=train_X,
    y=train_y,
    batch_size=32,
    epochs=100,
    validation_split=0.2,
    callbacks=[early_stop, model_checkpoint],
)

In [None]:
!ls dense/checkpoints/

### Save the model

In [None]:
# Destination of the final trained model.
# The `.keras` suffix replaces the earlier `.mdl`: Keras 3 (bundled with
# TensorFlow >= 2.16) raises a ValueError in `model.save` for paths that do not
# end in `.keras` or `.h5`, while older tf.keras releases accept `.keras` too.
SAVED_MODEL_PATH = 'dense/model.keras'
model.save(SAVED_MODEL_PATH)

### Load the saved model

In [None]:
# Read the model back from the path it was just saved to.
saved_model = tf.keras.models.load_model(SAVED_MODEL_PATH)

### Verify the structure of the loaded model

In [None]:
saved_model.summary()

In [None]:
tf.keras.utils.plot_model(saved_model)

### Consume the best model

Prediction and evaluation can be performed on the best model

In [None]:
# Predict on the encoded test split. The network ends in a single sigmoid
# unit, so each prediction is a value between 0 and 1.
test_y_pred = saved_model.predict(test_X)

In [None]:
# Compute the loss of the reloaded model on the held-out test split.
saved_model.evaluate(test_X, test_y)