# Neural Network Pipelines

In [None]:
from sklearn.datasets import load_sample_images
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import OneHotEncoder, FunctionTransformer
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import seaborn as sns

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.wrappers import scikit_learn
from tensorflow.keras.callbacks import EarlyStopping

## Learning Objectives

- Use `tensorflow` to code up a neural network model
- Use wrappers inside `tensorflow` to make models that can jibe with `sklearn`
- Add a `tensorflow` network into an `sklearn` pipeline

In [None]:
# Load the scikit-learn digits dataset, then pull out the feature matrix
# and the integer class labels.
digits = load_digits()
X, y = digits.data, digits.target

In [None]:
# Peek at the first sample: a flat vector of pixel intensities.
X[0]

In [None]:
# Reshape the flat vector back into an 8x8 grid so it can be drawn as an image.
plt.imshow(X[0].reshape(8, 8));

That's a 0!

In [None]:
# The target label stored for that same first sample.
y[0]

#### Getting data ready for modeling
**Preprocessing**:

- Use `train_test_split` to create `X_train`, `y_train`, `X_test`, and `y_test`.
- Split the training data into pure-train and validation sets.
- Scale the pixel intensities to values between 0 and 1.

Scaling our input variables will help speed up our neural network.

Since our minimum intensity is 0, we can normalize the inputs by dividing each value by the max value (16).

In [None]:
# Hold out 20% of the samples as a test set; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# The maximum pixel intensity is 16, so dividing by 16 maps values into [0, 1].
X_train = X_train / 16
X_test = X_test / 16

For a multiclass output, our neural net expects our target to be in a certain form: one-hot encoded, with one column per class.

In [None]:
# One-hot encode the integer labels so each target row has one column per
# class.
# The dense-output flag was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old name was removed in 1.4, so try the new
# spelling first and fall back to the old one on older installs.
try:
    ohe = OneHotEncoder(sparse_output=False)
except TypeError:
    ohe = OneHotEncoder(sparse=False)

# Fit the encoder on the training labels only, then reuse it for the test labels.
y_train_enc = ohe.fit_transform(y_train.reshape(-1, 1))
y_test_enc = ohe.transform(y_test.reshape(-1, 1))

In [None]:
# Display the one-hot encoded test labels.
y_test_enc

To complete our model we'll want a softmax activation in the output layer:

$$\large \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)}$$

The softmax function outputs a number between 0 and 1 for each of our classes. The probabilities across all classes sum to 1.

The number of nodes in our output layer equals the number of categories in our dataset.

We also need a new loss function: **categorical crossentropy**, which calculates a separate loss for each label and then sums the results.

In [None]:
# Model with ten output neurons: two tanh hidden layers feeding a
# 10-way softmax, one output per digit class.
model = Sequential([
    Dense(12, activation='tanh', input_dim=64),
    Dense(8, activation='tanh'),
    Dense(10, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 20 epochs in mini-batches of 100 and keep the object returned
# by fit(), which carries the per-epoch history.
results = model.fit(X_train, y_train_enc, batch_size=100, epochs=20)

We can access the history of our model via `results.history`.
Use `__dict__` to take a tour of everything the returned object stores.

In [None]:
# Show every attribute stored on the object returned by fit().
results.__dict__

In [None]:
# Pull the per-epoch training curves out of the fit history.
tanh_loss, tanh_accuracy = (
    results.history[key] for key in ('loss', 'accuracy')
)

# Loss on the left axis, accuracy on the right, both against epoch number.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
sns.lineplot(x=results.epoch, y=tanh_loss, label='loss', ax=ax1)
sns.lineplot(x=results.epoch, y=tanh_accuracy, label='accuracy', ax=ax2);

The two plots above both relate to the quality of our model. The left-hand plot is our loss. It uses the probabilities associated with our predictions to judge how well our predictions fit reality. We want it to decrease as far as possible.

The accuracy judges how good the predictions are once each sample has been assigned to the class with the highest softmax probability. We want accuracy to increase.

If we look at our loss, it is still decreasing. That is a signal that our model is **still learning**. If our model is still learning, we can allow it to get better by turning a few dials.

Let's:
- increase the number of epochs;
- change tanh activation in the hidden layers to ReLU; and
- decrease the batch size.

In [None]:
# Check the dimensions of the one-hot encoded training targets.
y_train_enc.shape

In [None]:
# Same layer sizes as the first model, with ReLU replacing tanh in the
# hidden layers.
model2 = Sequential([
    Dense(12, activation='relu', input_dim=64),
    Dense(8, activation='relu'),
    Dense(10, activation='softmax'),
])

model2.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# batch_size of None means batch_size = 32!
results = model2.fit(
    X_train,
    y_train_enc,
    batch_size=None,
    epochs=50,
    verbose=1,
)

In [None]:
# Per-epoch training loss and accuracy for the ReLU model.
relu_loss, relu_accuracy = (
    results.history[key] for key in ('loss', 'accuracy')
)

# Side-by-side panels: loss on the left, accuracy on the right.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
sns.lineplot(x=results.epoch, y=relu_loss, label='loss', ax=ax1)
sns.lineplot(x=results.epoch, y=relu_accuracy, label='accuracy', ax=ax2);

### Adding in Validation Data

So far we have looked only at our training set. Let's bring a validation set into the picture. Check the docstring for the `.fit()` method and pass in our validation data.

In [None]:
# Carve a 20% validation set out of the (already scaled and encoded)
# training data; what remains is the "pure" training set.
X_pure_train, X_val, y_pure_train_enc, y_val_enc = train_test_split(
    X_train, y_train_enc, test_size=0.2, random_state=42
)

In [None]:
# ReLU network with an extra 4-unit hidden layer ahead of the softmax output.
model3 = Sequential([
    Dense(12, activation='relu', input_dim=64),
    Dense(8, activation='relu'),
    Dense(4, activation='relu'),
    Dense(10, activation='softmax'),
])

model3.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the pure-training split and pass the validation split alongside it,
# so the history also records `val_loss` and `val_accuracy`.
results = model3.fit(
    X_pure_train,
    y_pure_train_enc,
    epochs=50,
    batch_size=10,
    validation_data=(X_val, y_val_enc),
)

train_loss, train_acc, val_loss, val_acc = (
    results.history[key]
    for key in ('loss', 'accuracy', 'val_loss', 'val_accuracy')
)

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

# Left panel: training vs. validation loss.
sns.lineplot(x=results.epoch, y=train_loss, label='train_loss', ax=ax1)
sns.lineplot(x=results.epoch, y=val_loss, label='val_loss', ax=ax1)

# Right panel: training vs. validation accuracy.
sns.lineplot(x=results.epoch, y=train_acc, label='train_accuracy', ax=ax2)
sns.lineplot(x=results.epoch, y=val_acc, label='val_accuracy', ax=ax2);

In [None]:
# Validation accuracy recorded for the last epoch of training.
results.history['val_accuracy'][-1]

## Connecting with `sklearn`

The `keras.wrappers` submodule means that we can turn `keras` models into estimators that `sklearn` tools will recognize.

In [None]:
# This will throw an error: `model3` is a bare Keras model, not a
# scikit-learn estimator, so `cross_val_score` cannot work with it.
# The targets are the one-hot labels from the pure-train split, so the
# failure comes from the model itself and not from an undefined variable.

cross_val_score(model3, X_pure_train, y_pure_train_enc)

But if we use the `scikit_learn` wrapper we can take advantage of our familiar scikit-learn tools!

In [None]:
def build_model():
    """Build and compile a fresh digit-classification network.

    The network takes 64 input features, passes them through ReLU hidden
    layers of 12, 8 and 4 units, and ends in a 10-unit softmax layer. It
    is compiled with the Adam optimizer, categorical cross-entropy loss
    and accuracy as the tracked metric.

    The function takes no arguments so it can be handed to the Keras
    scikit-learn wrapper as its model factory.

    Returns:
        The compiled, untrained ``Sequential`` model.
    """
    network = Sequential([
        Dense(12, activation='relu', input_dim=64),
        Dense(8, activation='relu'),
        Dense(4, activation='relu'),
        Dense(10, activation='softmax'),
    ])
    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

In [None]:
# Wrap the model factory so scikit-learn tools can treat the network as an
# estimator; the extra keyword arguments are the training settings.
keras_model = scikit_learn.KerasClassifier(
    build_fn=build_model,
    epochs=50,
    batch_size=32,
    verbose=2,
)

In [None]:
# Confirm what kind of object the wrapper produced.
type(keras_model)

In [None]:
# Cross-validate the wrapped network on the pure-training split.
# Use the one-hot targets produced by the earlier split: that split only
# defined `y_pure_train_enc`, so a plain `y_pure_train` would be a NameError.
cross_val_score(keras_model, X_pure_train, y_pure_train_enc)

## Other Metrics

In [None]:
# Unpack the per-epoch training and validation curves from the latest fit.
history = results.history
training_loss, val_loss = history['loss'], history['val_loss']
training_accuracy, val_accuracy = history['accuracy'], history['val_accuracy']

In [None]:
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

# X positions are simply the epoch indices of each recorded series.
train_epochs = list(range(len(training_loss)))
val_epochs = list(range(len(val_loss)))

# Left panel: loss, training in red and validation in blue.
sns.lineplot(x=train_epochs, y=training_loss,
             color='r', label='training', ax=ax1)
sns.lineplot(x=val_epochs, y=val_loss,
             color='b', label='validation', ax=ax1)

# Right panel: accuracy, with the same color coding.
sns.lineplot(x=train_epochs, y=training_accuracy,
             color='r', label='training', ax=ax2)
sns.lineplot(x=val_epochs, y=val_accuracy,
             color='b', label='validation', ax=ax2)
ax1.legend();

In [None]:
# Turn each row of predicted class probabilities into a single digit label
# by taking the index of the largest probability.
test_probabilities = model3.predict(X_test)
y_hat_test = test_probabilities.argmax(axis=-1)

In [None]:
# scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the
# true digits and columns are the predicted digits. Passing the predictions
# first would silently transpose the matrix.
confusion_matrix(y_test, y_hat_test)

## Pipelining

In [None]:
def my_scaler(x, max_value=16):
    """Scale pixel intensities by dividing by the maximum intensity.

    Args:
        x: A number or array of pixel intensities.
        max_value: The largest possible intensity. Defaults to 16, the
            maximum used for the digits data in this notebook, so calling
            ``my_scaler(x)`` behaves exactly as before.

    Returns:
        ``x / max_value``; inputs between 0 and ``max_value`` land in [0, 1].
    """
    return x / max_value

In [None]:
# Wrap the plain scaling function so it can be used as a pipeline step.
Scaler = FunctionTransformer(func=my_scaler)

In [None]:
# Pipeline stages in order: scale the raw pixels, then train the wrapped network.
steps = [
    ('scaler', Scaler),
    ('model', keras_model),
]

In [None]:
# Chain the scaler and the wrapped network into a single estimator.
pipe = Pipeline(steps)

In [None]:
# Split the raw, unscaled data again; the pipeline does the scaling itself.
X_raw_train, X_raw_test, y_raw_train, y_raw_test = train_test_split(
    X,
    y,
    random_state=43,
)

In [None]:
# One-hot encode the labels of the raw split.
# The dense-output flag was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old name was removed in 1.4, so try the new
# spelling first and fall back to the old one on older installs.
try:
    ohe2 = OneHotEncoder(sparse_output=False)
except TypeError:
    ohe2 = OneHotEncoder(sparse=False)

# Fit on the training labels only, then apply the same encoding to the test labels.
y_raw_train_enc = ohe2.fit_transform(y_raw_train.reshape(-1, 1))
y_raw_test_enc = ohe2.transform(y_raw_test.reshape(-1, 1))

In [None]:
# Fit parameters prefixed with `model__` are handed straight to the Keras
# step, so the pipeline's scaler is never applied to `validation_data`.
# Scale the validation features by hand so they are on the same 0-1 range
# as the inputs the network is trained on.
pipe.fit(
    X_raw_train,
    y_raw_train_enc,
    model__epochs=30,
    model__validation_data=(my_scaler(X_raw_test), y_raw_test_enc),
)