# Chapter 20

## Neural networks

### 20.1 Preprocessing data for neural networks

In [None]:
from sklearn import preprocessing 
import numpy as np

In [None]:
# Toy matrix: five observations, two features on very different scales.
features = np.array([
    [-100.1, 3240.1],
    [-200.2, -234.1],
    [5000.5, 150.1],
    [6000.6, -125.1],
    [9000.9, -673.1],
])

# Learn the scaling parameters from the data, then apply them in a second step.
scaler = preprocessing.StandardScaler()
scaler.fit(features)
features_standardized = scaler.transform(features)

# Display the standardized matrix as the cell output.
features_standardized

In [None]:
# Report mean and standard deviation of the first standardized feature column,
# one statistic per line.
print(f'Mean: {features_standardized[:, 0].mean()}')
print(f'Standard deviation: {features_standardized[:, 0].std()}')

### 20.2 Designing a neural network

In [None]:
from keras import models 
from keras import layers

In [None]:
# Feed-forward network declared as a layer list:
# 10 inputs -> 16 ReLU units -> 16 ReLU units -> 1 sigmoid output.
network = models.Sequential([
    layers.Dense(units=16, activation="relu", input_shape=(10,)),
    layers.Dense(units=16, activation="relu"),
    layers.Dense(units=1, activation="sigmoid"),
])

In [None]:
# Configure training: RMSprop optimizer, binary cross-entropy loss,
# accuracy tracked as the reported metric.
network.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])

### 20.3 Training a binary classifier

In [None]:
import numpy as np
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer 
from keras import models
from keras import layers

In [None]:
# Seed NumPy's global random generator.
np.random.seed(0)

# Vocabulary size; also the width of the feature matrix fed to the network.
number_of_features = 1000

# Load the IMDB train/test splits, restricted to `number_of_features` words.
(data_train, target_train), (data_test, target_test) = imdb.load_data(
    num_words=number_of_features
)

# Convert each split into a binary feature matrix (train first, then test).
tokenizer = Tokenizer(num_words=number_of_features)
features_train, features_test = (
    tokenizer.sequences_to_matrix(sequences, mode="binary")
    for sequences in (data_train, data_test)
)

# Two hidden ReLU layers of 16 units and a single sigmoid output unit.
network = models.Sequential([
    layers.Dense(units=16, activation="relu", input_shape=(number_of_features,)),
    layers.Dense(units=16, activation="relu"),
    layers.Dense(units=1, activation="sigmoid"),
])

network.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])

In [None]:
# Train for 3 epochs in mini-batches of 100, evaluating on the test split
# after every epoch; verbose=2 selects Keras' per-epoch logging mode.
history = network.fit(
    x=features_train,
    y=target_train,
    batch_size=100,
    epochs=3,
    validation_data=(features_test, target_test),
    verbose=2,
)

### 20.4 Training a multiclass classifier

In [None]:
import numpy as np
from keras.datasets import reuters
# Import from the public `keras.utils` namespace: the private
# `keras.utils.np_utils` module path is not available in newer Keras releases.
from keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras import models
from keras import layers

In [None]:
# Seed NumPy's global random generator.
np.random.seed(0)

# Vocabulary size; also the width of the feature matrix fed to the network.
number_of_features = 5000

# Load the Reuters train/test splits, restricted to `number_of_features` words.
data = reuters.load_data(num_words=number_of_features)
(data_train, target_vector_train), (data_test, target_vector_test) = data

# Convert each split's sequences into a binary feature matrix.
tokenizer = Tokenizer(num_words=number_of_features)
features_train = tokenizer.sequences_to_matrix(data_train, mode="binary")
features_test = tokenizer.sequences_to_matrix(data_test, mode="binary")

# One-hot encode the integer class labels. The class count is computed once
# from both splits and passed explicitly so that the train and test target
# matrices always have the same number of columns, even if the highest class
# id happens to be absent from one of the splits.
number_of_classes = int(max(np.max(target_vector_train), np.max(target_vector_test))) + 1
target_train = to_categorical(target_vector_train, num_classes=number_of_classes)
target_test = to_categorical(target_vector_test, num_classes=number_of_classes)

In [None]:
# Two hidden ReLU layers of 100 units and a 46-unit softmax output layer.
network = models.Sequential([
    layers.Dense(units=100, activation="relu", input_shape=(number_of_features,)),
    layers.Dense(units=100, activation="relu"),
    layers.Dense(units=46, activation="softmax"),
])

# Categorical cross-entropy pairs with the one-hot encoded targets.
network.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])

# Train for 3 epochs in mini-batches of 100, evaluating on the test split
# after every epoch.
history = network.fit(
    x=features_train,
    y=target_train,
    batch_size=100,
    epochs=3,
    validation_data=(features_test, target_test),
    verbose=1,
)

In [None]:
# Inspect the training feature matrix. The alias is deliberately NOT called
# `object`: rebinding that name would shadow the Python builtin for every
# cell executed afterwards in the same kernel.
feature_matrix = features_train

print(
    f'Shape: {feature_matrix.shape}',
    f'Dimension: {feature_matrix.ndim}',
    f'Size: {feature_matrix.size}',
    sep='\n'
)

### 20.5 Training a regressor

In [None]:
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras import models
from keras import layers
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split 
from sklearn import preprocessing

In [None]:
# Seed NumPy's global random generator.
np.random.seed(0)

# Synthetic regression data: 10000 samples, 3 features (all informative),
# a single target and no added noise.
features, target = make_regression(
    n_samples=10000,
    n_features=3,
    n_informative=3,
    n_targets=1,
    noise=0.0,
    random_state=0,
)

# Hold out 33% of the samples as the test split.
features_train, features_test, target_train, target_test = train_test_split(
    features,
    target,
    random_state=0,
    test_size=0.33,
)

In [None]:
# Two hidden ReLU layers of 32 units; the output layer is a single unit with
# no activation argument. The input width is taken from the training matrix.
network = models.Sequential([
    layers.Dense(units=32, activation="relu", input_shape=(features_train.shape[1],)),
    layers.Dense(units=32, activation="relu"),
    layers.Dense(units=1),
])

# Mean squared error serves as both the loss and the reported metric.
network.compile(optimizer='RMSprop', loss='mse', metrics=['mse'])

# Train for 10 epochs in mini-batches of 100, evaluating on the test split
# after every epoch.
history = network.fit(
    x=features_train,
    y=target_train,
    batch_size=100,
    epochs=10,
    validation_data=(features_test, target_test),
    verbose=1,
)

### 20.6 Making predictions

In [None]:
import numpy as np
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer 
from keras import models
from keras import layers

In [None]:
# Seed NumPy's global random generator.
np.random.seed(0)

# Vocabulary size; also the width of the feature matrix fed to the network.
number_of_features = 10000

# Load the IMDB train/test splits, restricted to `number_of_features` words.
(data_train, target_train), (data_test, target_test) = imdb.load_data(
    num_words=number_of_features
)

# Convert each split into a binary feature matrix (train first, then test).
tokenizer = Tokenizer(num_words=number_of_features)
features_train, features_test = (
    tokenizer.sequences_to_matrix(sequences, mode="binary")
    for sequences in (data_train, data_test)
)

In [None]:
# Two hidden ReLU layers of 16 units and a single sigmoid output unit.
network = models.Sequential([
    layers.Dense(units=16, activation="relu", input_shape=(number_of_features,)),
    layers.Dense(units=16, activation="relu"),
    layers.Dense(units=1, activation="sigmoid"),
])

network.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])

# Train silently (verbose=0) for 3 epochs in mini-batches of 100, evaluating
# on the test split after every epoch.
history = network.fit(
    x=features_train,
    y=target_train,
    batch_size=100,
    epochs=3,
    validation_data=(features_test, target_test),
    verbose=0,
)

In [None]:
# Run the trained network on the whole test feature matrix.
predicted_target = network.predict(x=features_test)

# Show the network's output for the first test observation.
predicted_target[0]

### 20.7 Visualizing training history

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer 
from keras import models
from keras import layers
import matplotlib.pyplot as plt

sns.set_theme(style="darkgrid")

In [None]:
# Seed NumPy's global random generator.
np.random.seed(0)

# Vocabulary size; also the width of the feature matrix fed to the network.
number_of_features = 10000

# Load the IMDB train/test splits, restricted to `number_of_features` words.
(data_train, target_train), (data_test, target_test) = imdb.load_data(
    num_words=number_of_features
)

# Convert each split into a binary feature matrix (train first, then test).
tokenizer = Tokenizer(num_words=number_of_features)
features_train, features_test = (
    tokenizer.sequences_to_matrix(sequences, mode="binary")
    for sequences in (data_train, data_test)
)

In [None]:
# Two hidden ReLU layers of 16 units and a single sigmoid output unit.
network = models.Sequential([
    layers.Dense(units=16, activation="relu", input_shape=(number_of_features,)),
    layers.Dense(units=16, activation="relu"),
    layers.Dense(units=1, activation="sigmoid"),
])

network.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])

# Train silently (verbose=0) for 15 epochs in mini-batches of 1000; the
# returned history keeps the per-epoch metrics used by the plots below.
history = network.fit(
    x=features_train,
    y=target_train,
    batch_size=1000,
    epochs=15,
    validation_data=(features_test, target_test),
    verbose=0,
)

#### Matplotlib

In [None]:
# Per-epoch losses recorded by `fit`; "val_loss" was computed on the test
# split that was passed as validation data. Epochs are numbered from 1.
training_loss = history.history["loss"]
test_loss = history.history["val_loss"]
epoch_count = range(1, 1 + len(training_loss))

# Training loss as a red dashed line, test loss as a solid blue line; the
# legend entries come from the labels attached to each line.
plt.figure(figsize=(20, 10))
plt.plot(epoch_count, training_loss, "r--", label="Training Loss")
plt.plot(epoch_count, test_loss, "b-", label="Test Loss")
plt.legend()
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.show()

#### Seaborn

In [None]:
# Wide table: one row per epoch with both loss series side by side.
data = pd.DataFrame({
    'training_loss': training_loss,
    'test_loss': test_loss,
    'epoch_count': epoch_count,
})

# Long format: one row per (epoch, series), as expected by seaborn's `hue`.
data_long = data.melt(
    id_vars='epoch_count',
    value_vars=['training_loss', 'test_loss'],
)

data_long.head()

In [None]:
# Render figures at 300 dpi.
sns.set(rc={"figure.dpi": 300})

# One point per epoch and series, coloured by series; the figure is twice as
# wide as it is tall.
sns.relplot(data=data_long, x='epoch_count', y='value', hue='variable', aspect=2.0)

In [None]:
# Per-epoch accuracies recorded by `fit`; "val_accuracy" was computed on the
# test split that was passed as validation data.
training_accuracy = history.history["accuracy"]
test_accuracy = history.history["val_accuracy"]

# Training accuracy as a red dashed line, test accuracy as a solid blue line;
# the legend entries come from the labels attached to each line.
plt.figure(figsize=(20, 10))
plt.plot(epoch_count, training_accuracy, "r--", label="Training Accuracy")
plt.plot(epoch_count, test_accuracy, "b-", label="Test Accuracy")
plt.legend()
plt.xlabel("Epoch")
plt.ylabel("Accuracy Score")
plt.show()

### 20.8 Reducing overfitting with weight regularization

In [None]:
import numpy as np
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer 
from keras import models
from keras import layers
from keras import regularizers

In [None]:
# Seed NumPy's global random generator.
np.random.seed(0)

# Vocabulary size; also the width of the feature matrix fed to the network.
number_of_features = 1000

# Load the IMDB train/test splits, restricted to `number_of_features` words.
(data_train, target_train), (data_test, target_test) = imdb.load_data(
    num_words=number_of_features
)

# Convert each split into a binary feature matrix (train first, then test).
tokenizer = Tokenizer(num_words=number_of_features)
features_train, features_test = (
    tokenizer.sequences_to_matrix(sequences, mode="binary")
    for sequences in (data_train, data_test)
)

In [None]:
# Same 16-16-1 architecture as before, but both hidden layers carry an L2
# penalty (factor 0.01) on their kernel weights.
network = models.Sequential([
    layers.Dense(
        units=16,
        activation="relu",
        kernel_regularizer=regularizers.l2(0.01),
        input_shape=(number_of_features,),
    ),
    layers.Dense(
        units=16,
        kernel_regularizer=regularizers.l2(0.01),
        activation="relu",
    ),
    layers.Dense(units=1, activation="sigmoid"),
])

network.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])

# Train silently (verbose=0) for 15 epochs in mini-batches of 1000,
# evaluating on the test split after every epoch.
history = network.fit(
    x=features_train,
    y=target_train,
    batch_size=1000,
    epochs=15,
    validation_data=(features_test, target_test),
    verbose=0,
)

In [None]:
def plot_training_loss(training_loss, test_loss, epoch_count):
    """Draw training and test loss per epoch as a seaborn relational plot.

    The three inputs are zipped row-wise, so the table stops at the shortest
    of them. Relies on `pd` (pandas) and `sns` (seaborn) being available in
    the enclosing namespace, and sets seaborn's figure dpi to 300 as a side
    effect.

    Args:
        training_loss: Iterable of per-epoch training loss values.
        test_loss: Iterable of per-epoch test loss values.
        epoch_count: Iterable of epoch numbers used for the x axis.

    Returns:
        None. The plot is produced as a side effect.
    """
    column_names = ['training_loss', 'test_loss', 'epoch_count']
    rows = [row for row in zip(training_loss, test_loss, epoch_count)]

    # Wide table first (one row per epoch), then long format so that the two
    # loss series can be told apart through the `variable` column.
    wide = pd.DataFrame(rows, columns=column_names)
    long_form = wide.melt(
        id_vars='epoch_count',
        value_vars=['training_loss', 'test_loss'],
    )

    sns.set(rc={"figure.dpi": 300})

    # Figure is twice as wide as it is tall.
    sns.relplot(data=long_form, x='epoch_count', y='value', hue='variable', aspect=2.0)

In [None]:
# Per-epoch losses of the regularized network; "val_loss" was computed on the
# test split that was passed as validation data. Epochs are numbered from 1.
training_loss = history.history["loss"]
test_loss = history.history["val_loss"]
epoch_count = range(1, 1 + len(training_loss))

plot_training_loss(
    training_loss=training_loss,
    test_loss=test_loss,
    epoch_count=epoch_count,
)

### 20.9 Reducing overfitting with early stopping

In [None]:
import numpy as np
from keras.datasets import imdb
from keras.preprocessing.text import Tokenizer
from keras import models
from keras import layers
from keras.callbacks import EarlyStopping, ModelCheckpoint