In [18]:
# DATA MANIPULATION
import numpy as np

# DATA VISUALIZATION
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import StandardScaler

# BLOB DATASET
from sklearn.datasets import make_blobs

# KERAS
from keras import Sequential, Input, layers, regularizers
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical

In [19]:
# Synthetic classification data: 2000 samples, 10 features, 8 clusters.
# random_state pins the draw so a Restart & Run All regenerates the same data.
X, y = make_blobs(
    n_samples=2000,
    n_features=10,
    centers=8,
    cluster_std=7,
    random_state=42
)

In [20]:
# Only the first two of the ten features are drawn; colour encodes the class label.
plt.scatter(X[:, 0], X[:, 1], c=y);

In [21]:
# One-hot encode the integer class labels, as required by the
# 'categorical_crossentropy' loss the model is compiled with.
y_cat = to_categorical(y)

In [22]:
# Integer class labels, one per sample
y

array([1, 1, 1, ..., 7, 5, 5])

In [23]:
# One-hot encoded labels: one row per sample, one column per class
y_cat

array([[0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 1., 0., 0.]])

In [24]:
def initialize_model(input_dim=10, n_classes=8):
    """Build and compile a small fully connected softmax classifier.

    Args:
        input_dim: Number of features per sample. Defaults to 10, the
            feature count of the blob dataset used in this notebook.
        n_classes: Number of target classes, i.e. the width of the softmax
            output layer. Defaults to 8, the number of blob centers.

    Returns:
        A freshly initialised ``Sequential`` model compiled with the
        categorical cross-entropy loss, the Adam optimizer and the accuracy
        metric. Targets must therefore be one-hot encoded.
    """
    model = Sequential()
    model.add(Input(shape=(input_dim,)))                      # Input specification (no weights)
    model.add(layers.Dense(25, activation='relu'))            # First hidden layer
    model.add(layers.Dense(10, activation='relu'))            # Second hidden layer
    model.add(layers.Dense(n_classes, activation='softmax'))  # Output layer: one probability per class

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )

    return model

In [None]:
%%time

# 10-fold cross-validation of the network.
kf = KFold(n_splits=10)

results = []  # one [loss, accuracy] pair per fold

for train_index, test_index in kf.split(X):

    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y_cat[train_index], y_cat[test_index]

    # Fit the scaler on the training fold only, then apply the same
    # transformation to the held-out fold, so no test statistics leak in.
    standard_scaler = StandardScaler()
    X_train_scaled = standard_scaler.fit_transform(X_train)
    X_test_scaled = standard_scaler.transform(X_test)

    # A fresh, untrained model for every fold.
    model = initialize_model()

    model.fit(
        X_train_scaled,
        y_train,
        epochs=150,
        batch_size=16,
        verbose=0
    )

    res = model.evaluate(X_test_scaled, y_test, verbose=0)
    results.append(res)

In [None]:
# Index 1 of each per-fold evaluation is the accuracy (index 0 is the loss).
accuracy = [fold_metrics[1] for fold_metrics in results]

print(f'New mean accuracy: {np.mean(accuracy)*100:.2f}% (±{np.std(accuracy)*100:.2f})')

New mean accuracy: 80.15% (±1.91)


In [None]:
# Hold out 30% of the samples as a test set.
# random_state pins the shuffle so the split is identical on every rerun.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat, test_size=0.3, random_state=42
)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((1400, 10), (600, 10), (1400, 8), (600, 8))

In [None]:
# Learn the scaling statistics from the training set only, then reuse them
# unchanged on the test set.
standard_scaler = StandardScaler()

X_train_scaled = standard_scaler.fit_transform(X_train)
X_test_scaled = standard_scaler.transform(X_test)

In [None]:
%%time

# Baseline run: train for the full 500 epochs with no callback.
# validation_split reserves 30% of the training data for validation.
model = initialize_model()

history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=500,
    validation_split=0.3,
    shuffle=True,
    verbose=0
)

CPU times: total: 2min 36s
Wall time: 3min 26s


In [None]:
# Score the trained model on the held-out test set; element 1 is the accuracy.
results = model.evaluate(x=X_test_scaled, y=y_test, verbose=0)

print(f'The accuracy on the test set is {results[1]:.2f}')

The accuracy on the test set is 0.75


In [None]:
def _plot_metric_panel(panel, history, metric, panel_title, y_label, y_limits):
    """Draw the train (and, if recorded, validation) curve of one metric on ``panel``."""
    panel.plot(history.history[metric], label='Train')

    # Validation curves only exist when fit() was given validation data.
    val_metric = f'val_{metric}'
    if val_metric in history.history:
        panel.plot(history.history[val_metric], label='Val')

    panel.set_title(panel_title)
    panel.set_ylabel(y_label)
    panel.set_xlabel('Epoch')

    # Fixed limits keep successive plots comparable; values outside are clipped.
    panel.set_ylim(y_limits)

    panel.legend(loc='best')
    panel.grid(axis='both', linewidth=0.5)


def plot_loss_accuracy(history, title=None):
    """Plot loss and accuracy learning curves side by side.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by ``model.fit``). It must
            contain 'loss' and 'accuracy'; 'val_loss' and 'val_accuracy'
            are plotted as well when present.
        title: Optional title displayed above both panels.

    Returns:
        None. The figure is left open for the notebook backend to render.
    """
    fig, ax = plt.subplots(1, 2, figsize=(20, 7))

    _plot_metric_panel(ax[0], history, 'loss', 'Model loss', 'Loss', (0, 3))
    _plot_metric_panel(ax[1], history, 'accuracy', 'Model Accuracy', 'Accuracy', (0, 1))

    if title:
        fig.suptitle(title)

In [None]:
# Learning curves of the 500-epoch run trained without early stopping
plot_loss_accuracy(history)

In [None]:
%%time

# Same training setup, now with an EarlyStopping callback left at its
# default settings; 500 epochs becomes an upper bound.
model = initialize_model()

es = EarlyStopping()

history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=500,
    validation_split=0.3,
    callbacks=[es],
    verbose=1
)

In [None]:
# Learning curves of the early-stopped run, then its score on the test set.
plot_loss_accuracy(history)

results = model.evaluate(x=X_test_scaled, y=y_test, verbose=0)
print(f'The accuracy on the test set is {results[1]:.2f}')

In [None]:
# Per-epoch metric values recorded by the most recent fit (metric name -> list)
history.history

In [None]:
%%time

# Early stopping with patience=30 epochs.
es = EarlyStopping(patience=30)

model = initialize_model()

history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=500,
    validation_split=0.3,
    callbacks=[es],
    verbose=0
)

plot_loss_accuracy(history)

# Element 1 of the evaluation result is the accuracy.
results = model.evaluate(x=X_test_scaled, y=y_test, verbose=0)
print(f'The accuracy on the test set is {results[1]:.2f}')

In [None]:
%%time

# Same patience as before, with restore_best_weights=True on the callback.
es = EarlyStopping(patience=30, restore_best_weights=True)

model = initialize_model()

history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=500,
    validation_split=0.3,
    callbacks=[es],
    verbose=0
)

plot_loss_accuracy(history)

# Element 1 of the evaluation result is the accuracy.
results = model.evaluate(x=X_test_scaled, y=y_test, verbose=0)
print(f'The accuracy on the test set is {results[1]:.2f}')