In [None]:
%matplotlib inline

In [None]:
# Model architecture
from keras.layers import Dense, Dropout, Conv1D, MaxPool1D, Flatten, SpatialDropout1D
from keras.models import Sequential
from keras import optimizers
from keras.callbacks import Callback

# General packages
import numpy as np
import os
import h5py
import seaborn as sns
import matplotlib.pyplot as plt

# Data preparation and validation packages
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Jupyter interactive plotting
from IPython.display import clear_output

# Callback functions

Callback functions, as the name suggests, are functions that a particular part of your code calls while it executes and processes the data. In the following case I created a simple class that can be used to plot the values of the loss function and the accuracy during the training/validation of the model.

It will be called by the `fit` method of the `Sequential` object while the model is being trained.

In [None]:
# real time plotting
class PlotLosses(Callback):
    """Keras callback that redraws the loss/accuracy curves after every epoch.

    The per-epoch values reported by Keras are accumulated in ``losses``,
    ``accuracies``, ``val_losses`` and ``val_accuracies``; the raw ``logs``
    dictionaries are kept in ``logs``. After each epoch the previous cell
    output is cleared and all four curves are plotted again.
    """

    @staticmethod
    def _first_present(logs, *keys):
        """Return the value of the first key found in ``logs``, or None."""
        for key in keys:
            if key in logs:
                return logs[key]
        return None

    def on_train_begin(self, logs=None):
        """Reset all recorded history at the start of a training run.

        ``logs`` is accepted for compatibility with the Callback interface
        and is not used.
        """
        self.i = 0
        self.x = []
        self.losses = []
        self.accuracies = []
        self.val_losses = []
        self.val_accuracies = []

        self.fig = plt.figure()

        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        """Record the metrics of the finished epoch and redraw the curves.

        Args:
            epoch: Index of the epoch, as passed by Keras (not used; the
                callback keeps its own counter in ``self.i``).
            logs: Dictionary of metric values for the epoch. ``None`` is
                treated as an empty dictionary.
        """
        # A None default avoids sharing one mutable dict between calls
        if logs is None:
            logs = {}

        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        # The accuracy key depends on how the metric was named at compile time
        # ('acc' vs. 'accuracy'), so both spellings are looked up
        self.accuracies.append(self._first_present(logs, 'acc', 'accuracy'))
        self.val_losses.append(logs.get('val_loss'))
        self.val_accuracies.append(
            self._first_present(logs, 'val_acc', 'val_accuracy'))
        self.i += 1

        # Replace the previous plot instead of stacking a new one per epoch
        clear_output(wait=True)
        plt.plot(self.x, self.losses, label="loss")
        plt.plot(self.x, self.accuracies, label="accuracy")
        plt.plot(self.x, self.val_losses, label="val_loss")
        plt.plot(self.x, self.val_accuracies, label="val_accuracy")
        plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        plt.show()


plot_losses = PlotLosses()

# Data preparation

Here I prepare the data for the model as described in the previous notebooks.

In [None]:
# The data live in a "data" directory located in the parent of the current
# working directory
data_dir = os.path.join(os.path.dirname(os.getcwd()), "data")
hdf5_filename = "example.hdf5"
# The context manager closes the file even if reading the dataset raises
with h5py.File(os.path.join(data_dir, hdf5_filename), 'r') as h5:
    # `Dataset.value` is deprecated (and removed in h5py 3); indexing with an
    # empty tuple reads the whole dataset into memory instead
    strain = np.array(h5["Strain"]["Strain"][()])

In [None]:
# Artificial dataset: 1000 rows, each one a float64 copy of the strain series
data = np.broadcast_to(strain, (1000, strain.shape[0])).astype(np.float64)
# minmax_scale works column-wise by default, so the matrix is transposed to
# scale every sample (row) on its own and then transposed back
rescaled_data = preprocessing.minmax_scale(data.T).T
# Append a trailing axis of length 1: (n_samples, n_features, 1)
rescaled_data = np.reshape(rescaled_data, rescaled_data.shape + (1,))

In [None]:
# Let's make artificial labels - 3 classes
# The class count is fixed up front instead of being inferred from
# `labels.max()`, which would under-count if the highest class were never drawn
n_classes = 3
labels = np.random.randint(0, n_classes, rescaled_data.shape[0])

In [None]:
# Shuffle: one random ordering of the sample indices is applied to both
# arrays, so every sample keeps its own label
ind = np.random.permutation(rescaled_data.shape[0])
rescaled_data = rescaled_data[ind]
labels = labels[ind]

In [None]:
# Let's convert labels into one hot encoding
# The `sparse` keyword was renamed to `sparse_output` and later removed in
# scikit-learn, so the encoder is left at its default (sparse) output and the
# result is densified explicitly, which works on every version
onehot_encoder = OneHotEncoder()
# The encoder expects a 2-D input: one row per sample, one column per feature
labels = labels.reshape(labels.shape[0], 1)
targets = onehot_encoder.fit_transform(labels).toarray()

In [None]:
# Hold out 30% of the samples for validation; the rest is used for training
splits = train_test_split(rescaled_data, targets, test_size=0.3)
x_train, x_val, y_train, y_val = splits

# Data architecture

**Important note!** Keras can work with either a TensorFlow or a Theano backend. They differ in one crucial aspect - the shape of the data.

Tensorflow requires the data to be stored in the following way (for 1D CNN):

- n_samples, n_features, n_channels

Whereas Theano requires:

- n_samples, n_channels, n_features

In the case of a 2D CNN, the channel dimension denotes the number of colour channels of the image, but in the case of a 1D CNN it can be used to express one feature based on two datasets.

Make sure that the shape of the data is correct with respect to the TensorFlow convention.

In [None]:
# 1D CNN: two convolution + max-pooling stages followed by a dense classifier
model = Sequential()
# `filters` / `kernel_size` are the Keras 2 names of the legacy Keras 1
# arguments `nb_filter` / `filter_length`
# Input: one sample is (n_features, 1), i.e. a single channel
model.add(Conv1D(filters=40, kernel_size=3, activation="relu", input_shape=(data.shape[1], 1)))
model.add(MaxPool1D(2))
#model.add(SpatialDropout1D(0.2))

model.add(Conv1D(filters=20, kernel_size=3, activation="relu"))
model.add(MaxPool1D(2))
#model.add(Dropout(0.2))

# Flatten your convolutional part to fit the dense part of the model
model.add(Flatten())

model.add(Dense(200, activation="relu"))
# One softmax output per class
model.add(Dense(n_classes, activation="softmax"))

In [None]:
# Adam optimizer with an explicitly named learning rate
learning_rate = 0.001
opt = optimizers.Adam(lr=learning_rate)

In [None]:
# Categorical cross-entropy matches the one-hot targets and the softmax output;
# accuracy is tracked under the key "acc"
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=["acc"],
)

In [None]:
# Train for 5 epochs in mini-batches of 64 samples, evaluating on the
# validation split after each epoch and redrawing the curves via the callback
model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=5,
    callbacks=[plot_losses],
    validation_data=(x_val, y_val),
)

In [None]:
# Run the trained model on the validation inputs; the last layer is a softmax,
# so each output row holds one value per class
y_predicted = model.predict(x_val)

In [None]:
# Reduce the one-hot targets and the per-class model outputs to class indices
true_classes = np.argmax(y_val, axis=1)
predicted_classes = np.argmax(y_predicted, axis=1)
cms = confusion_matrix(true_classes, predicted_classes)

In [None]:
# Accuracy: sum of the diagonal of the confusion matrix over the sum of all
# its entries
test_score = cms.diagonal().sum() / cms.sum()

In [None]:
# Heat map of the confusion matrix with the count written into every cell
fig = plt.figure(figsize=(18, 14))
ax = fig.add_subplot(111)
# The matrix is transposed so that the first index of `cms` runs along the
# x axis and the second one along the y axis
im = ax.imshow(cms.T, interpolation="nearest", cmap="cool")
rows, cols = cms.shape
# Annotate cell (x, y) with cms[x, y]
for (x, y), count in np.ndenumerate(cms):
    ax.text(x, y, int(count), color="black", ha="center", va="center", fontsize=25)
ax.set_title("Real vs predicted data, accuracy: " + str(test_score), fontsize=25)
fig.colorbar(im, ax=ax)

# One tick per class, labelled with the class index
classes_values = list(range(n_classes))
classes_labels = [str(n) for n in classes_values]

ax.set_xticks(classes_values)
ax.set_xticklabels(classes_labels, rotation=45, fontsize=25)
ax.set_yticks(classes_values)
ax.set_yticklabels(classes_labels, fontsize=25)
ax.set_xlabel("Real data", fontsize=25)
ax.set_ylabel("Predicted data", fontsize=25)