In [None]:
from keras.layers import Conv3D, MaxPool3D, Flatten, Dense
from keras.layers import Dropout, Input, BatchNormalization
from sklearn.metrics import confusion_matrix, accuracy_score
from plotly.offline import iplot, init_notebook_mode
from keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adadelta, Adam
from keras.utils.np_utils import to_categorical
from keras.models import Sequential, load_model
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
import plotly.graph_objs as go
from matplotlib.pyplot import cm
import matplotlib.pyplot as plt
from keras.models import Model
import numpy as np
import keras
import h5py

# Initialise plotly's notebook mode before any iplot(...) call below
init_notebook_mode(connected=True)
# Show matplotlib figures inside the notebook output
%matplotlib inline
# Use the 'fivethirtyeight' style sheet for every matplotlib figure created from here on
plt.style.use('fivethirtyeight')

# Loading the Dataset

In [None]:
# Read the train/test arrays into memory; the HDF5 file is closed when the block exits
with h5py.File('../input/3d-mnist/full_dataset_vectors.h5', 'r') as dataset:
    x_train = dataset["X_train"][:]
    x_test = dataset["X_test"][:]
    y_train = dataset["y_train"][:]
    y_test = dataset["y_test"][:]

# Report the shape of every split, train first, then test
for caption, split in (("x_train shape: ", x_train), ("y_train shape: ", y_train),
                       ("x_test shape:  ", x_test), ("y_test shape:  ", y_test)):
    print(caption, split.shape)

# Visualize Examples

## Let's look at six sample digits (0–5) from our dataset in 3D space!

In [None]:
# Load the first ten point-cloud samples as (image, points, label) tuples
with h5py.File("../input/3d-mnist/train_point_clouds.h5", "r") as points_dataset:
    digits = []
    for i in range(10):
        sample = points_dataset[str(i)]
        digits.append((sample["img"][:], sample["points"][:], sample.attrs["label"]))

# Hand-picked sample positions; one interactive 3D scatter plot is drawn for each
for j in [1, -2, 5, -3, -1, 0]:
    _, points, label = digits[j]

    # Split the point list into its three coordinate columns in one vectorised step
    points = np.asarray(points)
    x_c, y_c, z_c = points[:, 0], points[:, 1], points[:, 2]

    # Colour each marker by its y coordinate
    trace1 = go.Scatter3d(x = x_c, y = y_c, z = z_c, mode = "markers", marker_symbol = "circle-open",
                          marker = dict(size = 3, color = y_c, colorscale = "Phase", opacity = 1))

    # The title uses the label stored with the sample, so it stays correct
    # even if the list of positions above is edited
    layout = go.Layout(height = 600, width = 900, template = "plotly_dark",
                       title = f"Digit: {label} in 3D space",
                       font = dict(family = "PT Sans", size = 15))

    fig = go.Figure(data = [trace1], layout = layout)

    iplot(fig)

# Converting the input to 4D shape to use 3D convolution


## 1. A 3D convolution applies a 3-dimensional filter to the dataset; the filter moves in 3 directions (x, y, z) to calculate low-level feature representations. Its output is a 3-dimensional volume, such as a cube or cuboid.

## 2. They are helpful in event detection in videos, 3D medical images etc. They are not limited to 3d space but can also be applied to 2d space inputs such as images.

In [None]:
# Pre-allocate one buffer per split: 4096 values per sample, each widened to 3 colour channels.
# The buffers are uninitialised and are expected to be completely overwritten afterwards.
xtrain = np.empty((len(x_train), 4096, 3))
xtest = np.empty((len(x_test), 4096, 3))

# Translate data to color
def add_rgb_dimension(array):
    """Map scalar values to colours with the "Oranges" colormap.

    A fresh ScalarMappable is created on every call, and the last column of
    the RGBA result (alpha) is dropped before returning.

    Parameters
    ----------
    array : array-like of scalar values
        Called in this notebook with one flat sample at a time.

    Returns
    -------
    The colour-mapped values with the trailing alpha column removed.
    """
    mapper = cm.ScalarMappable(cmap = "Oranges")
    rgba = mapper.to_rgba(array)
    return rgba[:, : -1]

# Colour-map every sample into its row of the pre-allocated buffers
for row, sample in enumerate(x_train):
    xtrain[row] = add_rgb_dimension(sample)
for row, sample in enumerate(x_test):
    xtest[row] = add_rgb_dimension(sample)

# Reshape each flat sample of 4096 RGB triples into a 16 x 16 x 16 grid with 3 channels;
# the leading axis stays the number of samples in the split
xtrain = xtrain.reshape(x_train.shape[0], 16, 16, 16, 3)
xtest = xtest.reshape(x_test.shape[0], 16, 16, 16, 3)

# Convert target variable into one-hot.
# Guarded so that re-running this cell does not encode labels that are already one-hot.
NUM_CLASSES = 10
if y_train.ndim < 2 or y_train.shape[-1] != NUM_CLASSES:
    y_train = to_categorical(y_train, NUM_CLASSES)

In [None]:
# Display the shapes of the prepared model inputs and training labels as a quick sanity check
xtrain.shape, y_train.shape

# Convolutional Layers

## Although we are working with 3D MNIST in this instance, I have found this visualization of convolutional layers to be quite helpful for gaining intuition on the inner workings of a CNN.

In [None]:
# `import IPython` is what makes the fully-qualified `IPython.display.Image` usable here
# and in the image cells further down.
import IPython
from IPython.display import display
# Note: the bare name `Image` now refers to PIL's module, not IPython's image class
from PIL import Image
# Show the convolution illustration; as the last expression of the cell it becomes the cell output
IPython.display.Image(filename='../input/convnn/conv-nn.png')

# Feature Maps

## Convolutional layers have a set of kernels (filters) whose depth is equal to the depth of the input. Other dimensions can be set manually. If a kernel is initialized with values in a specific configuration, it can be used to transform an input image and find various features. These filters, when convolved over the input image, produce feature maps.

In [None]:
# Show the feature-map illustration (relies on the earlier `import IPython` cell having been run)
IPython.display.Image(filename='../input/featuremap/feature-map.png')

# Max Pooling

## Max Pooling reduces the spatial dimensions of the feature maps before traversing through the fully connected layers.

In [None]:
# Show the max-pooling illustration (relies on the earlier `import IPython` cell having been run)
IPython.display.Image(filename='../input/maxpool/max-pooling.png')

# Building 3D Convolutional Neural Network

## Lets create the model architecture. The architecture is described below:

**Input and Output layers:**

- One input layer with dimension (16, 16, 16, 3) and output layer with dimension 10.

**Convolutions:**

- Apply 4 convolutional layers with an increasing number of filters (16, 32, 64, 128) and a fixed kernel size of (3, 3, 3).

- Apply 2 max pooling layers, one after 2nd convolutional layer and one after fourth convolutional layer.

**MLP architecture:**

- Batch normalization after each convolutional layer.

- Two hidden dense layers (4096 and 1024 units), each followed by dropout to avoid overfitting, and a final 10-unit softmax output layer.

In [None]:
# Shape of one sample: a 16 x 16 x 16 grid with 3 colour channels (channels last)
INPUT_SHAPE = (16, 16, 16, 3)
input_layer = Input(INPUT_SHAPE) # Symbolic input tensor; the model below is given the same shape directly

# Building a 3D ConvNet
# Each Conv3D layer slides small learnable kernels over the volume and emits one feature map per filter

model = Sequential() # Sequential Keras API which is a linear stack of layers

model.add(Conv3D(filters = 16, # Number of kernels, i.e. number of feature maps this layer outputs

                 kernel_size = (3, 3, 3), # Extent of the sliding convolution window along each spatial axis

                 activation = "relu", # Activation function - Rectified Linear Unit (ReLU)

                 strides = 1, # How far the convolution window shifts along each axis per step

                 padding = "same", # Pad the input so that, with stride 1, the spatial size is unchanged

                 use_bias = False, # No bias vector is added; BatchNormalization follows and learns its own offset

                 # There are two conventions for shapes of images tensors: the channels-last convention
                 # (used by TensorFlow) and the channels-first convention (used by Theano)."
                 # Deep Learning with Python - François Chollet
                 data_format = "channels_last",

                 # Declaring the input shape on the first layer builds the model right away,
                 # so its weights and summary are available before training starts
                 input_shape = INPUT_SHAPE))

# Batch normalization standardises the activations of the previous layer over each mini-batch
# (then applies a learned scale and offset), which stabilises and speeds up training
model.add(BatchNormalization())

model.add(Conv3D(filters = 32, kernel_size = (3, 3, 3), activation = "relu",
                 use_bias = False, strides = 1, padding = "same", data_format = "channels_last"))

model.add(BatchNormalization())

# Max Pooling reduces the spatial dimensions of the feature maps before the fully connected layers
model.add(MaxPool3D(pool_size = (2, 2, 2))) # halves every spatial dimension: 16 -> 8

model.add(Conv3D(filters = 64, kernel_size = (3, 3, 3), activation = "relu",
                 use_bias = False, strides = 1, padding = "same", data_format = "channels_last"))

model.add(BatchNormalization())

model.add(Conv3D(filters = 128, kernel_size = (3, 3, 3), activation = "relu",
                 use_bias = False, strides = 1, padding = "same", data_format = "channels_last"))

model.add(BatchNormalization())
model.add(MaxPool3D(pool_size = (2, 2, 2))) # halves every spatial dimension again: 8 -> 4

# To help avoid overfitting we can add Dropout.
# During training it randomly zeroes the given fraction of activations, so the network
# cannot rely too heavily on any single unit
model.add(Dropout(0.2)) # No more BatchNorm after this layer because we introduce Dropout

# Finally, flatten the pooled feature maps into a 1D vector per sample
# and classify it with fully connected layers.

model.add(Flatten())
model.add(Dense(4096, activation = "relu"))
model.add(Dropout(0.45))
model.add(Dense(1024, activation = "relu"))
model.add(Dropout(0.45))
model.add(Dense(10, activation = "softmax")) # one probability per class for Multi-Class Classification

# Compiling the Model

**Step 1 - Specify the optimizer used by the model:**

- We will be using the Adam optimizer in this instance but please refer to the [Keras documentation](https://keras.io/api/optimizers/) for a comprehensive list of optimizers available.

- The Adam optimizer (Adaptive Moment Estimation) is an improved version of gradient descent. Interestingly, the Adam algorithm doesn't apply one single step size to every weight: it adapts the effective learning rate separately for every parameter of your model.

- The intuition behind the Adam algorithm is that if a coefficient (weight or bias) keeps moving in roughly the same direction, Adam will increase the effective learning rate for that parameter. In other words, let's go faster in that direction.

**Step 2 - Specify the loss function of the model:**

- For Binary Classification we use "binary_crossentropy" and for Multi-class Classification we use "categorical_crossentropy".

**Step 3 - Specify metric to evaluate model performance:**

- We will be using the accuracy metric but please refer to the [Keras documentation](https://keras.io/api/metrics/) for a comprehensive list of metrics available.

In [None]:
# Adam optimizer with a base learning rate of 1e-4; Adam scales the step of each parameter individually.
# NOTE(review): the LearningRateScheduler callback passed to model.fit sets the learning rate
# at the start of every epoch, so this base value may be replaced as soon as training begins - confirm intent.
optimizer = Adam(learning_rate = 1e-4)

In [None]:
# Attach the optimizer, the multi-class cross-entropy loss (expects one-hot targets)
# and the accuracy metric to the model
model.compile(
    optimizer = optimizer,
    loss = "categorical_crossentropy",
    metrics = ["accuracy"],
)

# Learning Rate Scheduler

- At the beginning of every epoch, this callback retrieves the updated learning rate value from the schedule function provided at __init__, with the current epoch and current learning rate, and applies the updated learning rate on the optimizer. [Source](https://keras.io/api/callbacks/learning_rate_scheduler/)

- A learning rate that is too high will make the learning jump over minima but a too low learning rate will either take too long to converge or get stuck in an undesirable local minimum.

**Essentially, the Learning Rate Scheduler sets a new learning rate at the start of each epoch.**

In [None]:
def exponential_decay_schedule(epoch):
    """Learning rate for the given 0-based epoch: 1e-3 shrunk by 10% per epoch."""
    return 1e-3 * 0.9 ** epoch

# NOTE(review): the schedule depends only on the epoch number, so any learning rate set
# elsewhere (optimizer default, ReduceLROnPlateau) is replaced when the next epoch starts.
reduce_lr = LearningRateScheduler(exponential_decay_schedule)

# More Callbacks

**A callback is an object that can perform actions at various stages of training (e.g. at the start or end of an epoch, before or after a single batch, etc).** [Source](https://keras.io/api/callbacks/)

In [None]:
filepath = "best_weight.h5" # Where the best weights seen so far are written
patience_earlystop = 7 # NOTE(review): no EarlyStopping callback is created in this cell, so this is unused here
patience_ReduceLROnPlateau = 3 # Epochs without improvement before the learning rate is reduced

# Save only the weights, and only when the validation loss reaches a new minimum
mcp = ModelCheckpoint(filepath, monitor = "val_loss", mode = "min",
                      save_best_only = True, save_weights_only = True, verbose = 1)

# Halve the learning rate (never below 1e-5) when validation accuracy stops improving.
# The model is compiled with metrics = ["accuracy"], so the logged key is "val_accuracy";
# monitoring "val_acc" would never find the metric and the callback would never trigger.
learning_rate_reduction = ReduceLROnPlateau(monitor = "val_accuracy", mode = "max",
                                            patience = patience_ReduceLROnPlateau,
                                            verbose = 1, factor = 0.5, min_lr = 1e-5)

# Fitting the Model

In [None]:
# Hyper Parameters
BATCH_SIZE = 64
EPOCHS = 50

# Validation data must be in the same representation as the training data:
# the colour-mapped 5-D `xtest` (not the raw flat `x_test`) and one-hot labels.
# The check keeps this cell safe to re-run and safe if y_test was already encoded.
if y_test.ndim == 2 and y_test.shape[-1] == 10:
    y_test_onehot = y_test
else:
    y_test_onehot = to_categorical(y_test, 10)

# `validation_split` is not passed: Keras ignores it whenever `validation_data` is given.
history = model.fit(x = xtrain, y = y_train,
                    validation_data = (xtest, y_test_onehot),
                    batch_size = BATCH_SIZE, epochs = EPOCHS,
                    verbose = 1,
                    callbacks = [reduce_lr, learning_rate_reduction, mcp])

# Training and Validation Curves

In [None]:
# Per-epoch curves recorded during training
curves = history.history
accuracy, val_accuracy = curves["accuracy"], curves["val_accuracy"]
loss, val_loss = curves["loss"], curves["val_loss"]
epochs = range(len(accuracy))

# One 12 x 6 figure per metric: training curve in blue, validation curve in red
panels = (
    ("Training and validation accuracy",
     (accuracy, "Training accuracy"), (val_accuracy, "Validation accuracy")),
    ("Training and validation loss",
     (loss, "Training loss"), (val_loss, "Validation loss")),
)
for heading, (train_values, train_label), (val_values, val_label) in panels:
    plt.figure(figsize = (12, 6))
    plt.plot(epochs, train_values, 'b', label = train_label)
    plt.plot(epochs, val_values, 'r', label = val_label)
    plt.title(heading)
    plt.legend()
plt.show()

# **The End**