This notebook was created at 23:53 18/10/20

Liquid crystal phase classification using deep learning techniques

First attempt to classify liquid crystal phases using the dataset shared by Josh Heaton.

In [None]:
pip install pydot

In [15]:
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from keras.layers import AveragePooling2D, MaxPooling2D, Dropout, GlobalMaxPooling2D, GlobalAveragePooling2D
from keras.layers.experimental.preprocessing import Rescaling, RandomFlip
from keras.preprocessing import image_dataset_from_directory
from keras.utils import plot_model
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.metrics import confusion_matrix

Load in the data.

In [16]:
train_directory = "C:/Users/Jason/Documents/University/Year_4/MPhys_Project(s)/Liquid_crystals-machine_learning/Images_from_drive_bigger_val/Training"
test_directory = "C:/Users/Jason/Documents/University/Year_4/MPhys_Project(s)/Liquid_crystals-machine_learning/Images_from_drive_bigger_val/Validation"
image_size = (200,200)

# Change images to grayscale as colour isnt an important feature at this stage
train_dataset = image_dataset_from_directory(train_directory,
                            labels="inferred",
                            label_mode="categorical",
                            color_mode="grayscale",
                            batch_size=64,
                            image_size=image_size,
                            shuffle=True
                        )
val_dataset = image_dataset_from_directory(test_directory,
                            labels="inferred",
                            label_mode="categorical",
                            color_mode="grayscale",
                            batch_size=64,
                            image_size=image_size,
                            shuffle=True
                        )

Found 1775 files belonging to 5 classes.
Found 452 files belonging to 5 classes.


Let's see if the files imported as expected.

In [17]:
print(train_dataset.element_spec)
print(train_dataset.class_names)
for data, labels in train_dataset:
    print(data.shape)
    print(data.dtype)
    print(labels.shape)
    print(labels.dtype)

(TensorSpec(shape=(None, 200, 200, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None, 5), dtype=tf.float32, name=None))
['Cholesteric', 'Columnar', 'Nematic', 'Smectic', 'Twist_grain_boundary']
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'floa

In [18]:
print(val_dataset.element_spec)
print(val_dataset.class_names)
for data, labels in val_dataset:
    print(data.shape)
    print(data.dtype)
    print(labels.shape)
    print(labels.dtype)

(TensorSpec(shape=(None, 200, 200, 1), dtype=tf.float32, name=None), TensorSpec(shape=(None, 5), dtype=tf.float32, name=None))
['Cholesteric', 'Columnar', 'Nematic', 'Smectic', 'Twist_grain_boundary']
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(64, 200, 200, 1)
<dtype: 'float32'>
(64, 5)
<dtype: 'float32'>
(4, 200, 200, 1)
<dtype: 'float32'>
(4, 5)
<dtype: 'float32'>


Let's define a function to plot a graph to see how our model works over iterations. (From Josh Heaton)

In [19]:
def plot_loss_acc_history(history):
    fig, axis = plt.subplots(2)
    #fig.subtitle('Training losses and accuracies')
    
    axis[0].plot(history.history['loss'], label='loss')
    axis[0].plot(history.history['val_loss'], label='val_loss')
    axis[0].set_xlabel('Epoch')
    axis[0].set_ylabel('Loss')
    axis[0].legend(loc='upper right')
    
    axis[1].plot(history.history['accuracy'], label='accuracy')
    axis[1].plot(history.history['val_accuracy'], label='val_accuracy')
    axis[1].set_xlabel('Epoch')
    axis[1].set_ylabel('Accuracy')
    axis[1].legend(loc='upper right')

    plt.show()

Let's define our pipeline.

In [20]:
image_shape = (image_size[0], image_size[1], 1)
X_inputs = Input(shape = image_shape)
# Rescale images to have values in range [0,1]
X = Rescaling(scale = 1/255)(X_inputs)
X = RandomFlip()(X)

# Apply convolutional and pooling layers
X = Conv2D(filters=16, kernel_size=(3,3))(X)
X = BatchNormalization()(X)
X = Activation("relu")(X)
X = MaxPooling2D(pool_size=(2,2))(X)

X = Conv2D(filters=32, kernel_size=(3,3))(X)
X = BatchNormalization()(X)
X = Activation("relu")(X)
X = MaxPooling2D(pool_size=(2,2))(X)

X = Conv2D(filters=64, kernel_size=(3,3))(X)
X = BatchNormalization()(X)
X = Activation("relu")(X)
X = MaxPooling2D(pool_size=(2,2))(X)

X = Conv2D(filters=128, kernel_size=(3,3))(X)
X = BatchNormalization()(X)
X = Activation("relu")(X)
X = MaxPooling2D(pool_size=(3,2))(X)

# Apply fully connected layers
X = Flatten()(X)
X = Dense(units=128, activation="relu")(X)
X = Dropout(0.5)(X)
X = Dense(units=64, activation="relu")(X)
X = Dropout(0.5)(X)
# Output layer
num_classes = 5
X_outputs = Dense(units=num_classes, activation="softmax")(X)

model = Model(inputs = X_inputs, outputs = X_outputs)

Let's see what this model looks like.

In [21]:
model.summary()

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 200, 200, 1)]     0         
_________________________________________________________________
rescaling_1 (Rescaling)      (None, 200, 200, 1)       0         
_________________________________________________________________
random_flip_1 (RandomFlip)   (None, 200, 200, 1)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 198, 198, 16)      160       
_________________________________________________________________
batch_normalization_4 (Batch (None, 198, 198, 16)      64        
_________________________________________________________________
activation_4 (Activation)    (None, 198, 198, 16)      0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 99, 99, 16)       

In [10]:
plot_model(model, to_file="LC_phase_CNN.png", show_shapes=True)

('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')


Now we need to compile, train and test the model.

In [11]:
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [12]:
# callbacks (hopefully will prevent validation loss flucuations previously seen)
reduce_lr = ReduceLROnPLateau(monitor="val_loss")
early_stop = EarlyStopping(monitor="val_loss", patience=10, restore_best_weight=True)

history = model.fit(x=train_dataset, epochs=100, verbose=2, callbacks=[reduce_lr, early_stop], validation_data=val_dataset)

Epoch 1/100
28/28 - 119s - loss: 1.3747 - accuracy: 0.4862 - val_loss: 1.5831 - val_accuracy: 0.3053
Epoch 2/100
28/28 - 118s - loss: 0.7953 - accuracy: 0.7256 - val_loss: 1.6561 - val_accuracy: 0.3053
Epoch 3/100
28/28 - 119s - loss: 0.5367 - accuracy: 0.8208 - val_loss: 1.7390 - val_accuracy: 0.3053
Epoch 4/100
28/28 - 118s - loss: 0.3871 - accuracy: 0.8625 - val_loss: 2.0817 - val_accuracy: 0.1128
Epoch 5/100
28/28 - 118s - loss: 0.3041 - accuracy: 0.9025 - val_loss: 2.7977 - val_accuracy: 0.1128
Epoch 6/100
28/28 - 118s - loss: 0.2120 - accuracy: 0.9341 - val_loss: 2.5941 - val_accuracy: 0.1128
Epoch 7/100
28/28 - 118s - loss: 0.1440 - accuracy: 0.9521 - val_loss: 3.6319 - val_accuracy: 0.1128
Epoch 8/100
28/28 - 113s - loss: 0.1357 - accuracy: 0.9572 - val_loss: 3.6973 - val_accuracy: 0.1681
Epoch 9/100
28/28 - 115s - loss: 0.1186 - accuracy: 0.9662 - val_loss: 5.6680 - val_accuracy: 0.0951
Epoch 10/100
28/28 - 118s - loss: 0.0854 - accuracy: 0.9758 - val_loss: 5.3433 - val_accura

KeyboardInterrupt: 

Let's see how the model does on unseen data.

In [22]:
model.evaluate(val_dataset, verbose=2)

5/5 - 1s - loss: 1.3145 - accuracy: 0.6422


[1.3144651651382446, 0.6421725153923035]

In [13]:
plot_loss_acc_history(history)

NameError: name 'history' is not defined

Let's see the confusion matrix on our predictions to understand how our model is performing on the unseen data.

In [24]:
predictions = model.predict(val_dataset)
y_pred = np.argmax(predictions, axis = 1)

In [25]:
# Get true labels
y_true = np.argmax(np.concatenate([labels for data, labels in val_dataset], axis=0), axis=1)
print(y_true)
print(y_pred)

print("Confusion matrix:")
print(confusion_matrix(y_true=y_true, y_pred=y_pred, normalize="true"))

[2 2 2 0 3 0 2 1 4 3 3 0 0 3 2 0 2 0 0 0 0 4 0 3 2 4 2 4 2 3 3 2 2 3 3 2 0
 0 3 4 2 4 3 2 0 0 2 2 2 3 0 0 0 2 3 2 3 1 2 3 2 0 0 2 0 4 2 0 0 4 2 2 2 3
 2 3 4 2 2 0 3 3 3 0 2 1 0 0 0 2 4 0 0 2 2 0 2 4 3 2 0 4 0 0 0 1 2 1 2 1 0
 2 0 0 0 1 1 2 2 1 3 0 0 0 1 2 3 2 3 2 3 2 3 2 2 0 0 1 3 1 3 4 0 0 2 4 1 2
 0 3 0 3 4 1 2 0 2 0 1 3 2 3 0 0 0 0 2 3 4 2 1 0 2 2 3 0 0 0 0 0 0 3 0 0 2
 2 4 0 3 3 2 3 3 0 0 3 2 0 2 0 2 0 0 1 1 0 4 0 2 3 4 2 2 0 0 0 0 0 1 0 3 2
 0 0 3 4 4 2 2 2 0 0 0 0 2 0 0 3 2 1 0 1 0 0 2 2 2 3 4 3 3 2 2 2 4 1 3 2 2
 2 4 2 2 2 3 0 0 0 3 0 0 2 4 2 2 4 2 0 0 0 0 4 4 0 0 0 2 4 0 4 0 2 0 2 2 0
 4 0 3 3 2 1 3 0 0 2 2 4 3 2 2 2 2]
[2 0 4 1 0 4 2 1 1 0 0 4 4 2 0 4 0 4 1 1 0 0 3 0 1 0 1 1 4 1 1 0 0 4 1 1 0
 4 1 1 1 4 1 4 1 4 0 4 0 0 3 3 0 0 0 2 3 4 1 3 0 1 0 0 2 0 1 1 4 0 0 0 3 1
 2 1 0 0 2 1 3 0 0 0 2 4 0 3 2 3 2 3 4 3 1 1 0 1 1 1 0 0 0 0 0 4 4 1 0 0 0
 4 3 4 4 0 0 2 1 3 1 4 3 4 4 4 2 4 0 0 0 1 0 1 4 0 0 0 0 1 0 1 3 0 2 1 1 3
 1 3 4 2 1 1 4 1 4 0 0 3 2 0 0 3 0 4 0 4 0 4 2 1 0 0 0 1 4 4 4 2