In [54]:
import numpy as np
import pandas as pd
import os
import shutil
import re
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import keras
from keras.layers import Input, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, MaxPooling2D, Dropout
from keras.layers import ZeroPadding3D, Conv3D, MaxPooling3D
from keras import optimizers
from keras.models import Model

import keras.backend as K
K.set_image_data_format('channels_last')

%matplotlib inline

In [2]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 16492708217666113544
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 2670198784
locality {
  bus_id: 1
}
incarnation: 6312969162566649163
physical_device_desc: "device: 0, name: GeForce GTX 780, pci bus id: 0000:03:00.0, compute capability: 3.5"
]


In [3]:
path = "/mnt/ML-drive/scanner-ml/Artem/share/Valerio/"
class_names = ['C100keV','TestSample']
n_pols = 8

In [4]:
def load_images(path, name_dir='TestSample', n_pols=8):
    
    img_ind = []
    path = path+name_dir+"/crops/"
    img_names = os.listdir(path)
    for name in img_names:
        img_ind.append(re.split('[_.]',name))
    img_ind = pd.DataFrame(np.array(img_ind),columns=['ViewID','gr','GrainID','pol','Polarization','cl','ClusterID','csv'])
    img_ind = img_ind.drop(['csv'],axis=1).sort_values(['ViewID','GrainID','Polarization'])
    for view in np.unique(img_ind['ViewID']):
        view_imgs = img_ind[ img_ind['ViewID']==view ]
        for grain in np.unique(view_imgs['GrainID']):
            grain_imgs = view_imgs[ view_imgs['GrainID']==grain ]
            if grain_imgs.shape[0]!=8:
                img_ind = img_ind.drop(grain_imgs.index)
    img_names = []
    for name in img_ind.values:
        img_names.append('_'.join(name)+'.csv')
    
    i=0
    im_array = []
    for name in img_names:
        if i==0 : tmp_im = []
        tmp_im.append(pd.read_csv(path+name, header=None).drop(31, axis=1).values)
        i+=1
        if i==n_pols:
            im_array.append(np.array(tmp_im).T)
            i=0
    return np.array(im_array)

In [5]:
%%time
X_sign = load_images(path, 'C100keV')
y_sign = np.ones((X_sign.shape[0],1))
X_noise = load_images(path, 'TestSample')
y_noise = np.zeros((X_noise.shape[0],1))

X = np.vstack((X_sign, X_noise))
y = np.vstack((y_sign, y_noise))

X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.15, stratify=y)

# Normalize image vectors (????)
#X_train = X_train_orig/255.
#X_test = X_test_orig/255.

print ("number of training examples = " + str(X_train.shape[0]))
print ("number of test examples = " + str(X_test.shape[0]))
print ("X_train shape: " + str(X_train.shape))
print ("Y_train shape: " + str(Y_train.shape))
print ("X_test shape: " + str(X_test.shape))
print ("Y_test shape: " + str(Y_test.shape))

number of training examples = 19703
number of test examples = 3478
X_train shape: (19703, 31, 31, 8)
Y_train shape: (19703, 1)
X_test shape: (3478, 31, 31, 8)
Y_test shape: (3478, 1)
CPU times: user 7min 32s, sys: 3.25 s, total: 7min 35s
Wall time: 7min 35s


# Simplified 3D CNN model

In [108]:
%%time

X_ddd_train = np.array([X_train.T]).T
X_ddd_test = np.array([X_test.T]).T

print ("X_3d_train shape: " + str(X_ddd_train.shape))
print ("Y_train shape: " + str(Y_train.shape))
print ("X_3d_test shape: " + str(X_ddd_test.shape))
print ("Y_test shape: " + str(Y_test.shape))

X_3d_train shape: (19703, 31, 31, 8, 1)
Y_train shape: (19703, 1)
X_3d_test shape: (3478, 31, 31, 8, 1)
Y_test shape: (3478, 1)
CPU times: user 2.93 s, sys: 343 ms, total: 3.27 s
Wall time: 3.23 s


In [109]:
def DDD_model(input_shape=(31,31,8,1), classes=2):
    
    
    X_input = Input(input_shape)
    
    X = ZeroPadding3D()(X_input)
    X = Conv3D(64, 3, name='conv1')(X)
    #X = BatchNormalization(name='batch1')(X)
    X = Activation('relu')(X)
    X = MaxPooling3D(name='pool1')(X)
    print('conv1\t',X.get_shape())
    
    X = Conv3D(128, 3, name='conv2')(X)
    #X = BatchNormalization(name='batch2')(X)
    X = Activation('relu')(X)
    X = MaxPooling3D(name='pool2')(X)
    print('conv2\t',X.get_shape())
    
    X = Flatten()(X)
    X = Dense(1, activation='sigmoid', name='fc')(X)
    if classes != 2 : print('oh no, too many classes, change the model output to softmax!')
    
    model = Model(inputs = X_input, outputs = X, name='3D_CNN_model')
    return model

In [110]:
ddd_model = DDD_model(input_shape=(31,31,8,1))

conv1	 (?, 15, 15, 4, 64)
conv2	 (?, 6, 6, 1, 128)


In [111]:
ddd_model.compile(optimizer='nadam', loss='binary_crossentropy', metrics=['accuracy'])

In [112]:
ddd_model.fit(X_ddd_train, Y_train, epochs = 3, batch_size = 128, validation_split=0.15)

Train on 16747 samples, validate on 2956 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f2ee975be48>

In [113]:
preds = ddd_model.evaluate(X_ddd_test, Y_test)
print ("Loss = " + str(preds[0]))
print ("Test Accuracy = " + str(preds[1]))

Loss = 4.9642333353710555
Test Accuracy = 0.6886141461637802


# VGG 3D

In [114]:
def DDD_VGG_model(input_shape=(31,31,8,1), classes=2):
    
    
    X_input = Input(input_shape)

    X = Conv3D(64, 3, padding='same', activation='relu', name='conv1_1')(X_input)
    X = Conv3D(64, 3, padding='same', name='conv1_2')(X)
    #X = BatchNormalization(name='batch1')(X)
    X = Activation('relu')(X)
    X = MaxPooling3D(name='pool1')(X)
    print('conv1\t',X.get_shape())
    
    X = Conv3D(128, 3, padding='same', name='conv2')(X)
    #X = BatchNormalization(name='batch2')(X)
    X = Activation('relu')(X)
    X = MaxPooling3D(name='pool2')(X)
    print('conv2\t',X.get_shape())
    
    X = Dropout(rate=0.5, name='drop_middle')(X)
    
    X = Conv3D(256, 3, padding='same', name='conv3')(X)
    #X = BatchNormalization(name='batch3')(X)
    X = Activation('relu')(X)
    X = MaxPooling3D(name='pool3')(X)
    print('conv3\t',X.get_shape())
    
    X = Conv3D(256, 3, padding='same', name='conv4')(X)
    #X = BatchNormalization(name='batch4')(X)
    X = Activation('relu')(X)
    X = MaxPooling3D(padding='same', name='pool4')(X)
    print('conv4\t',X.get_shape())
    
    X = Dropout(rate=0.5, name='drop')(X)
    
    X = Flatten()(X)
    X = Dense(1, activation='sigmoid', name='fc')(X)
    if classes != 2 : print('oh no, too many classes, change the model output to softmax!')
    
    model = Model(inputs = X_input, outputs = X, name='3D_VGG_model')
    return model

In [115]:
ddd_vgg_model = DDD_VGG_model(input_shape=(31,31,8,1))

conv1	 (?, 15, 15, 4, 64)
conv2	 (?, 7, 7, 2, 128)
conv3	 (?, 3, 3, 1, 256)
conv4	 (?, 2, 2, 1, 256)


In [116]:
ddd_vgg_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [117]:
ddd_vgg_model.fit(X_ddd_train, Y_train, epochs = 30, batch_size = 128, validation_split=0.15)

Train on 16747 samples, validate on 2956 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f2ef75d4710>

In [118]:
preds = ddd_vgg_model.evaluate(X_ddd_test, Y_test)
print ("Loss = " + str(preds[0]))
print ("Test Accuracy = " + str(preds[1]))

Loss = 0.32809947185162636
Test Accuracy = 0.8530764805989542


In [119]:
ddd_vgg_model.fit(X_ddd_train, Y_train, epochs = 70, batch_size = 128, validation_split=0.15)

Train on 16747 samples, validate on 2956 samples
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


<keras.callbacks.History at 0x7f2f46fe17b8>

In [120]:
preds = ddd_vgg_model.evaluate(X_ddd_test, Y_test)
print ("Loss = " + str(preds[0]))
print ("Test Accuracy = " + str(preds[1]))

Loss = 0.12498975413636684
Test Accuracy = 0.9631972397587092


# VGG 3D with BatchNorm

In [121]:
def DDD_VGG_b_model(input_shape=(31,31,8,1), classes=2):
    
    
    X_input = Input(input_shape)

    X = Conv3D(64, 3, padding='same', activation='relu', name='conv1_1')(X_input)
    X = Conv3D(64, 3, padding='same', name='conv1_2')(X)
    X = BatchNormalization(name='batch1')(X)
    X = Activation('relu')(X)
    X = MaxPooling3D(name='pool1')(X)
    print('conv1\t',X.get_shape())
    
    X = Conv3D(128, 3, padding='same', name='conv2')(X)
    X = BatchNormalization(name='batch2')(X)
    X = Activation('relu')(X)
    X = MaxPooling3D(name='pool2')(X)
    print('conv2\t',X.get_shape())
    
    X = Dropout(rate=0.5, name='drop_middle')(X)
    
    X = Conv3D(256, 3, padding='same', name='conv3')(X)
    X = BatchNormalization(name='batch3')(X)
    X = Activation('relu')(X)
    X = MaxPooling3D(name='pool3')(X)
    print('conv3\t',X.get_shape())
    
    X = Conv3D(256, 3, padding='same', name='conv4')(X)
    X = BatchNormalization(name='batch4')(X)
    X = Activation('relu')(X)
    X = MaxPooling3D(padding='same', name='pool4')(X)
    print('conv4\t',X.get_shape())
    
    X = Dropout(rate=0.5, name='drop')(X)
    
    X = Flatten()(X)
    X = Dense(1, activation='sigmoid', name='fc')(X)
    if classes != 2 : print('oh no, too many classes, change the model output to softmax!')
    
    model = Model(inputs = X_input, outputs = X, name='3D_VGG_b_model')
    return model

In [122]:
ddd_vgg_b_model = DDD_VGG_b_model(input_shape=(31,31,8,1))

conv1	 (?, 15, 15, 4, 64)
conv2	 (?, 7, 7, 2, 128)
conv3	 (?, 3, 3, 1, 256)
conv4	 (?, 2, 2, 1, 256)


In [123]:
ddd_vgg_b_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [126]:
ddd_vgg_b_model.fit(X_ddd_train, Y_train, epochs = 30, batch_size = 64, validation_split=0.15)

Train on 16747 samples, validate on 2956 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f2ef57bc860>

In [127]:
preds = ddd_vgg_b_model.evaluate(X_ddd_test, Y_test)
print ("Loss = " + str(preds[0]))
print ("Test Accuracy = " + str(preds[1]))

Loss = 0.29728871789590045
Test Accuracy = 0.8803910293614747
