In [98]:
import sklearn as skl
import numpy as np
import h5py
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
import pandas as pd
import ipywidgets as widgets
from ipywidgets import interact, interactive
from numpy import random
from keras.layers import *
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Data Prep

In [5]:
data = h5py.File('/Users/kategroschner/Downloads/Chiral_D_2018_10_31_FilteredStack.emd','r')
subdata = data['data']
subdata = subdata['Masked Particles']
subdata = subdata['data'][:,:,:]

In [6]:
labels = np.loadtxt('/Users/kategroschner/Downloads/Chiral_D_ManualValues_2018_10_31.csv',delimiter=',')

In [7]:
def balance_noFive(images, labels, min_num_labels):
    final_image = []
    final_labels = []
    count_left = 0
    count_right = 0
    count_garbage = 0
    #count_idk = 0
    for idx, l in enumerate(labels):
        if l == 4:
            if count_left < min_num_labels:
                final_image.append(images[idx])
                final_labels.append(l)
                count_left+=1
        if l == 5:
            if count_garbage < min_num_labels:
                final_image.append(images[idx])
                final_labels.append(0)
                count_garbage += 1
        if l == 6:
            if count_right < min_num_labels:
                final_image.append(images[idx])
                final_labels.append(l)
                count_right += 1
        if l == 0:
            if count_garbage < min_num_labels:
                final_image.append(images[idx])
                final_labels.append(l)
                count_garbage += 1
    return np.array(final_image),np.array(final_labels)

In [8]:
sort_images, sort_labels = balance_noFive(subdata,labels,90)

In [9]:
def shuffle(img_stack,labels):
    vals = np.arange(0,len(labels))
    random.shuffle(vals)
    new_stack = np.zeros(img_stack.shape)
    new_labels = np.ones(labels.shape)
    for old_idx,new_idx in enumerate(vals):
        new_stack[old_idx] = img_stack[new_idx]
        new_labels[old_idx] = labels[new_idx]
    return new_stack,new_labels

In [10]:
final_images, final_labels = shuffle(sort_images,sort_labels)

In [11]:
np.save('particles_nofive.npy',final_images)
np.save('labels_nofive.npy',final_labels)

In [11]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [12]:
from keras.preprocessing.image import ImageDataGenerator

In [13]:
from keras.utils import to_categorical

In [14]:
images = final_images[:,50:150,50:150]
images = np.expand_dims(images,axis = 3)

In [15]:
lbls = final_labels.copy()

In [16]:
lbls[lbls == 4] = 1
lbls[lbls == 6] =2

In [17]:
labels = to_categorical(lbls)

In [18]:
labels.shape

(270, 3)

# Try basic CNN

In [19]:
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(100, 100,1)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(3))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [20]:
batch_size = 16

In [21]:
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=False)

In [22]:
test_datagen = ImageDataGenerator(rescale=1./255)

In [23]:
trainX = images[:175]
trainY = labels[:175]

In [42]:
trainX.shape

(175, 100, 100, 1)

In [24]:
valX = images[175:225]
valY = labels[175:225]

In [25]:
seed = 42

In [26]:
train_generator = train_datagen.flow(trainX, y=trainY, batch_size=batch_size,seed=seed)
val_generator = test_datagen.flow(valX,y=valY,batch_size=batch_size,seed=seed)

In [27]:
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 98, 98, 32)        320       
_________________________________________________________________
activation_1 (Activation)    (None, 98, 98, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 49, 49, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 47, 47, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 47, 47, 32)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 23, 23, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 21, 21, 64)        18496     
__________

In [28]:
model.fit_generator(
        train_generator,
        steps_per_epoch=500 // batch_size,
        epochs=10,
        validation_data=val_generator,
        validation_steps=200 // batch_size)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x137b8cda0>

In [59]:
model.fit_generator(
        train_generator,
        steps_per_epoch=500 // batch_size,
        epochs=10,
        validation_data=val_generator,
        validation_steps=200 // batch_size)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x13995a198>

In [30]:
predicted = model.predict(images[225:250])

In [31]:
predicted

array([[0.0000000e+00, 0.0000000e+00, 1.0000000e+00],
       [1.0000000e+00, 0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 1.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 1.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 1.0000000e+00],
       [0.0000000e+00, 1.0000000e+00, 0.0000000e+00],
       [1.0000000e+00, 0.0000000e+00, 0.0000000e+00],
       [1.0000000e+00, 5.8300371e-30, 0.0000000e+00],
       [1.0000000e+00, 0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 1.0000000e+00],
       [0.0000000e+00, 1.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 1.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 1.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 1.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 1.0000000e+00],
       [0.0000000e+00, 1.0000000e+00, 0.0000000e+00],
       [1.0000000e+00, 0.0000000e+00, 0.0000000e+00],
       [0.0000000e+00, 0.0000000e+00, 1.0000000e+00],
       [9.7332025e-23, 1.000

In [32]:
pred_bin = predicted.copy()
pred_bin[pred_bin<0.95] = 0
pred_bin[pred_bin>0] = 1

In [33]:
p_lbl = []
for i in pred_bin:
    if i[0] == 1:
        p_lbl.append(0)
    elif i[1] == 1:
        p_lbl.append(1)
    elif i[2] == 1:
        p_lbl.append(2)

In [34]:
p_lbl = np.array(p_lbl)

In [35]:
p_lbl.shape

(25,)

In [36]:
p_lbl

array([2, 0, 1, 1, 2, 1, 0, 0, 0, 2, 1, 1, 2, 2, 2, 1, 0, 2, 1, 2, 2, 1,
       1, 1, 1])

In [37]:
lbls[225:250]

array([2., 0., 1., 1., 2., 0., 0., 0., 1., 2., 1., 1., 2., 0., 0., 1., 0.,
       2., 1., 2., 2., 1., 1., 0., 2.])

In [38]:
metrics.accuracy_score(lbls[225:250],p_lbl)

0.76

In [39]:
model.save('basic_cnn2.h5')

# Trying retraining VGG16

In [53]:
from keras.applications import VGG16
from keras.models import Model

In [51]:
base_model = VGG16(input_shape=(100,100,3),include_top=False,weights='imagenet')

In [52]:
base_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 100, 100, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 100, 100, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 100, 100, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 50, 50, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 50, 50, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 50, 50, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 25, 25, 128)       0         
__________

In [55]:
vgg_out = base_model.output
x = Flatten()(vgg_out)
x = Dense(1024,activation='relu')(x)
pred_layer = Dense(3,activation='softmax')(x)
model_vgg = Model(inputs=base_model.input,outputs=pred_layer)

In [56]:
for layer in base_model.layers:
    layer.trainable = False

In [57]:
model_vgg.compile(optimizer='Adam',loss='categorical_crossentropy')

In [60]:
trainX_vgg = np.zeros((len(trainX),100,100,3))

In [61]:
trainX.shape

(175, 100, 100, 1)

In [62]:
for i in np.arange(0,len(trainX)):
    for j in np.arange(0,3):
        trainX_vgg[i,:,:,j] = trainX[i,:,:,0]

In [63]:
trainX_vgg.shape

(175, 100, 100, 3)

In [79]:
valX_vgg = np.zeros((len(valX),100,100,3))

In [80]:
for i in np.arange(0,len(valX)):
    for j in np.arange(0,3):
        valX_vgg[i,:,:,j] = valX[i,:,:,0]

In [81]:
train_generator = train_datagen.flow(trainX_vgg, y=trainY, batch_size=batch_size,seed=seed)
val_generator = test_datagen.flow(valX_vgg,y=valY,batch_size=batch_size,seed=seed)

In [99]:
earlyStopping = EarlyStopping(monitor='val_loss', patience=2,verbose=2,min_delta=0.001,mode='min')
# modelCheckpoint = ModelCheckpoint()

In [100]:
model_vgg.fit_generator(train_generator,steps_per_epoch=500 // batch_size,epochs=10,validation_data=val_generator,validation_steps=200 // batch_size, callbacks=[earlyStopping])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 00005: early stopping


<keras.callbacks.History at 0x1381daf28>

In [101]:
predicted = model_vgg.predict(valX_vgg[0:20])

In [85]:
valY.shape

(50, 3)

In [107]:
pred_bin = predicted.copy()
pred_bin[pred_bin<0.95] = 0
pred_bin[pred_bin>0] = 1

In [108]:
p_lbl = []
for i in pred_bin:
    if i[0] == 1:
        p_lbl.append(0)
    elif i[1] == 1:
        p_lbl.append(1)
    elif i[2] == 1:
        p_lbl.append(2)

In [91]:
p_lbl = np.array(p_lbl)

In [92]:
p_lbl

array([2, 1, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2])

In [109]:
pred_bin

array([[0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.]], dtype=float32)

In [93]:
valY[:20]

array([[0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.]], dtype=float32)

In [110]:
metrics.accuracy_score(valY[:20],pred_bin)

0.4

# other stuff

In [76]:
# dimensions of our images.
img_width, img_height = 100, 100

top_model_weights_path = 'bottleneck_fc_model.h5'
epochs = 50
batch_size = 16

In [83]:
from skimage.color import gray2rgb

In [84]:
trainX_rbg = np.zeros((175,100,100,3))
for idx,img in enumerate(trainX):
    t = img[:,:,0]
    trainX_rbg[idx] = gray2rgb(t)

In [87]:
trainX.shape

(175, 100, 100, 1)

In [89]:
valX_rbg = np.zeros((50,100,100,3))
for idx,img in enumerate(valX):
    t = img[:,:,0]
    valX_rbg[idx] = gray2rgb(t)

In [43]:
def save_bottlebeck_features():
    datagen = ImageDataGenerator(rescale=1. / 255)

    # build the VGG16 network
    model = VGG16(include_top=False, weights='imagenet', input_shape=(100,100,3))

    generator = datagen.flow(
        trainX_rbg,
        y = trainY,
        batch_size=batch_size,
        shuffle=False)
    bottleneck_features_train = model.predict_generator(
        generator, len(trainX_rbg) // batch_size)
    np.save('bottleneck_features_train.npy',
            bottleneck_features_train)

    generator = datagen.flow(
        valX_rbg,
        y = valY,
        batch_size=batch_size,
        shuffle=False)
    bottleneck_features_validation = model.predict_generator(
        generator, len(valX_rbg) // batch_size)
    np.save('bottleneck_features_validation.npy',
            bottleneck_features_validation)


def train_top_model():
    train_data = np.load('bottleneck_features_train.npy')
    train_labels = np.array(
        [0] * (len(trainX_rbg) // 2) + [1] * (len(trainX_rbg) // 2))

    validation_data = np.load('bottleneck_features_validation.npy')
    validation_labels = np.array(
        [0] * (len(valX_rbg) // 2) + [1] * (len(valX_rbg) // 2))

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='softmax'))

    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)
