In [1]:
from resnet_builder import resnet
from tensorflow import keras
import os
import glob
import numpy as np
from keras.preprocessing.image import  img_to_array, load_img
from PIL import Image
from keras.callbacks import EarlyStopping
from tensorflow.python.keras.applications import ResNet50
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.layers import Dense, Dropout, Flatten, Activation, BatchNormalization
from tensorflow.python.keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
import time
from sklearn.model_selection import train_test_split
import cv2

Using TensorFlow backend.


In [2]:
size = (256, 256)
nbofdata=700
base_path = r'ml100-03-final/image_data/train/'
layers_of_folders=0
folder_list=[]    
categories=['daisy','dandelion', 'rose', 'sunflower', 'tulip']

if base_path :
    folder_layers=[]
    files = os.scandir(base_path)
    #  Get the 1st layer of folder
    first_folder = []
    first_folder_kind = []
    for entry in files:
        if entry.is_dir():
            first_folder.append(entry.path)
            first_folder_kind.append(entry.name)
    folder_layers.append(first_folder_kind)
    folder_list.append(first_folder)

In [3]:
datanumber=nbofdata
blob=[]
blob_nparray=[]
image_data=[]
conc = 0
labels_dict={}
fnamelist = {}
for entry1 in folder_list[layers_of_folders - 1]:
    blob = []
    cellname = os.path.basename(os.path.dirname(entry1))  # extract cell name
    # print(cellname)
    concnames = os.path.basename(entry1)  # extract concentration
    # print(concnames)
    if concnames in categories:
        labels_dict[conc] = concnames
        fnamelist = glob.glob(os.path.join(entry1, '*.jpg'))
        for filename in fnamelist[0:datanumber]:
            im = Image.open(filename)
            if im is not None:
                if im.mode=='RGB':
                    im=im.resize(size,Image.BILINEAR)
                    imarray = np.array(im)
                    blob.append(imarray)
        ind = np.reshape(np.arange(1, len(blob) + 1), (-1, 1))
        blob_nparray = np.reshape(np.asarray(blob), (len(blob), blob[1].size))
        blob_nparray = np.hstack((blob_nparray, ind, conc * np.ones((len(blob), 1))))
        image_data.append(np.asarray(blob_nparray, dtype=np.float32))
        print(concnames+'  finished!')
        conc += 1

sunflower  finished!
tulip  finished!
dandelion  finished!
daisy  finished!
rose  finished!


In [4]:
for j in range(len(categories)):
    trytry=image_data[j][:]
# Prepare data
    LengthT = trytry.shape[0]
    trytry_index = trytry[...,-2:-1]
    trytry_label = trytry[...,-1:] #['Nega' for x in range(lengthN*4)] #Nega_data[...,-1:]
    trytry = trytry[...,:-2]
    
    # Normalize image by subtracting mean image
    trytry -= np.reshape(np.mean(trytry, axis=1), (-1,1))
    # Reshape images
    trytry = np.reshape(trytry, (trytry.shape[0],256,256,3))
    
#    # Rotate images
#    for i in range(3):
#        trytry[LengthT*(i+1):LengthT*(i+2)] = np.rot90(trytry[:LengthT], i+1, (1,2))
    # Add channel dimension to fit in Conv2D
    trytry = trytry.reshape(-1,256,256,3)
    np.random.shuffle(trytry)
    trytry_train_upto = round(trytry.shape[0] * 8 / 10)
    trytry_test_upto = trytry.shape[0]
    if j is 0:
        train_data = trytry[:trytry_train_upto]
        test_data = trytry[trytry_train_upto:trytry_test_upto]
        train_label = trytry_label[:trytry_train_upto]
        test_label = trytry_label[trytry_train_upto:trytry_test_upto]
        
    else:
        train_data = np.concatenate((train_data, 
                                     trytry[:trytry_train_upto]), axis=0)
        
        test_data = np.concatenate((test_data, 
                                    trytry[trytry_train_upto:trytry_test_upto]), axis=0)
        
        train_label = np.concatenate((train_label, 
                                     trytry_label[:trytry_train_upto]), axis=0)
        
        
        test_label = np.concatenate((test_label, 
                                    trytry_label[trytry_train_upto:trytry_test_upto]), axis=0)
        
test_label = keras.utils.to_categorical(test_label, num_classes=len(categories))
train_label = keras.utils.to_categorical(train_label, num_classes=len(categories))

In [5]:
import random
temp = list(zip(train_data, train_label))

random.shuffle(temp)

train_data,train_label = zip(*temp)

train_data=np.asarray(train_data)
train_label=np.asarray(train_label)

In [6]:
weight_path = 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'

In [7]:
def create_model_from_ResNet50():

    """
     Use ResNet-50 (this model's code is from https://www.kaggle.com/cokastefan/keras-resnet-50)
    """
    model = Sequential()

    model.add(ResNet50(include_top=False, pooling='avg', weights=weight_path))
    model.add(Flatten())
    model.add(BatchNormalization())
    model.add(Dense(2048, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(1024, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(len(categories), activation='softmax'))

    model.layers[0].trainable = False
    
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc']) # optimizer=RMSprop(lr=0.001)
    
    return model

In [8]:
model_ResNet50 = create_model_from_ResNet50()
model_ResNet50.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2048)              23587712  
_________________________________________________________________
flatten (Flatten)            (None, 2048)              0         
_________________________________________________________________
batch_normalization (BatchNo (None, 2048)              8192      
_________________________________________________________________
dense (Dense)                (None, 2048)              4196352   
_________________________________________________________________
batch_normalization_1 (Batch (None, 2048)              8192      
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              2098176   
_________________________________________________________________
batch_normalization_2 (Batch (None, 1024)              4

In [9]:
batch_size = 32
epochs = 60

In [10]:
# Adding rescale, rotation_range, width_shift_range, height_shift_range,
# shear_range, zoom_range, and horizontal flip to our ImageDataGenerator
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.4,
    height_shift_range=0.4,
    shear_range=0.2,
    zoom_range=0.3,
    horizontal_flip=True
)

# Note that the validation data should not be augmented!
val_datagen = ImageDataGenerator(
    rescale=1./255,
#     rotation_range=40,
#     width_shift_range=0.2,
#     height_shift_range=0.2,
#     shear_range=0.2,
#     zoom_range=0.2,
#     horizontal_flip=True
)

# Flow training images in batches of 32 using train_datagen generator
train_generator = train_datagen.flow(
    train_data,
    train_label,
    batch_size=batch_size
)

val_generator = val_datagen.flow(
    test_data,
    test_label,
    batch_size=batch_size
)

In [11]:
model_ckpt = ModelCheckpoint(filepath="./tmp.h5", 
                             monitor="val_loss", 
                             save_best_only=True)

EStop = EarlyStopping(monitor='val_acc', min_delta=0, 
                      patience=10, verbose=1, mode='auto')


start = time.time()

model_ResNet50_info = model_ResNet50.fit_generator(
    generator=train_generator, 
    steps_per_epoch=len(train_data)/batch_size,
    epochs=epochs, 
    validation_steps=len(test_data)/batch_size,
    validation_data=val_generator, 
    verbose=2,
    callbacks=[model_ckpt, EStop]
)

end = time.time()
duration = end - start

Epoch 1/60
71/70 - 863s - loss: 1.4263 - acc: 0.7378 - val_loss: 7.5812 - val_acc: 0.1735
Epoch 2/60
71/70 - 821s - loss: 0.5989 - acc: 0.8198 - val_loss: 2.7385 - val_acc: 0.1681
Epoch 3/60
71/70 - 818s - loss: 0.4486 - acc: 0.8512 - val_loss: 3.8713 - val_acc: 0.1770
Epoch 4/60
71/70 - 836s - loss: 0.4136 - acc: 0.8525 - val_loss: 3.6994 - val_acc: 0.2142
Epoch 5/60
71/70 - 849s - loss: 0.3570 - acc: 0.8756 - val_loss: 3.7341 - val_acc: 0.1770
Epoch 6/60
71/70 - 878s - loss: 0.3375 - acc: 0.8866 - val_loss: 8.8353 - val_acc: 0.2425
Epoch 7/60
71/70 - 916s - loss: 0.3298 - acc: 0.8875 - val_loss: 8.0553 - val_acc: 0.1770
Epoch 8/60
71/70 - 942s - loss: 0.2758 - acc: 0.9021 - val_loss: 5.8414 - val_acc: 0.1770
Epoch 9/60
71/70 - 910s - loss: 0.2451 - acc: 0.9167 - val_loss: 5.6205 - val_acc: 0.1770
Epoch 10/60
71/70 - 934s - loss: 0.2650 - acc: 0.9030 - val_loss: 4.2297 - val_acc: 0.1770
Epoch 11/60
71/70 - 898s - loss: 0.2249 - acc: 0.9203 - val_loss: 4.0855 - val_acc: 0.1770
Epoch 12

In [12]:
print ('\n model_ResNet50 took %0.2f seconds (%0.1f minutes) to train for %d epochs'%(duration, duration/60, epochs) )


 model_ResNet50 took 14035.43 seconds (233.9 minutes) to train for 60 epochs


In [13]:
model = keras.models.load_model("./tmp.h5")

In [14]:
size = (256, 256)
nbofdata=2000
base_path = r'ml100-03-final/image_data/test/'
layers_of_folders=0
folder_list=[]    
labels=['test']

if base_path :
    folder_layers=[]
    files = os.scandir(base_path)
    #  Get the 1st layer of folder
    first_folder = []
    first_folder_kind = []
    for entry in files:
        if entry.is_dir():
            first_folder.append(entry.path)
            first_folder_kind.append(entry.name)
    folder_layers.append(first_folder_kind)
    folder_list.append(first_folder)

In [15]:
datanumber=nbofdata
blob=[]
blob_nparray=[]
image_data=[]
conc = 0
fc = 0
labels_dict={}
fn = {}
for entry1 in folder_list[layers_of_folders - 1]:
    blob = []
    cellname = os.path.basename(os.path.dirname(entry1))  # extract cell name
    # print(cellname)
    concnames = os.path.basename(entry1)  # extract concentration
    # print(concnames)
    if concnames in labels:
        labels_dict[conc] = concnames
        fnamelist = glob.glob(os.path.join(entry1, '*.jpg'))
        for filename in fnamelist[0:datanumber]:
            im = Image.open(filename)
            if im is not None:
                if im.mode=='RGB':
                    im=im.resize(size,Image.BILINEAR)
                    imarray = np.array(im)
                    blob.append(imarray)
                    fn[fc] = filename
                    fc += 1
        ind = np.reshape(np.arange(1, len(blob) + 1), (-1, 1))
        blob_nparray = np.reshape(np.asarray(blob), (len(blob), blob[1].size))
        blob_nparray = np.hstack((blob_nparray, ind, conc * np.ones((len(blob), 1))))
        image_data.append(np.asarray(blob_nparray, dtype=np.float32))
        print(ind)
        print(blob_nparray)
        print(concnames+'  finished!')
        conc += 1

[[   1]
 [   2]
 [   3]
 ...
 [1998]
 [1999]
 [2000]]
[[1.190e+02 1.110e+02 1.100e+01 ... 3.000e+00 1.000e+00 0.000e+00]
 [1.980e+02 2.150e+02 2.270e+02 ... 8.500e+01 2.000e+00 0.000e+00]
 [3.700e+01 6.300e+01 3.700e+01 ... 3.500e+01 3.000e+00 0.000e+00]
 ...
 [1.080e+02 1.230e+02 3.000e+01 ... 0.000e+00 1.998e+03 0.000e+00]
 [1.380e+02 1.720e+02 1.990e+02 ... 2.000e+00 1.999e+03 0.000e+00]
 [0.000e+00 0.000e+00 0.000e+00 ... 1.000e+00 2.000e+03 0.000e+00]]
test  finished!


In [16]:
fn[0]

'ml100-03-final/image_data/test/test/b38d1fef59f487bf8e702c5eab79880d.jpg'

In [17]:
sp = {}
for i in range(2000):
    sp[i] = fn[i].split('/')
    sp[i] = sp[i][4].split('.jpg')

In [18]:
for i in range(10):
    print(sp[i][0])

b38d1fef59f487bf8e702c5eab79880d
4cd32ea34f68e3b43c73341b8fb3d4c0
aabbd368642e6843bb6f15a3afaa9ed0
71469fb90f914a3639e7691ea2a64214
6c6fc0a1bd638792e341c75949c76428
fd2580a8f500b27baf6913759b29c003
499790bb426abd7f293270ff2a357984
d79a3d0a0e8120333f1ea82aaaad1dd0
ff7eac29b6d7a33fbd8009677c3e9c58
8ceefea6d56655f1689ae14a20c0f8be


In [19]:
ids = {}
for i in range(2000):
    ids[i] = sp[i][0]

In [20]:
for j in range(len(labels)):
    trytry=image_data[j][:]
# Prepare data
    LengthT = trytry.shape[0]

    trytry_index = trytry[...,-2:-1]

    trytry_label = trytry[...,-1:] #['Nega' for x in range(lengthN*4)] #Nega_data[...,-1:]

    trytry = trytry[...,:-2]

    # Normalize image by subtracting mean image
    trytry -= np.reshape(np.mean(trytry, axis=1), (-1,1))
    # Reshape images
    trytry = np.reshape(trytry, (trytry.shape[0],256,256,3))
    
#    # Rotate images
#    for i in range(3):
#        trytry[LengthT*(i+1):LengthT*(i+2)] = np.rot90(trytry[:LengthT], i+1, (1,2))
    # Add channel dimension to fit in Conv2D
    trytry = trytry.reshape(-1,256,256,3)
    trytry_test_upto = trytry.shape[0]
    if j is 0:
        test_data = trytry[:trytry_test_upto]      
    else:     
        test_data = np.concatenate((test_data, 
                                    trytry[trytry_train_upto:trytry_test_upto]), axis=0)

In [21]:
test_data.shape

(2000, 256, 256, 3)

In [22]:
predictions=model.predict(test_data)

In [23]:
for i in range(10):
    for j in range(5):
        print(predictions[i][j])

3.5982463e-05
5.8403054e-17
1.913208e-05
0.9999449
1.0906862e-12
0.9999999
6.4368e-08
3.7248065e-09
1.591024e-12
6.4554287e-15
5.0635878e-09
8.1437385e-20
1.0
3.6114584e-10
1.1175388e-17
0.99999964
6.986518e-12
3.262669e-07
1.4432815e-09
3.1808473e-12
0.0037337795
1.2769132e-09
0.29285094
0.7034152
4.1086555e-08
4.7331164e-06
5.399462e-14
6.231142e-06
0.99998903
7.83112e-12
1.0
2.499573e-20
8.275651e-11
1.5456631e-12
1.5356637e-15
5.9839056e-14
0.99997413
2.499118e-12
1.2965865e-13
2.5899793e-05
4.1844833e-11
5.02455e-19
1.0
3.979181e-09
4.276488e-16
1.534344e-12
0.9999951
1.7914715e-15
6.8845238e-12
4.9317377e-06


In [24]:
import matplotlib.pyplot as plt
from keras.preprocessing.image import array_to_img

plt.imshow(array_to_img(test_data[1]))
plt.show()

<Figure size 640x480 with 1 Axes>

In [25]:
cnn_pred = {}
for i in range(2000):
    for j in range(5):
        if predictions[i][j] == np.max(predictions[i]):
            if j == 0:
                cnn_pred[i] = 3
            if j == 1:
                cnn_pred[i] = 4
            if j == 2:
                cnn_pred[i] = 1
            if j == 3:
                cnn_pred[i] = 0
            if j == 4:
                cnn_pred[i] = 2

In [26]:
cnn_pred

{0: 0,
 1: 3,
 2: 1,
 3: 3,
 4: 0,
 5: 0,
 6: 3,
 7: 4,
 8: 1,
 9: 4,
 10: 2,
 11: 2,
 12: 4,
 13: 0,
 14: 0,
 15: 4,
 16: 3,
 17: 0,
 18: 1,
 19: 0,
 20: 1,
 21: 3,
 22: 4,
 23: 4,
 24: 3,
 25: 0,
 26: 1,
 27: 4,
 28: 0,
 29: 4,
 30: 4,
 31: 2,
 32: 4,
 33: 3,
 34: 4,
 35: 1,
 36: 3,
 37: 2,
 38: 0,
 39: 1,
 40: 1,
 41: 4,
 42: 1,
 43: 4,
 44: 3,
 45: 3,
 46: 1,
 47: 0,
 48: 0,
 49: 1,
 50: 0,
 51: 3,
 52: 0,
 53: 0,
 54: 1,
 55: 1,
 56: 4,
 57: 4,
 58: 1,
 59: 4,
 60: 3,
 61: 3,
 62: 0,
 63: 1,
 64: 4,
 65: 3,
 66: 1,
 67: 3,
 68: 1,
 69: 4,
 70: 1,
 71: 0,
 72: 1,
 73: 0,
 74: 0,
 75: 0,
 76: 2,
 77: 0,
 78: 1,
 79: 4,
 80: 1,
 81: 1,
 82: 3,
 83: 3,
 84: 0,
 85: 1,
 86: 4,
 87: 1,
 88: 4,
 89: 0,
 90: 3,
 91: 2,
 92: 2,
 93: 1,
 94: 3,
 95: 2,
 96: 4,
 97: 3,
 98: 0,
 99: 4,
 100: 3,
 101: 1,
 102: 0,
 103: 2,
 104: 1,
 105: 1,
 106: 3,
 107: 4,
 108: 4,
 109: 3,
 110: 4,
 111: 4,
 112: 1,
 113: 3,
 114: 0,
 115: 4,
 116: 0,
 117: 3,
 118: 2,
 119: 0,
 120: 1,
 121: 1,
 122: 2,
 12

In [27]:
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

submit = pd.DataFrame({'id': ids, 'flower_class': cnn_pred})
header = ["id", "flower_class"]
submit.to_csv('cnn_resnet_predict_256.csv', columns = header, index=False)