In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals
from random import shuffle
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import IPython.display as display
from PIL import Image
keras = tf.keras

# Image directories. TRAIN_DIR is prefixed to each id_code (plus ".png") when
# images are loaded; TEST_DIR is presumably the matching test folder - TODO confirm.
TRAIN_DIR = './data/pre/train_images/'
TEST_DIR = './data/pre/test_images/'
# Batch size and shuffle-buffer size used when building the tf.data pipelines.
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 1000
# Label tables are read from disk as soon as this cell runs.
TRAIN_DF = pd.read_csv('./data/train.csv')
TEST_DF = pd.read_csv('./data/test.csv')
# The five diagnosis grades 0..4 (note the identifier is spelled "LABLES").
LABLES = np.array([0,1,2,3,4])
# Side length, in pixels, of the square RGB input fed to the models.
IMG_SIZE = 224

In [30]:
# Short alias for the training label table (same object, not a copy).
df = TRAIN_DF

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3662 entries, 0 to 3661
Data columns (total 2 columns):
id_code      3662 non-null object
diagnosis    3662 non-null int64
dtypes: int64(1), object(1)
memory usage: 57.3+ KB


In [34]:
# Distinct diagnosis grades together with the number of rows carrying each grade.
np.unique(df['diagnosis'].values,return_counts = True)

(array([0, 1, 2, 3, 4], dtype=int64),
 array([1805,  370,  999,  193,  295], dtype=int64))

In [35]:
def split(df,validation_split = 0.1,test_split = 0.0):
    """Stratified split of a label table into validation, test and train rows.

    Rows are grouped by their ``diagnosis`` value. Within each class, taken in
    table order, the first ``int(n * validation_split)`` rows go to validation,
    the next ``int(n * test_split)`` rows go to test and the remainder goes to
    train. Each of the three resulting lists is then shuffled in place with
    ``random.shuffle``.

    Args:
        df: DataFrame with a ``diagnosis`` column. Any index is accepted;
            rows are addressed by position, never by index label.
        validation_split: Fraction of every class assigned to validation.
        test_split: Fraction of every class assigned to test.

    Returns:
        dict with keys ``'validation_set'``, ``'test_set'`` and ``'train_set'``,
        each mapping to a list of row tuples (one value per column of ``df``,
        in column order).
    """
    # Group rows by class positionally. Iterating the label column and the row
    # tuples in lockstep keeps them aligned even when the index is not 0..n-1
    # (e.g. after filtering), and accepts whatever class values are present.
    rows_by_class = {}
    labels = df['diagnosis'].tolist()
    for label, row in zip(labels, df.itertuples(index=False, name=None)):
        rows_by_class.setdefault(label, []).append(row)

    S2 = {'validation_set':[], 'test_set':[], 'train_set':[]}
    # Sorted class order keeps the pre-shuffle layout stable across runs.
    for label in sorted(rows_by_class):
        rows = rows_by_class[label]
        n_val = int(len(rows)*validation_split)
        n_test = int(len(rows)*test_split)
        S2['validation_set'] += rows[:n_val]
        S2['test_set'] += rows[n_val:n_val+n_test]
        S2['train_set'] += rows[n_val+n_test:]

    # Mix the classes inside each partition.
    for part in S2.values():
        shuffle(part)
    return S2

def create_img_lab_pairs(pairs):
    """Load images and binary labels for a list of (id_code, diagnosis) rows.

    Every image is read from ``TRAIN_DIR + id_code + ".png"``, decoded as a
    3-channel PNG and scaled to float32 values in [-1, 1]. The label is 0 when
    the diagnosis is 0 or 1, and 1 for every other diagnosis value.

    All images are decoded eagerly and held in memory before the dataset is
    built, so they must share one shape.

    Args:
        pairs: Iterable of rows whose first item is the image id (str) and
            whose second item is the diagnosis grade.

    Returns:
        A ``tf.data.Dataset`` yielding ``(image, label)`` elements.
    """
    imgs = []
    labels = []
    for pair in pairs:
        file_path = TRAIN_DIR + pair[0] + ".png"
        img = tf.io.read_file(file_path)
        img = tf.image.decode_png(img, channels=3)
        # convert_image_dtype already rescales uint8 [0, 255] to float32 [0, 1].
        # Map that to [-1, 1]; dividing by 127.5 here would squeeze every pixel
        # into roughly [-1, -0.992] and leave the network almost no signal.
        img = tf.image.convert_image_dtype(img, tf.float32)
        img = (img * 2.0) - 1.0
        imgs.append(img)
        # Binary target: grades 0 and 1 -> 0, anything else -> 1.
        labels.append(0 if pair[1] in (0, 1) else 1)
    imgs = tf.data.Dataset.from_tensor_slices(imgs)
    labels = tf.data.Dataset.from_tensor_slices(np.array(labels))
    return tf.data.Dataset.zip((imgs,labels))

def create_split_datasets(vsplit = 0.1, tsplit = 0.0):
    """Split TRAIN_DF and turn each partition into an image/label dataset.

    Args:
        vsplit: Per-class fraction handed to ``split`` for validation.
        tsplit: Per-class fraction handed to ``split`` for testing.

    Returns:
        Tuple ``(validation, test, train)`` of datasets built by
        ``create_img_lab_pairs``, created in that order.
    """
    partitions = split(TRAIN_DF, vsplit, tsplit)
    ordered_keys = ('validation_set', 'test_set', 'train_set')
    return tuple(create_img_lab_pairs(partitions[key]) for key in ordered_keys)

In [36]:
# Per-class fractions held out for validation and for testing.
valid = 0.1
test = 0.0
# Unpacked in the order create_split_datasets returns them: validation, test, train.
vset, tsset, trset = create_split_datasets(valid,test)

In [42]:
# Show the validation dataset's element shapes and dtypes.
vset

<ZipDataset shapes: ((224, 224, 3), ()), types: (tf.float32, tf.int32)>

In [23]:
# Only the training stream is shuffled; all three streams are batched.
train_batches = trset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
validation_batches = vset.batch(BATCH_SIZE)
test_batches = tsset.batch(BATCH_SIZE)

# Run a single iteration so image_batch / label_batch keep the first training
# batch after the loop; the empty body is intentional.
for image_batch, label_batch in train_batches.take(1):
    pass

In [29]:
# Inspect the labels of the sampled training batch.
label_batch

<tf.Tensor: id=127764, shape=(32,), dtype=int32, numpy=
array([0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 1, 0, 0])>

In [24]:
# Inspect the labels of the sampled training batch (repeats the previous cell).
label_batch

<tf.Tensor: id=127764, shape=(32,), dtype=int32, numpy=
array([0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 1, 0, 0])>

In [25]:
drop_out_rate = 0.2

# Small CNN assembled layer by layer. Every layer is also applied eagerly to the
# sample image_batch so the intermediate tensors (*_batch) can be inspected.
conv1_layer = tf.keras.layers.Conv2D(32,(3,3),activation = 'relu',input_shape=(IMG_SIZE,IMG_SIZE,3))
conv1_batch = conv1_layer(image_batch)

maxPool1_layer = tf.keras.layers.MaxPooling2D((2,2))
maxPool1_batch = maxPool1_layer(conv1_batch)

conv2_layer = tf.keras.layers.Conv2D(64,(3,3),activation = 'relu')
conv2_batch = conv2_layer(maxPool1_batch)

maxPool2_layer = tf.keras.layers.MaxPooling2D((2,2))
maxPool2_batch = maxPool2_layer(conv2_batch)

conv3_layer = tf.keras.layers.Conv2D(128,(3,3),activation = 'relu')
conv3_batch = conv3_layer(maxPool2_batch)

# Each stage is probed with its own pooling layer (stages 3 and 4 previously
# reused maxPool2_layer by copy-paste).
maxPool3_layer = tf.keras.layers.MaxPooling2D((2,2))
maxPool3_batch = maxPool3_layer(conv3_batch)

conv4_layer = tf.keras.layers.Conv2D(256,(3,3),activation = 'relu')
conv4_batch = conv4_layer(maxPool3_batch)

maxPool4_layer = tf.keras.layers.MaxPooling2D((2,2))
maxPool4_batch = maxPool4_layer(conv4_batch)

flatten_layer = tf.keras.layers.Flatten()
flatten_batch = flatten_layer(maxPool4_batch)

dense1_layer = tf.keras.layers.Dense(1024,activation = 'relu')
dense1_batch = dense1_layer(flatten_batch)

dropout_layer2 = keras.layers.Dropout(rate = drop_out_rate)
dropout_batch2 = dropout_layer2(dense1_batch)

# Single output unit without activation: the model emits one raw logit.
prediction_layer = tf.keras.layers.Dense(1)
prediction_batch = prediction_layer(dropout_batch2)

model = tf.keras.Sequential([conv1_layer, maxPool1_layer, conv2_layer, maxPool2_layer, conv3_layer, maxPool3_layer,
                             conv4_layer, maxPool4_layer,flatten_layer,
                             dense1_layer, dropout_layer2, prediction_layer])

base_learning_rate = 0.0001
# The loss consumes logits, so accuracy must threshold at 0.0 (sigmoid(0) = 0.5).
# The plain 'accuracy' string would threshold the raw logits at 0.5 instead.
# name='accuracy' keeps the keys in the training history unchanged.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)])
model.summary()


Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_15 (Conv2D)           (None, 222, 222, 32)      896       
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 109, 109, 64)      18496     
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 54, 54, 64)        0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 52, 52, 128)       73856     
_________________________________________________________________
max_pooling2d_17 (MaxPooling (None, 26, 26, 128)       0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 24, 24, 256)      

In [26]:
initial_epochs = 10
# NOTE(review): validation_steps is assigned here but never passed to model.fit
# below, so validation iterates over all of validation_batches.
validation_steps=20
history = model.fit(train_batches,
                    epochs=initial_epochs,
                    validation_data=validation_batches)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
 14/104 [===>..........................] - ETA: 6:11 - loss: 0.6934 - accuracy: 0.5192

KeyboardInterrupt: 

In [39]:
# Rebuild the batched input streams (same construction as the earlier batching cell).
train_batches = trset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
validation_batches = vset.batch(BATCH_SIZE)
test_batches = tsset.batch(BATCH_SIZE)
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

# Create the base model from the pre-trained ResNet50V2 (ImageNet weights,
# classification head excluded via include_top=False).
base_model = tf.keras.applications.ResNet50V2(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')
# Keep the first training batch in image_batch / label_batch; the empty body is intentional.
for image_batch, label_batch in train_batches.take(1):
    pass
base_model.trainable = True
# Presumably the subtracted value is how many trailing layers to leave
# trainable; with 0, the loop below freezes every layer of the base model.
NumOfLayers = len(base_model.layers) - 0
print("Number of layers in the base model: ", NumOfLayers)

# Freeze the first NumOfLayers layers of the base model.
for layer in base_model.layers[:NumOfLayers]:
    layer.trainable =  False
#base_model.summary()

Number of layers in the base model:  190


In [40]:
drop_out_rate = 0.4
dense_layer_number = 512

# Classification head stacked on the frozen base model. Every layer is also
# applied eagerly to the sample image_batch so intermediate tensors can be inspected.
feature_batch = base_model(image_batch)
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)

dropout_layer1 = keras.layers.Dropout(rate = drop_out_rate)
dropout_batch1 = dropout_layer1(feature_batch_average)

dense_hiden_layer = keras.layers.Dense(dense_layer_number,activation = 'relu',kernel_regularizer = keras.regularizers.l2(0.001))
dense_hiden_batch = dense_hiden_layer(dropout_batch1)

dropout_layer2 = keras.layers.Dropout(rate = drop_out_rate)
dropout_batch2 = dropout_layer2(dense_hiden_batch)

# Single output unit without activation: the model emits one raw logit.
prediction_layer = keras.layers.Dense(1,kernel_regularizer = keras.regularizers.l2(0.001))
prediction_batch = prediction_layer(dropout_batch2)

model = tf.keras.Sequential([base_model, global_average_layer, dropout_layer1,dense_hiden_layer, dropout_layer2, prediction_layer])

base_learning_rate = 0.0001
# The loss consumes logits, so accuracy must threshold at 0.0 (sigmoid(0) = 0.5).
# The plain 'accuracy' string would threshold the raw logits at 0.5 instead.
# name='accuracy' keeps the keys in the training history unchanged.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)])
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50v2 (Model)           (None, 7, 7, 2048)        23564800  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2048)              0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 2048)              0         
_________________________________________________________________
dense_12 (Dense)             (None, 512)               1049088   
_________________________________________________________________
dropout_7 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 1)                 513       
Total params: 24,614,401
Trainable params: 1,049,601
Non-trainable params: 23,564,800
__________________________________

In [41]:
initial_epochs = 10
# NOTE(review): validation_steps is assigned here but never passed to model.fit
# below, so validation iterates over all of validation_batches.
validation_steps=20
history = model.fit(train_batches,
                    epochs=initial_epochs,
                    validation_data=validation_batches)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [40]:
# Target folders for cached features. NOTE(review): neither name is referenced
# again within this cell.
trpath = './data/prepre/train/'
valpath = './data/prepre/validate/'
# Re-split with the default fractions; this rebinds vset / tsset / trset.
vset, tsset, trset = create_split_datasets()
base_model = tf.keras.applications.ResNet50V2(input_shape=(IMG_SIZE, IMG_SIZE, 3),include_top=False, weights='imagenet')
base_model.trainable = False
# Keep a single one-image validation batch in tfimg / tflable; the empty body is intentional.
for tfimg, tflable in vset.batch(1).take(1):
    pass
feature_batch = base_model(tfimg)
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
# Feature extractor: the frozen base model followed by global average pooling.
model = tf.keras.Sequential([base_model,global_average_layer])

In [89]:
# Cache the extracted features of the validation set, one .npy file per label value.
for i in range(5):
    imgs = []
    pathh = './data/prepre/validate/{}'.format(i)
    for img, lable in vset.batch(1):
        # Each batch holds exactly one sample. Compare its label as a plain
        # Python int rather than relying on the truthiness of a tensor comparison.
        if int(lable.numpy()[0]) != i:
            continue
        imgs.append(model.predict(img))
    if imgs:
        # Each prediction is (1, n_features); stack them into (n_samples, n_features).
        # The earlier reshape() call returned a new array that was thrown away,
        # so the saved file kept the extra singleton axis.
        imgs = np.concatenate(imgs, axis=0)
    else:
        # No sample carries this label: save an empty, correctly shaped array
        # instead of failing on a missing axis.
        imgs = np.empty((0, model.output_shape[-1]), dtype=np.float32)
    np.save(pathh, imgs)

In [90]:
# Cache the extracted features of the training set, one .npy file per label value.
for i in range(5):
    imgs = []
    pathh = './data/prepre/train/{}'.format(i)
    for img, lable in trset.batch(1):
        # Each batch holds exactly one sample. Compare its label as a plain
        # Python int rather than relying on the truthiness of a tensor comparison.
        if int(lable.numpy()[0]) != i:
            continue
        imgs.append(model.predict(img))
    if imgs:
        # Each prediction is (1, n_features); stack them into (n_samples, n_features).
        # The earlier reshape() call returned a new array that was thrown away,
        # so the saved file kept the extra singleton axis.
        imgs = np.concatenate(imgs, axis=0)
    else:
        # No sample carries this label: save an empty, correctly shaped array
        # instead of failing on a missing axis.
        imgs = np.empty((0, model.output_shape[-1]), dtype=np.float32)
    np.save(pathh, imgs)