In [67]:
import random
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras import layers

# --- Reproducibility -------------------------------------------------------
# Seed Python's and NumPy's global RNGs so the shuffles below (and any later
# pandas `.sample()` call made without an explicit `random_state`) give the
# same order after a kernel restart.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# --- Batching / image geometry ---------------------------------------------
BATCH_SIZE = 32
VLD_BATCH_SIZE = 50       # batch size of the validation generator
TRAIN_BATCH_SIZE = 30     # batch size of the training generator
SHUFFLE_BUFFER_SIZE = 32
IMG_SIZE = 224            # images are resized to IMG_SIZE x IMG_SIZE

# --- Data locations --------------------------------------------------------
# Image directory handed to the generators. An alternative image set lives in
# './data/pre_512/train_images/'; swap the path here to use it.
TRAIN_DIR = './data/pre/train_images/'

# --- Class selection and split sizes ---------------------------------------
TRAIN_SET = [0, 2]        # diagnosis labels kept for the two-class task
TRAIN_CNTS = [900, 900]   # training samples per label, aligned with TRAIN_SET
VLD_CNTS = [901, 99]      # validation samples per label, aligned with TRAIN_SET
# Totals are derived from the per-class lists so they cannot drift apart.
TRAIN_CNT = sum(TRAIN_CNTS)
VLD_CNT = sum(VLD_CNTS)

# --- Label table -----------------------------------------------------------
# A single seeded permutation is as random as repeated reshuffling, and keeps
# this cell idempotent: re-running it always yields the same row order.
TRAIN_DF = (
    pd.read_csv('./data/train.csv')
    .sample(frac=1, random_state=SEED)
    .reset_index(drop=True)
)

In [68]:
def divintosets(df, train, test, classes, seed=None):
    """Split ``df`` into shuffled train/test frames with fixed per-class sizes.

    For every label in ``classes`` the rows of ``df`` whose ``diagnosis``
    equals that label are taken in their existing order: the first
    ``train[i]`` rows go to the training frame and the next ``test[i]`` rows
    go to the test frame. Both frames are then shuffled once.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an ``id_code`` column (strings) and a ``diagnosis`` column.
    train : sequence of int
        Number of training samples per label, aligned with ``classes``.
    test : sequence of int
        Number of test samples per label, aligned with ``classes``.
    classes : sequence
        Diagnosis labels to include.
    seed : int or None, optional
        Forwarded as ``random_state`` to the final shuffles. ``None`` (the
        default) leaves the shuffle unseeded, as before.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train_df, test_df)``, each with columns ``filename``
        (``id_code + '.png'``) and ``class`` (``str(label)``) and a fresh
        0..n-1 index.

    Raises
    ------
    ValueError
        If a label has fewer rows in ``df`` than ``train[i] + test[i]``.
    """
    traindic = {'filename': [], 'class': []}
    testdic = {'filename': [], 'class': []}

    for i, label in enumerate(classes):
        n_train, n_test = train[i], test[i]
        ids = df.loc[df['diagnosis'] == label, 'id_code']
        names = [code + '.png' for code in ids]

        # Fail early with a readable message instead of letting the filename
        # and label lists get out of step (which pandas reports only as a
        # generic length-mismatch error).
        needed = n_train + n_test
        if len(names) < needed:
            raise ValueError(
                f"class {label!r}: requested {needed} samples "
                f"({n_train} train + {n_test} test) but only {len(names)} "
                f"rows have that diagnosis"
            )

        train_names = names[:n_train]
        test_names = names[n_train:needed]
        traindic['filename'] += train_names
        traindic['class'] += [str(label)] * len(train_names)
        testdic['filename'] += test_names
        testdic['class'] += [str(label)] * len(test_names)

    # One permutation is already uniformly random; repeating it adds nothing.
    train_df = (pd.DataFrame(data=traindic)
                .sample(frac=1, random_state=seed)
                .reset_index(drop=True))
    test_df = (pd.DataFrame(data=testdic)
               .sample(frac=1, random_state=seed)
               .reset_index(drop=True))
    return train_df, test_df

# Build the shuffled training / validation tables for the labels in TRAIN_SET,
# using the per-label sample counts from the configuration cell.
train_df, test_df = divintosets(
    df=TRAIN_DF,
    train=TRAIN_CNTS,
    test=VLD_CNTS,
    classes=TRAIN_SET,
)

In [69]:
# Single image generator shared by both splits; its only transform is
# rescaling pixel values by 1/255.
gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255.)

# Keyword arguments common to the training and validation iterators.
_flow_kwargs = dict(
    directory=TRAIN_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode='binary',
)

# The two iterators differ only in their source table and batch size.
tr_gen = gen.flow_from_dataframe(train_df, batch_size=TRAIN_BATCH_SIZE, **_flow_kwargs)
vld_gen = gen.flow_from_dataframe(test_df, batch_size=VLD_BATCH_SIZE, **_flow_kwargs)

Found 1800 validated image filenames belonging to 2 classes.
Found 198 validated image filenames belonging to 2 classes.


In [70]:
# Small convolutional classifier: three Conv2D + MaxPooling2D stages with
# 32/64/128 filters, then a 512-unit dense layer and a single sigmoid output
# for the two-class problem.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu',
                  input_shape=(IMG_SIZE, IMG_SIZE, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

base_learning_rate = 0.0001
# `learning_rate` is the supported keyword; the older `lr` alias is deprecated
# and rejected by newer Keras releases.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_18 (Conv2D)           (None, 222, 222, 32)      896       
_________________________________________________________________
max_pooling2d_18 (MaxPooling (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 109, 109, 64)      18496     
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 54, 54, 64)        0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 52, 52, 128)       73856     
_________________________________________________________________
max_pooling2d_20 (MaxPooling (None, 26, 26, 128)       0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 86528)            

In [71]:

# Take the number of batches per pass from the generators themselves rather
# than from the configured sample counts. The generators only keep filenames
# they could validate (the recorded run found 198 validation images although
# VLD_CNT is 1000), so count-based step numbers can exceed what is available.
# len(generator) is the number of batches needed to cover its images once.
history = model.fit(tr_gen,
                    steps_per_epoch=len(tr_gen),
                    epochs=10,
                    validation_data=vld_gen,
                    validation_steps=len(vld_gen))

Train for 60 steps, validate for 6 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
