In [53]:
import random
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras import layers

BATCH_SIZE = 32
VLD_BATCH_SIZE = 50
TRAIN_BATCH_SIZE = 30
SHUFFLE_BUFFER_SIZE = 32
IMG_SIZE = 224

TRAIN_DIR = './data/pre_512_30/train_images/'
TRAIN_DIR = './data/pre/train_images/'

TRAIN_DF = pd.read_csv('./data/train.csv')
TRAIN_SET = [0,2]
TRAIN_CNTS = [900, 900]
VLD_CNTS = [99, 99]
TRAIN_CNT = 1800
VLD_CNT = 198

for i in range(100):
    TRAIN_DF = TRAIN_DF.sample(frac = 1).reset_index(drop=True)

In [2]:
def divintosets(df, train, test, classes):
    traindic = {'filename' : [], 'class' : []}
    testdic = {'filename' : [], 'class' : []}
    for i in range(len(classes)):
        fn = [x + '.png' for x in df['id_code'][df['diagnosis'] == classes[i]].values]
        traindic['filename'] += fn[:train[i]]
        traindic['class'] += [str(classes[i])] * train[i]
        testdic['filename'] += fn[train[i] : train[i] + test[i]]
        testdic['class'] += [str(classes[i])] * test[i]
    train_df = pd.DataFrame(data = traindic)
    test_df = pd.DataFrame(data = testdic)
    for i in range(10):
        train_df = train_df.sample(frac = 1).reset_index(drop=True)
        test_df = test_df.sample(frac = 1).reset_index(drop=True)
    return train_df, test_df

train_df, test_df = divintosets(TRAIN_DF, TRAIN_CNTS, VLD_CNTS, TRAIN_SET) 

In [3]:
gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255.)
tr_gen = gen.flow_from_dataframe(train_df, 
                                 target_size = (IMG_SIZE, IMG_SIZE), 
                                 class_mode = 'binary',
                                 directory = TRAIN_DIR,
                                 batch_size = TRAIN_BATCH_SIZE
                                 )
vld_gen = gen.flow_from_dataframe(test_df, 
                                 target_size = (IMG_SIZE, IMG_SIZE), 
                                 class_mode = 'binary',
                                 directory = TRAIN_DIR,
                                  batch_size = 1,
                                  shuffle = False
                                 )

Found 1800 validated image filenames belonging to 2 classes.
Found 198 validated image filenames belonging to 2 classes.


In [4]:
model = models.Sequential()
model.add(layers.Conv2D(32, (3,3), activation = 'relu', input_shape=(IMG_SIZE,IMG_SIZE,3)))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(64, (3,3), activation = 'relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(128, (3,3), activation = 'relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Flatten())
#model.add(layers.Dropout(0.4))
model.add(layers.Dense(512, activation = 'relu'))
model.add(layers.Dense(1,activation = 'sigmoid'))

base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(lr=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 222, 222, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 109, 109, 64)      18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 54, 54, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 52, 52, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 26, 26, 128)       0         
_________________________________________________________________
flatten (Flatten)            (None, 86528)             0

In [5]:

history = model.fit(tr_gen,
                    steps_per_epoch = TRAIN_CNT//TRAIN_BATCH_SIZE,
                    epochs = 3,
                    validation_data = vld_gen,
                    validation_steps = VLD_CNT//VLD_BATCH_SIZE)

Train for 60 steps, validate for 3 steps
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [16]:
pred_df = TRAIN_DF.copy()
v = [x + '.png' for x in pred_df['id_code'].values]
pred_df['id_code'] = v
v = [str(x%2) for x in pred_df['diagnosis'].values]
pred_df['diagnosis'] = v
pred_df.columns = ['filename','class']
pred_gen = gen.flow_from_dataframe(pred_df, 
                                 target_size = (IMG_SIZE, IMG_SIZE), 
                                 class_mode = 'binary',
                                 directory = TRAIN_DIR,
                                  batch_size = 64,
                                  shuffle = False
                                 )

Found 3662 validated image filenames belonging to 2 classes.


In [18]:
import time
start = time.time()
vv = model.predict(pred_gen)
end = time.time()
print(end - start)

109.63593435287476


In [None]:
for c1,c2 in vld_gen:
    break

In [None]:
c1 = c1.reshape(224,224,3)

In [None]:
plt.imshow(c1)

In [None]:
TRAIN_DIR + test_df['filename'][0]

In [None]:
file_path = TRAIN_DIR + test_df['filename'][0]
img = tf.io.read_file(file_path)
img = tf.image.decode_png(img, channels=3)
img = tf.image.convert_image_dtype(img, tf.float32)
#img = (img/255)
#model.predict(tf.reshape(img,(1,224,224,3)))[0,0]
#test_df['class'][0]
print("min = {}, max = {}".format(np.min(img), np.max(img)))
plt.imshow(img)

In [None]:
np.min(img)

In [None]:
dictt = {'p' : [], 'c' : []}
acc = 0.0
for i in range(test_df.shape[0]):  
    x = vv[i][0]
    ccc = test_df.iat[i,1]
    if x > 0.5 and ccc == '2':
        acc += 1.0
    if x <=0.5 and ccc == '0':
        acc += 1.0
    dictt['p'].append(x)
    dictt['c'].append(ccc)       
print(acc / 198)  
newdf = pd.DataFrame(dictt)

In [None]:
newdf

In [None]:
test_df

In [None]:
dictt = {'p' : [], 'c' : []}
acc = 0.0
for _,fn in test_df.iterrows():  
    file_path = TRAIN_DIR + fn['filename']
    img = tf.io.read_file(file_path)
    img = tf.image.decode_png(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = (img/255) - 1
    x = model.predict(tf.reshape(img,(1,224,224,3)))
    x = x[0,0]
    if x > 0.5 and fn['class'] == '2':
        acc += 1.0
    if x <=0.5 and fn['class'] == '0':
        acc += 1.0
    dictt['p'].append(x)
    dictt['c'].append(fn['class'])       
print(acc / 198)  
newdf = pd.DataFrame(dictt)

In [None]:
newdf

In [None]:
dictt['p'] += 4.9
print(dictt['p'])

In [None]:
for _,row in test_df.iterrows():
    print(row['filename'],row['class'])
    break

In [None]:
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()