In [97]:
import random
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras import layers

BATCH_SIZE = 32
VLD_BATCH_SIZE = 50
TRAIN_BATCH_SIZE = 30
SHUFFLE_BUFFER_SIZE = 32
IMG_SIZE = 224

TRAIN_DIR = './data/pre_512/train_images/'
TRAIN_DIR = './data/pre/train_images/'

TRAIN_DF = pd.read_csv('./data/train.csv')
TRAIN_SET = [0,2]
TRAIN_CNTS = [900, 900]
VLD_CNTS = [99, 99]
TRAIN_CNT = 1800
VLD_CNT = 1000

for i in range(100):
    TRAIN_DF = TRAIN_DF.sample(frac = 1).reset_index(drop=True)

In [98]:
def divintosets(df, train, test, classes):
    traindic = {'filename' : [], 'class' : []}
    testdic = {'filename' : [], 'class' : []}
    for i in range(len(classes)):
        fn = [x + '.png' for x in df['id_code'][df['diagnosis'] == classes[i]].values]
        traindic['filename'] += fn[:train[i]]
        traindic['class'] += [str(classes[i])] * train[i]
        testdic['filename'] += fn[train[i] : train[i] + test[i]]
        testdic['class'] += [str(classes[i])] * test[i]
    train_df = pd.DataFrame(data = traindic)
    test_df = pd.DataFrame(data = testdic)
    for i in range(10):
        train_df = train_df.sample(frac = 1).reset_index(drop=True)
        test_df = test_df.sample(frac = 1).reset_index(drop=True)
    return train_df, test_df

train_df, test_df = divintosets(TRAIN_DF, TRAIN_CNTS, VLD_CNTS, TRAIN_SET) 

In [112]:
gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255.)
tr_gen = gen.flow_from_dataframe(train_df, 
                                 target_size = (IMG_SIZE, IMG_SIZE), 
                                 class_mode = 'binary',
                                 directory = TRAIN_DIR,
                                 batch_size = TRAIN_BATCH_SIZE
                                 )
vld_gen = gen.flow_from_dataframe(test_df, 
                                 target_size = (IMG_SIZE, IMG_SIZE), 
                                 class_mode = 'binary',
                                 directory = TRAIN_DIR,
                                  batch_size = 1,
                                  shuffle = False
                                 )

Found 1800 validated image filenames belonging to 2 classes.
Found 198 validated image filenames belonging to 2 classes.


In [75]:
model = models.Sequential()
model.add(layers.Conv2D(32, (3,3), activation = 'relu', input_shape=(IMG_SIZE,IMG_SIZE,3)))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(64, (3,3), activation = 'relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(128, (3,3), activation = 'relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation = 'relu'))
model.add(layers.Dense(1,activation = 'sigmoid'))

base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(lr=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_21 (Conv2D)           (None, 222, 222, 32)      896       
_________________________________________________________________
max_pooling2d_21 (MaxPooling (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 109, 109, 64)      18496     
_________________________________________________________________
max_pooling2d_22 (MaxPooling (None, 54, 54, 64)        0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 52, 52, 128)       73856     
_________________________________________________________________
max_pooling2d_23 (MaxPooling (None, 26, 26, 128)       0         
_________________________________________________________________
flatten_7 (Flatten)          (None, 86528)            

In [76]:

history = model.fit(tr_gen,
                    steps_per_epoch = TRAIN_CNT//TRAIN_BATCH_SIZE,
                    epochs = 10,
                    validation_data = vld_gen,
                    validation_steps = VLD_CNT//VLD_BATCH_SIZE)

Train for 60 steps, validate for 20 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [113]:
vv = model.predict(vld_gen)

In [None]:
for _,c2 in vld_gen:
    print(c2)

In [111]:
vld_gen

<keras_preprocessing.image.dataframe_iterator.DataFrameIterator at 0x24d52c8da88>

In [85]:
TRAIN_DIR + test_df['filename'][0]

'./data/pre/train_images/e03a74e7d74f.png'

In [None]:
file_path = TRAIN_DIR + test_df['filename'][0]
img = tf.io.read_file(file_path)
img = tf.image.decode_png(img, channels=3)
img = tf.image.convert_image_dtype(img, tf.float32)
img = (img/255) - 1
model.predict(tf.reshape(img,(1,224,224,3)))[0,0]
test_df['class'][0]

In [115]:
vv.shape

(198, 1)

In [132]:
dictt = {'p' : [], 'c' : []}
acc = 0.0
for i in range(test_df.shape[0]):  
    x = vv[i][0]
    ccc = test_df.iat[i,1]
    if x > 0.5 and ccc == '2':
        acc += 1.0
    if x <=0.5 and ccc == '0':
        acc += 1.0
    dictt['p'].append(x)
    dictt['c'].append(ccc)       
print(acc / 198)  
newdf = pd.DataFrame(dictt)

0.9343434343434344


In [133]:
newdf

Unnamed: 0,p,c
0,0.934599,2
1,0.896167,2
2,0.000196,0
3,0.036894,2
4,0.844945,2
...,...,...
193,0.016268,0
194,0.954951,2
195,0.076002,0
196,0.834328,2


In [123]:
test_df

Unnamed: 0,filename,class
0,7550966ef777.png,2
1,57a5f1015504.png,2
2,b22cc1bf0b8a.png,0
3,9d98a0b585f2.png,2
4,a21b37719f9b.png,2
...,...,...
193,493d99f030e2.png,0
194,ea1d045f9fea.png,2
195,97c6cb55866d.png,0
196,c9d42d7534e0.png,2


In [129]:
dictt = {'p' : [], 'c' : []}
acc = 0.0
for _,fn in test_df.iterrows():  
    file_path = TRAIN_DIR + fn['filename']
    img = tf.io.read_file(file_path)
    img = tf.image.decode_png(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = (img/255) - 1
    x = model.predict(tf.reshape(img,(1,224,224,3)))
    x = x[0,0]
    if x > 0.5 and fn['class'] == '2':
        acc += 1.0
    if x <=0.5 and fn['class'] == '0':
        acc += 1.0
    dictt['p'].append(x)
    dictt['c'].append(fn['class'])       
print(acc / 198)  
newdf = pd.DataFrame(dictt)

0.5


In [130]:
newdf

Unnamed: 0,p,c
0,0.970158,2
1,0.970163,2
2,0.969352,0
3,0.969474,2
4,0.969935,2
...,...,...
193,0.970024,0
194,0.970080,2
195,0.970140,0
196,0.970062,2


In [94]:
dictt['p'] += 4.9
print(dictt['p'])

[]


In [80]:
for _,row in test_df.iterrows():
    print(row['filename'],row['class'])
    break

e03a74e7d74f.png 0


In [None]:
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()