In [124]:
import random
import os
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras import layers

BATCH_SIZE = 32
IMG_SIZE = 224

TRAIN_DIR = './data/pre/train_images/'
TEST_DIR = './data/pre/test_images/'
ROOT_DIR = './data/pre/'

TRAIN_DF = pd.read_csv('./data/pre/train.csv')
TEST_DF = pd.read_csv('./data/pre/test.csv')

CNTS = np.array([[1443, 362],
                 [295, 75],
                 [799, 200],
                 [154, 39],
                 [235, 60]])

#for i in range(100):
#    TRAIN_DF = TRAIN_DF.sample(frac = 1).reset_index(drop=True)

In [160]:
def GetSingleClass(df,c,n):
    lx = (df['class'] == c) & (df['type'] == 0)
    res = list(df[lx]['filename'].to_numpy())
    if lx.sum() > n:
        return res[:n]
    m = n - lx.sum()
    lx = (df['class'] == c) & (df['type'] != 0)
    lst = list(df[lx].sort_values(by='type')['filename'].to_numpy())
    res += lst[:m]
    return res
    
def GetClassesElements(df,cv,nv):
    res = []
    for i in range(len(cv)):
        res += GetSingleClass(df,cv[i],nv[i])
    return res
        
def GetGenerator(df,cv,nv,rootdir,bs = 32):
    fv1 = GetClassesElements(df,cv[0],nv[0])
    fv2 = GetClassesElements(df,cv[1],nv[1])
    df = pd.DataFrame({'filename' : fv1 + fv2, 'class' : ['0']*len(fv1) + ['1']*len(fv2)})
    for i in range(100):
        df = df.sample(frac = 1).reset_index(drop=True)
    gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255.)
    tr_gen = gen.flow_from_dataframe(df,target_size = (IMG_SIZE, IMG_SIZE),directory = rootdir,
                                     class_mode = 'binary', batch_size = bs) 
    return tr_gen

In [178]:
train_batch_size = 32
train_set = [[1],[2,3,4]]
train_cnts = [[1188], [799,154,235]]
train_cnt = np.array([x for y in train_cnts for x in y]).sum()
train = GetGenerator(TRAIN_DF,train_set,train_cnts, TRAIN_DIR,bs = train_batch_size)

Found 2376 validated image filenames belonging to 2 classes.


In [179]:
test_batch_size = 32
test_set = [[1],[2,3,4]]
test_cnts = [[75], [25,25,25]]
test_cnt = np.array([x for y in test_cnts for x in y]).sum()
test = GetGenerator(TEST_DF,test_set,test_cnts, TEST_DIR,bs = test_batch_size)

Found 150 validated image filenames belonging to 2 classes.


In [180]:
model = models.Sequential()
model.add(layers.Conv2D(32, (3,3), activation = 'relu', input_shape=(IMG_SIZE,IMG_SIZE,3)))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(64, (3,3), activation = 'relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(128, (3,3), activation = 'relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(256, (3,3), activation = 'relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.GlobalAveragePooling2D())
#model.add(layers.Dropout(0.4))
model.add(layers.Dense(256, activation = 'relu'))
model.add(layers.Dense(1,activation = 'sigmoid'))

base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(lr=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 222, 222, 32)      896       
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 109, 109, 64)      18496     
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 54, 54, 64)        0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 52, 52, 128)       73856     
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 26, 26, 128)       0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 24, 24, 256)      

In [None]:
history = model.fit(train,
                    steps_per_epoch = train_cnt//train_batch_size,
                    epochs = 5,
                    validation_data = test,
                    validation_steps = test_cnt//test_batch_size)

Train for 74 steps, validate for 4 steps
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

In [None]:
def divintosets(df, train, test, classes):
    traindic = {'filename' : [], 'class' : []}
    testdic = {'filename' : [], 'class' : []}
    clsid = 0
    for i in range(len(classes)):
        for j in range(len(classes[i])):
            lx = df['diagnosis'] == classes[i][j]
            fn = [x + '.png' for x in df['id_code'][lx].values]
            traindic['filename'] += fn[:train[i][j]]
            traindic['class'] += [str(clsid)] * train[i][j]
            testdic['filename'] += fn[train[i][j] : train[i][j] + test[i][j]]
            testdic['class'] += [str(clsid)] * test[i][j]
        clsid += 1
    train_df = pd.DataFrame(data = traindic)
    test_df = pd.DataFrame(data = testdic)
    for i in range(10):
        train_df = train_df.sample(frac = 1).reset_index(drop=True)
        test_df = test_df.sample(frac = 1).reset_index(drop=True)
    return train_df, test_df

train_df, test_df = divintosets(TRAIN_DF, TRAIN_CNTS, VLD_CNTS, TRAIN_SET) 

In [None]:
train_df

In [None]:
tran_gen = tf.keras.preprocessing.image.ImageDataGenerator(rotation_range = 40,
                                                          width_shift_range = 0.2,
                                                          height_shift_range = 0.2,
                                                          shear_range = 0.2,
                                                          zoom_range = 0.2,
                                                          fill_mode = 'nearest')
tr_gen = tran_gen.flow_from_dataframe(train_df,
                                      target_size = (IMG_SIZE, IMG_SIZE), 
                                      directory = TRAIN_DIR,
                                      save_to_dir = SAVE_TO_DIR
                                 ) 

In [None]:
#for img in tr_gen:
#    pass

In [2]:
p = './data/temp/'
df = pd.DataFrame({'filename' : ['xxx.png','yyy.png', 'zzz.png'], 'class' : ['7','8','9']})

In [33]:
def dumy(img):
    sigma = np.random.choice(25,1)[0]
    noise = np.random.normal(0,sigma,img.shape)
    img=cv2.addWeighted ( img.astype(float),1, noise ,1 ,0)
    lx = img < 0
    img[lx] = 0
    lx = img > 255
    img[lx] = 255
    return img

gen = tf.keras.preprocessing.image.ImageDataGenerator(rotation_range = 360,
                                                    preprocessing_function = dumy)
tr_gen = gen.flow_from_dataframe(df,
                                      target_size = (IMG_SIZE, IMG_SIZE), 
                                      directory = p,
                                      save_to_dir = p,
                                      class_mode = 'categorical',
                                 batch_size = 1,
                                 shuffle = True
                                 ) 

Found 3 validated image filenames belonging to 3 classes.


In [None]:
gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255.)
tr_gen = gen.flow_from_dataframe(train_df, 
                                 target_size = (IMG_SIZE, IMG_SIZE), 
                                 class_mode = 'binary',
                                 directory = TRAIN_DIR,
                                 batch_size = TRAIN_BATCH_SIZE
                                 )
vld_gen = gen.flow_from_dataframe(test_df, 
                                 target_size = (IMG_SIZE, IMG_SIZE), 
                                 class_mode = 'binary',
                                 directory = TRAIN_DIR,
                                  batch_size = 1,
                                  shuffle = False
                                 )

In [None]:
model = models.Sequential()
model.add(layers.Conv2D(32, (3,3), activation = 'relu', input_shape=(IMG_SIZE,IMG_SIZE,3)))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(64, (3,3), activation = 'relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Conv2D(128, (3,3), activation = 'relu'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Flatten())
#model.add(layers.Dropout(0.4))
model.add(layers.Dense(512, activation = 'relu'))
model.add(layers.Dense(1,activation = 'sigmoid'))

base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(lr=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=['accuracy'])

model.summary()

In [None]:

history = model.fit(tr_gen,
                    steps_per_epoch = TRAIN_CNT//TRAIN_BATCH_SIZE,
                    epochs = 3,
                    validation_data = vld_gen,
                    validation_steps = VLD_CNT//VLD_BATCH_SIZE)

In [None]:
pred_df = TRAIN_DF.copy()
v = [x + '.png' for x in pred_df['id_code'].values]
pred_df['id_code'] = v
v = [str(x%2) for x in pred_df['diagnosis'].values]
pred_df['diagnosis'] = v
pred_df.columns = ['filename','class']
pred_gen = gen.flow_from_dataframe(pred_df, 
                                 target_size = (IMG_SIZE, IMG_SIZE), 
                                 class_mode = 'binary',
                                 directory = TRAIN_DIR,
                                  batch_size = 64,
                                  shuffle = False
                                 )

In [None]:
import time
start = time.time()
vv = model.predict(pred_gen)
end = time.time()
print(end - start)

In [None]:
for c1,c2 in vld_gen:
    break

In [None]:
c1 = c1.reshape(224,224,3)

In [None]:
plt.imshow(c1)

In [None]:
TRAIN_DIR + test_df['filename'][0]

In [None]:
file_path = TRAIN_DIR + test_df['filename'][0]
img = tf.io.read_file(file_path)
img = tf.image.decode_png(img, channels=3)
img = tf.image.convert_image_dtype(img, tf.float32)
#img = (img/255)
#model.predict(tf.reshape(img,(1,224,224,3)))[0,0]
#test_df['class'][0]
print("min = {}, max = {}".format(np.min(img), np.max(img)))
plt.imshow(img)

In [None]:
np.min(img)

In [None]:
dictt = {'p' : [], 'c' : []}
acc = 0.0
for i in range(test_df.shape[0]):  
    x = vv[i][0]
    ccc = test_df.iat[i,1]
    if x > 0.5 and ccc == '2':
        acc += 1.0
    if x <=0.5 and ccc == '0':
        acc += 1.0
    dictt['p'].append(x)
    dictt['c'].append(ccc)       
print(acc / 198)  
newdf = pd.DataFrame(dictt)

In [None]:
newdf

In [None]:
test_df

In [None]:
dictt = {'p' : [], 'c' : []}
acc = 0.0
for _,fn in test_df.iterrows():  
    file_path = TRAIN_DIR + fn['filename']
    img = tf.io.read_file(file_path)
    img = tf.image.decode_png(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = (img/255) - 1
    x = model.predict(tf.reshape(img,(1,224,224,3)))
    x = x[0,0]
    if x > 0.5 and fn['class'] == '2':
        acc += 1.0
    if x <=0.5 and fn['class'] == '0':
        acc += 1.0
    dictt['p'].append(x)
    dictt['c'].append(fn['class'])       
print(acc / 198)  
newdf = pd.DataFrame(dictt)

In [None]:
newdf

In [None]:
dictt['p'] += 4.9
print(dictt['p'])

In [None]:
for _,row in test_df.iterrows():
    print(row['filename'],row['class'])
    break

In [None]:
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()