In [None]:
import numpy as np
import pandas as pd 
import os
from datetime import datetime
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.resnet50 import ResNet50
from keras.models import Sequential
from keras import layers
from sklearn.model_selection import train_test_split
from keras import optimizers
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint


In [None]:
# Load data: list what sits under ../data, record the image folders, and
# read the training labels with every column parsed as a string.
data_entries = os.listdir("../data")
print(data_entries)

train_dir, test_dir = "../data/train/", "../data/test"

df_train = pd.read_csv('../data/train_labels.csv', dtype=str)
df_train.head()

In [None]:
# Inspect the container type of the "id" column.
type(df_train["id"])

In [None]:
# Read the sample submission with every column parsed as a string.
df_test = pd.read_csv(filepath_or_buffer="../data/sample_submission.csv", dtype=str)

# add extension to image filenames
def append_ext(fn):
    """Return ``fn`` with a ``.tif`` suffix, adding it only when it is missing.

    The check makes the helper idempotent: the cell that applies it to the
    ``id`` columns mutates the frames in place, so re-running that cell would
    otherwise produce names such as ``abc.tif.tif``.

    Args:
        fn: File name (string), with or without the ``.tif`` extension.

    Returns:
        The file name ending in exactly one ``.tif``.
    """
    return fn if fn.endswith(".tif") else fn + ".tif"
# Run the filename helper over the "id" column of both frames, then print a
# preview of the training frame.
for frame in (df_train, df_test):
    frame["id"] = frame["id"].apply(append_ext)

print(df_train.head())

In [None]:
# Inspect the type of a single value from the "id" column (index label 1).
type(df_train["id"][1])

In [None]:
# Alias only: `df` is bound to the same DataFrame object as `df_train`
# (no copy is made), so in-place changes through either name affect both.
df = df_train

In [None]:

def _standardize_image(x):
    """Shift ``x`` to zero mean and scale it to unit standard deviation.

    ``x`` is returned unchanged when its standard deviation is not positive,
    which avoids a division by zero on constant arrays.
    """
    return (x - x.mean()) / x.std() if x.std() > 0 else x


# Generator used for the training data: standardisation plus random
# horizontal/vertical flips, with 20% of the files reserved for validation.
_train_options = dict(
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0.2,
)
train_datagen = ImageDataGenerator(preprocessing_function=_standardize_image, **_train_options)

# Generator for the test data: standardisation only.
test_datagen = ImageDataGenerator(preprocessing_function=_standardize_image)

In [None]:
# Build the train and validation generators with flow_from_directory.
# Only the training generator is shuffled and augmented; the validation
# generator keeps a fixed order and gets no random flips, so it is processed
# the same way as the test images.

# Options shared by both generators.
_flow_options = dict(
    target_size=(96, 96),
    classes=['0', '1'],
    batch_size=64,
    class_mode='binary',
)

# Validation reuses the training standardisation function but drops the flips,
# so the validation metric is not perturbed by random augmentation.
# NOTE: validation_split must stay equal to the value given to train_datagen,
# otherwise the 'training' and 'validation' subsets would overlap.
valid_datagen = ImageDataGenerator(
    preprocessing_function=train_datagen.preprocessing_function,
    validation_split=0.2,
)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    shuffle=True,
    subset='training',
    **_flow_options
)

valid_generator = valid_datagen.flow_from_directory(
    train_dir,
    shuffle=False,
    subset='validation',
    **_flow_options
)



In [None]:
# model definition: ResNet50 (ImageNet weights, no top) followed by a small
# dense head ending in a single sigmoid unit.

IMG_SIZE = (96, 96)
IN_SHAPE = IMG_SIZE + (3,)

dropout_dense = 0.5

conv_base = ResNet50(include_top=False, weights='imagenet', input_shape=IN_SHAPE)

# Head stacked on top of the convolutional base, in order.
head_layers = [
    layers.Flatten(),
    layers.Dense(256, use_bias=False),
    layers.BatchNormalization(),
    layers.Activation("relu"),
    layers.Dropout(dropout_dense),
    layers.Dense(1, activation="sigmoid"),
]
model = Sequential([conv_base] + head_layers)

conv_base.summary()

In [None]:
# freeze layer. Unfreeze starts at layer conv2_block1_1_conv.
# If freeze everything, val acc is really bad

FIRST_TRAINABLE_LAYER = 'conv2_block1_1_conv'

# The Keras attribute is lowercase `trainable`; assigning `Trainable` only
# creates an unrelated attribute and has no effect on training.
conv_base.trainable = True

# Walk the base in order: layers before FIRST_TRAINABLE_LAYER are frozen,
# that layer and everything after it stay trainable.
set_trainable = False
for layer in conv_base.layers:
    if layer.name == FIRST_TRAINABLE_LAYER:
        set_trainable = True
    layer.trainable = set_trainable

# If the name never matched, the loop froze every layer; fail loudly instead
# of silently training with a fully frozen base.
if not set_trainable:
    raise ValueError(
        "Layer %r not found in conv_base; every layer was left frozen."
        % FIRST_TRAINABLE_LAYER
    )

In [None]:
# Compile for binary classification: Adam constructed with 0.01, binary
# cross-entropy loss, accuracy as the reported metric.
# (Alternative kept from the original: set conv_base.trainable = False before
# compiling to freeze the whole base.)
model.compile(
    optimizer=optimizers.Adam(0.01),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Timestamp the run (day, month, year, then time) and derive the checkpoint
# file name from it.
now = datetime.now()
dt_string = format(now, "%d%m%Y_%H%M%S")
print("date and time =", dt_string)

filepath = "".join(["model_resnet_", dt_string, ".h5"])
print(filepath)

In [None]:
# Training steps per epoch: full batches only, but never fewer than one step.
STEP_SIZE_TRAIN = max(1, train_generator.n // train_generator.batch_size)

# Validation steps: ceiling division so the final partial batch is evaluated
# too. Floor division would skip the tail of the validation set every epoch,
# and the callbacks below all act on that validation metric.
STEP_SIZE_VALID = (
    (valid_generator.n + valid_generator.batch_size - 1) // valid_generator.batch_size
)

# Keep the best model on disk, stop when validation accuracy stalls for two
# epochs, and shrink the learning rate after one epoch without improvement.
checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1,
                             save_best_only=True, mode='auto')
earlystopper = EarlyStopping(monitor='val_accuracy', patience=2, verbose=1,
                             restore_best_weights=True)
reducel = ReduceLROnPlateau(monitor='val_accuracy', patience=1, verbose=1,
                            factor=0.3, min_lr=0.00001)

history = model.fit(train_generator, steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=valid_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=8,
                    callbacks=[reducel, earlystopper, checkpoint])

In [None]:
# Show the name and shape of the first few weight tensors instead of dumping
# the values of every tensor into the notebook output.
[(w.name, tuple(w.shape)) for w in model.weights[:10]]

In [None]:
# predict test and submission: collect the test image paths and set up the
# batching state consumed by the prediction loop.
from glob import glob
from skimage.io import imread

test_pattern = os.path.join(test_dir + '/images', '*.tif')
test_files = glob(test_pattern)
max_idx = len(test_files)

file_batch = 5000
submission = pd.DataFrame()


In [None]:
# Check the id extraction on one path: take the last path component (forward
# or back slashes), then keep the part before its first dot.
sample_basename = test_files[1].split('/')[-1].split('\\')[-1]
sample_basename.split(".")[0]

In [None]:
def _file_id(path):
    """Return the last component of ``path`` (forward or back slashes) up to its first dot."""
    return path.split('/')[-1].split('\\')[-1].split(".")[0]


def _standardize_each(images):
    """Standardise every image of a stacked batch with its own mean and std.

    The training generator standardises each image separately, so the test
    images must be treated the same way; statistics pooled over a whole batch
    of files would feed the model differently scaled inputs.
    Images whose standard deviation is zero are returned unscaled.

    Args:
        images: Array whose first axis indexes the images.

    Returns:
        Floating point array of the same shape.
    """
    images = images.astype("float32")
    axes = tuple(range(1, images.ndim))
    mean = images.mean(axis=axes, keepdims=True)
    std = images.std(axis=axes, keepdims=True)
    safe_std = np.where(std > 0, std, 1.0)  # avoid dividing by zero
    return np.where(std > 0, (images - mean) / safe_std, images)


# Predict in chunks of `file_batch` files so that only one chunk of images is
# held in memory at a time; the per-chunk frames are concatenated once at the end.
batch_frames = []
for idx in range(0, max_idx, file_batch):
    batch_paths = test_files[idx:idx + file_batch]
    print("Indexes: %i - %i" % (idx, idx + len(batch_paths)))
    K_test = _standardize_each(np.stack([imread(p) for p in batch_paths]))
    predictions = model.predict(K_test)
    batch_frames.append(pd.DataFrame({
        'id': [_file_id(p) for p in batch_paths],
        # predict returns one column per output unit; flatten to one value per file
        'label': np.asarray(predictions).ravel(),
    }))

submission = (
    pd.concat(batch_frames, ignore_index=True)
    if batch_frames
    else pd.DataFrame(columns=["id", "label"])
)
submission.head()

In [None]:
# save file
# Create the output folder first so the write cannot fail on a missing
# directory after training and prediction have already been paid for.
os.makedirs("../submission", exist_ok=True)
submission.to_csv("../submission/submission_resnet_"+dt_string+".csv", index = False, header = True)

In [None]:
# other checks: evaluate the model on the validation generator and report
# the two values it returns.
val_loss, val_acc = model.evaluate(valid_generator)

for metric_label, metric_value in (('val_loss:', val_loss), ('val_acc:', val_acc)):
    print(metric_label, metric_value)

In [None]:
# Unshuffled, one-image-per-batch generator over the test directory.
test_flow_options = dict(
    target_size=(96, 96),
    batch_size=1,
    class_mode='binary',
    shuffle=False,
)
test_generator = test_datagen.flow_from_directory('../data/test/', **test_flow_options)

In [None]:
# Run the model over the test generator, with a progress bar.
predictions = model.predict(x=test_generator, verbose=1)

In [None]:
# Shape of the prediction array and the class-name -> index mapping of the
# test generator. (`test_data` was never defined; the generator built for the
# test directory is `test_generator`.)
print(predictions.shape)
print(test_generator.class_indices)