In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Show what the input directory contains.
input_entries = os.listdir("../input")
print(input_entries)

# Any results you write to the current directory are saved as output.

In [None]:
import cv2
import matplotlib.pyplot as plt

# Preview the first few files of the training directory side by side.
n_preview = 10
img_files = os.listdir('../input/train')[:n_preview]
print(img_files)
print(len(img_files))

# cv2.imread returns None (it does not raise) when a file cannot be decoded,
# so drop those entries instead of letting cvtColor fail on them below.
imgs = []
for img_file in img_files:
    img = cv2.imread(os.path.join('../input/train', img_file))
    if img is not None:
        imgs.append(img)

fig = plt.figure(figsize=(16,16))
for i, img in enumerate(imgs):
    # One row, one column per successfully loaded image.
    plt.subplot(1, len(imgs), i+1)
    # Convert from BGR channel order to RGB before handing the array to matplotlib.
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.xticks([])
    plt.yticks([])

plt.show()

In [None]:
# Load the label table, then show its first rows and how often each label occurs.
train_set = pd.read_csv('../input/train_labels.csv')
print(train_set.head())
label_counts = train_set['label'].value_counts()
print(label_counts)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the labelled rows for validation (fixed seed for a repeatable split).
train_data, val_data = train_test_split(
    train_set,
    test_size=0.1,
    random_state=2018,
)
# Number of training rows; needed later to size an epoch.
train_len = train_data.shape[0]

# Persist both splits so they can be re-read from disk in chunks.
train_data.to_csv('../working/train.csv', index=False)
val_data.to_csv('../working/val.csv', index=False)

def read_images(path, image_id):
    """Load one ``.tif`` image and divide its pixel values by 255.

    Args:
        path: Directory that contains the image file.
        image_id: File name without the ``.tif`` extension.

    Returns:
        The array returned by ``cv2.imread`` divided by 255.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the file.
    """
    image_path = os.path.join(path, image_id + '.tif')
    img = cv2.imread(image_path)
    # cv2.imread reports a missing/unreadable file by returning None; without
    # this check the division below fails with an unhelpful TypeError.
    if img is None:
        raise FileNotFoundError('Could not read image: {}'.format(image_path))

    return img / 255
                
def generator(data, batch_size, image_dir='../input/train', image_shape=(96, 96, 3)):
    """Endlessly yield ``(images, labels)`` batches described by a CSV file.

    The CSV is re-read from the start every time it is exhausted, so the
    generator never terminates on its own.

    Args:
        data: Path to a CSV file with ``id`` and ``label`` columns.
        batch_size: Number of CSV rows per yielded batch (the last chunk of a
            pass may be smaller).
        image_dir: Directory the images are loaded from.
        image_shape: Shape of a single loaded image.

    Yields:
        Tuple ``(X, label)`` where ``X`` has shape ``(rows_in_chunk,) + image_shape``
        and ``label`` holds the chunk's ``label`` column values.
    """
    while True:
        # chunksize makes read_csv return an iterator of DataFrames, one per batch.
        for df in pd.read_csv(data, chunksize=batch_size):

            img_names, label = df.id.values, df.label.values
            X = np.zeros((len(img_names),) + tuple(image_shape))

            for i, img_name in enumerate(img_names):
                X[i] = read_images(path=image_dir, image_id=img_name)

            yield X, label

# The splits are on disk now; drop the in-memory frames.
del train_data
del val_data
del train_set

In [None]:
from sklearn import metrics

# Load the whole validation split into arrays: val_X holds the images, val_y the labels.
# The CSV is parsed once and both columns are taken from the same frame.
val_df = pd.read_csv('../working/val.csv')
val_id = val_df.id.values
val_y = val_df.label.values
val_X = np.zeros((len(val_id), 96, 96, 3))

for i, img_name in enumerate(val_id):
    val_X[i, :, :, :] = read_images(path='../input/train', image_id=img_name)
print(val_X.shape)
print(val_y)

# Only val_X and val_y are used from here on.
del val_id, val_df

In [None]:
from keras.applications import DenseNet121
from keras.models import Model
from keras.layers import Input, GlobalAvgPool2D, Dense, Dropout
from keras import metrics
from keras import optimizers
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Backbone: DenseNet121 with ImageNet weights and no classifier top, fed 96x96x3 inputs.
input_tensor = Input(shape=(96, 96, 3))
base_model = DenseNet121(
    weights='imagenet',
    include_top=False,
    input_tensor=input_tensor,
)

# Head: global average pooling -> dropout (rate 0.1) -> one sigmoid unit.
pooled = GlobalAvgPool2D()(base_model.output)
x = Dropout(0.1)(pooled)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=input_tensor, outputs=predictions)
#model.summary()

In [None]:
from keras import losses

# Single sigmoid output -> binary cross-entropy loss, with binary accuracy as the metric.
model.compile(optimizer= optimizers.Adam(lr=0.01, decay=0.01),
              loss=losses.binary_crossentropy, 
              metrics=[metrics.binary_accuracy])

# EarlyStopping must be more patient than ReduceLROnPlateau. With patience=0 on
# both, training stops in the very epoch the learning rate is first reduced, so
# the lower rate is never actually trained with.
callbacks = [EarlyStopping(patience=3), ReduceLROnPlateau(patience=1)]

In [None]:
# Define the batch size once and use it for both the generator and the step count.
batch_size = 64
train_gen = generator(data='../working/train.csv', batch_size=batch_size)
#test_gen = generator(data='../working/val.csv', batch_size=batch_size)

# steps_per_epoch must be a whole number of batches. Round up so the final,
# smaller CSV chunk is counted and one epoch is exactly one pass over train.csv.
steps_per_epoch = (train_len + batch_size - 1) // batch_size
model.fit_generator(train_gen, verbose=1, steps_per_epoch=steps_per_epoch, epochs=100, 
                    callbacks=callbacks, validation_data=(val_X, val_y))

In [None]:
from sklearn import metrics

# Score the validation images with the trained model.
pred_glove_val_y = model.predict([val_X], batch_size=64, verbose=1)
# Show any predictions that are exactly 1.
print(pred_glove_val_y[pred_glove_val_y==1])

# Report the F1 score for every cut-off from 0.10 to 0.50 in steps of 0.01.
for thresh in np.round(np.arange(0.1, 0.501, 0.01), 2):
    hard_labels = (pred_glove_val_y > thresh).astype(int)
    f1 = metrics.f1_score(val_y, hard_labels)
    print("F1 score at threshold {0} is {1}".format(thresh, f1))

In [None]:
from tqdm import tqdm
# Collect the test image ids: '.tif' file names with the extension stripped.
# Entries that are not '.tif' files are ignored.
test_set = os.listdir('../input/test')
test_id = [test_image[:-len('.tif')] for test_image in test_set
           if test_image.endswith('.tif')]
print(len(test_id))

# Predict in batches rather than issuing one model.predict call per image.
test_batch_size = 64
final_output = []

for start in tqdm(range(0, len(test_id), test_batch_size)):
    batch_ids = test_id[start:start + test_batch_size]
    test_X = np.zeros((len(batch_ids), 96, 96, 3))
    for i, img_name in enumerate(batch_ids):
        test_X[i, :, :, :] = read_images(path='../input/test', image_id=img_name)
    output = model.predict(test_X, batch_size=test_batch_size, verbose=0)
    # Threshold the sigmoid outputs at 0.5 to get 0/1 labels, one per image.
    final_output.extend((output > 0.5).astype(int).ravel().tolist())

submission = pd.DataFrame({'id':test_id,
                          'label':final_output})

print(submission.head())
submission.to_csv('submission.csv', index=False)
print(os.listdir('./'))