In [4]:
# https://github.com/sdcubber/Keras-Sequence-boilerplate/blob/master/Keras-Sequence.ipynb

import os
import matplotlib
matplotlib.use("Agg")

import tensorflow as tf
from tensorflow import keras
from keras.utils import Sequence

import pandas as pd
import numpy as np

In [2]:
# DATA PATHS
# All locations are relative to the directory the notebook is run from.

# Image folders, one per data split, plus a folder for preview images.
TRAIN_FOLDER_PATH = '../../data/train/'
VAL_FOLDER_PATH = '../../data/validation/'
# Backward-compatible alias: the constant was originally misspelled, so the
# old name is kept for any code that still refers to it.
VAL_FODLER_PATH = VAL_FOLDER_PATH
TEST_FOLDER_PATH = '../../data/test/'
PREVIEW_IMAGES_FOLDER = '../../data/preview/'

# CSV files holding the labels for the training and validation images.
TRAIN_CSV_PATH = '../../data/train_labels.csv'
VALIDATION_CSV_PATH = '../../data/validation_labels.csv'

In [3]:
# Read the training labels table from disk and preview its first five rows.
train = pd.read_csv(filepath_or_buffer=TRAIN_CSV_PATH)
train.head(n=5)

Unnamed: 0,image_id,is_parasitized
0,cell_1.png,1
1,cell_10.png,1
2,cell_1000.png,1
3,cell_1001.png,1
4,cell_1002.png,1


In [12]:
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
import math
import random

def load_image(im, target_size=None):
    """Load an image file as an RGB array with values divided by 255.

    Args:
        im: Path of the image file to read.
        target_size: Optional ``(height, width)`` tuple forwarded to
            ``load_img`` so the image is resized while loading. ``None``
            (the default) keeps the size stored in the file, which matches
            the previous behaviour.

    Returns:
        The array produced by ``img_to_array`` divided by 255, which maps
        8-bit pixel values onto the ``[0, 1]`` range.
    """
    # ``color_mode='rgb'`` is the supported spelling of the deprecated
    # ``grayscale=False`` flag; both request a 3-channel colour image.
    image = load_img(im, color_mode='rgb', target_size=target_size)
    return img_to_array(image) / 255

class DataSequence(Sequence):
    """Keras ``Sequence`` serving batches of images and their binary labels.

    Image paths are built by joining ``data_path`` with each entry of the
    ``image_id`` column of ``df``; labels are taken from the
    ``is_parasitized`` column. Images are read from disk with ``load_image``
    only when a batch is requested.

    Args:
        df: DataFrame with ``image_id`` and ``is_parasitized`` columns.
        data_path: Directory containing the image files.
        batch_size: Number of samples per batch; the final batch may be
            smaller.
        mode: ``'train'`` shuffles the sample order at construction and again
            after every epoch. Any other value keeps the DataFrame order.
    """

    def __init__(self, df, data_path, batch_size, mode='train'):
        super().__init__()
        self.df = df
        self.bsz = batch_size
        self.mode = mode

        # Keep the labels and the list of image locations in memory; the
        # images themselves are loaded lazily, batch by batch.
        self.labels = self.df['is_parasitized'].values
        self.im_list = [os.path.join(data_path, name) for name in self.df['image_id']]

        # Build the initial sample order (shuffled in training mode) so that
        # ``self.indexes`` exists before the first batch is requested.
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (rounded up)."""
        return int(math.ceil(len(self.df) / float(self.bsz)))

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when in training mode."""
        self.indexes = list(range(len(self.im_list)))
        if self.mode == 'train':
            random.shuffle(self.indexes)

    def _batch_indexes(self, idx):
        """Return the sample positions that make up batch number ``idx``."""
        return self.indexes[idx * self.bsz: (idx + 1) * self.bsz]

    def get_batch_labels(self, idx):
        """Fetch the labels of batch ``idx`` in the current sample order."""
        return self.labels[self._batch_indexes(idx)]

    def get_batch_features(self, idx):
        """Load and stack the images of batch ``idx`` in the current sample order."""
        return np.array([load_image(self.im_list[i]) for i in self._batch_indexes(idx)])

    def __getitem__(self, idx):
        """Return the ``(inputs, labels)`` pair for batch number ``idx``."""
        # Both getters go through the same index list, so images and labels
        # stay aligned after shuffling.
        batch_x = self.get_batch_features(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_x, batch_y

In [13]:
from keras.models import Model
from keras.layers import Input, Conv2D, Dense, Dropout, MaxPool2D, Flatten

# Side length, in pixels, of the square 3-channel input the network accepts.
im_size = 64

x = Input(shape=(im_size, im_size, 3))

# Three identical stages: a 3x3 convolution with 32 filters and ReLU,
# followed by max pooling with the layer's default settings.
conv_1 = MaxPool2D()(
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
)
conv_2 = MaxPool2D()(
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(conv_1)
)
conv_3 = MaxPool2D()(
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(conv_2)
)

# Classifier head: flatten, a 32-unit ReLU layer with 20% dropout, then a
# single sigmoid unit for the binary output.
flat = Flatten()(conv_3)
dense_1 = Dropout(rate=0.2)(
    Dense(units=32, activation='relu')(flat)
)
output = Dense(units=1, activation='sigmoid')(dense_1)

model = Model(inputs=x, outputs=output)
model.compile(loss='binary_crossentropy', optimizer='sgd')

In [16]:
# Wrap the training table in a batch generator (20 samples per batch) and
# train for 5 epochs using a single worker without multiprocessing.
seq = DataSequence(df=train, data_path=TRAIN_FOLDER_PATH, batch_size=20)
model.fit_generator(
    generator=seq,
    epochs=5,
    verbose=1,
    workers=1,
    use_multiprocessing=False,
)

Epoch 1/5


FileNotFoundError: [Errno 2] No such file or directory: '../../data/train/cell_3323.png'