In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

/kaggle/input/ifood-2019-fgvc6/test_set.zip
/kaggle/input/ifood-2019-fgvc6/train_labels.csv
/kaggle/input/ifood-2019-fgvc6/ifood2019_sample_submission.csv
/kaggle/input/ifood-2019-fgvc6/val_set.zip
/kaggle/input/ifood-2019-fgvc6/train_set.zip
/kaggle/input/ifood-2019-fgvc6/val_labels.csv
/kaggle/input/ifood-2019-fgvc6/sample_submission.csv
/kaggle/input/ifood-2019-fgvc6/class_list.txt


In [2]:
from zipfile import ZipFile
import os

if not os.path.exists("./train_set"):
    for file_name in ['train_set.zip', 'val_set.zip', 'test_set.zip']:
        with ZipFile('../input/ifood-2019-fgvc6/' + file_name, 'r') as zipObj:
            print("unzipping", file_name)
            zipObj.extractall('./')

for dirname, _, _ in os.walk('./'):
    print(dirname)

unzipping train_set.zip
unzipping val_set.zip
unzipping test_set.zip
./
./val_set
./test_set
./train_set


In [3]:
df_train = pd.read_csv('../input/ifood-2019-fgvc6/train_labels.csv')
df_val   = pd.read_csv('../input/ifood-2019-fgvc6/val_labels.csv')
df_test = pd.read_csv('../input/ifood-2019-fgvc6/sample_submission.csv')

df_train['label'] = df_train['label'].astype(str)
df_val['label'] = df_val['label'].astype(str)

train_size = df_train.shape[0]
val_size = df_val.shape[0]
test_size = df_test.shape[0]
num_classes = df_train['label'].nunique()

In [4]:
from keras.preprocessing.image import ImageDataGenerator

# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',
    horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(
    rescale=1./255)

Using TensorFlow backend.


In [5]:
# this is a generator that will read pictures found in
# subfolers of 'data/train', and indefinitely generate
# batches of augmented image data

batch_size  = 64

train_generator = train_datagen.flow_from_dataframe(
    df_train,
    x_col='img_name',
    y_col='label',    
    class_mode='input',
    directory='./train_set/',
    shuffle=True,
#     color_mode='grayscale',
    target_size=(256, 256),
    batch_size=batch_size)

val_generator = train_datagen.flow_from_dataframe(
    df_val,
    x_col='img_name',
    y_col='label',    
    class_mode='input',
    directory='./val_set/',
    shuffle=False,
#     color_mode='grayscale',
    target_size=(256, 256),
    batch_size=batch_size)

test_generator = test_datagen.flow_from_dataframe(
    df_test,
    x_col='img_name',
    directory='./test_set/',
    class_mode='input',
    shuffle=False,
#     color_mode='grayscale',
    target_size=(256, 256),
    batch_size=1)

Found 118475 validated image filenames.
Found 11994 validated image filenames.
Found 28377 validated image filenames.


In [6]:
import tensorflow as tf
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Sequential
from keras import backend as K
from keras.callbacks import ModelCheckpoint

# network parameters
input_shape = (256, 256, 3)

# # detect and init the TPU
# tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
# tf.config.experimental_connect_to_cluster(tpu)
# tf.tpu.experimental.initialize_tpu_system(tpu)

# # instantiate a distribution strategy
# tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

# # instantiating the model in the strategy scope creates the model on the TPU
# with tpu_strategy.scope():
# #     model = tf.keras.Sequential( … ) # define your model normally
# #     model.compile( … )
#     # encoder
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=input_shape))
model.add(MaxPooling2D((2, 2), padding='same'))
model.add(Conv2D(16, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2), padding='same'))
model.add(Conv2D(8, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2), padding='same'))
model.add(Conv2D(8, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2), padding='same'))

# decoder
# at this point the representation is (16, 16, 8) i.e. 512-dimensional
model.add(Conv2D(8, (3, 3), activation='relu', padding='same'))
model.add(UpSampling2D((2, 2)))
model.add(Conv2D(8, (3, 3), activation='relu', padding='same'))
model.add(UpSampling2D((2, 2)))
model.add(Conv2D(16, (3, 3), activation='relu', padding='same'))
model.add(UpSampling2D((2, 2)))
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
model.add(UpSampling2D((2, 2)))
model.add(Conv2D(3, (3, 3), activation='sigmoid', padding='same'))

model.compile(optimizer='adadelta', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 256, 256, 32)      896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 128, 128, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 128, 128, 16)      4624      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 64, 64, 16)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 64, 64, 8)         1160      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 32, 32, 8)         0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 32, 32, 8)        

In [7]:
# Save best model
from keras.callbacks import TensorBoard

filepath       = "autoencoder.hdf5"
checkpoint     = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

epochs      = 50
num_samples = 10000
hist = model.fit_generator(train_generator,
                            steps_per_epoch=num_samples/batch_size,
                            validation_data=val_generator,
                            validation_steps=len(val_generator),
                            callbacks=callbacks_list,                                 
                            epochs=epochs)

Epoch 1/50

Epoch 00001: val_loss improved from inf to 0.56067, saving model to autoencoder.hdf5
Epoch 2/50

Epoch 00002: val_loss improved from 0.56067 to 0.55730, saving model to autoencoder.hdf5
Epoch 3/50

Epoch 00003: val_loss improved from 0.55730 to 0.54065, saving model to autoencoder.hdf5
Epoch 4/50

Epoch 00004: val_loss did not improve from 0.54065
Epoch 5/50

Epoch 00005: val_loss did not improve from 0.54065
Epoch 6/50

Epoch 00006: val_loss improved from 0.54065 to 0.53416, saving model to autoencoder.hdf5
Epoch 7/50

Epoch 00007: val_loss improved from 0.53416 to 0.52648, saving model to autoencoder.hdf5
Epoch 8/50

Epoch 00008: val_loss did not improve from 0.52648
Epoch 9/50

Epoch 00009: val_loss did not improve from 0.52648
Epoch 10/50

Epoch 00010: val_loss did not improve from 0.52648
Epoch 11/50

Epoch 00011: val_loss did not improve from 0.52648
Epoch 12/50

Epoch 00012: val_loss did not improve from 0.52648
Epoch 13/50

Epoch 00013: val_loss did not improve from

In [8]:
import shutil
for path in ["./train_set", "./val_set", "./test_set"]:
    shutil.rmtree(path)