In [1]:
import os
import zipfile
import shutil
import hashlib
from tqdm import tqdm_notebook
from keras import layers

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Input archives (relative to the notebook's working directory) and the root
# directory that later cells extract into and build the split under.
train_path = 'train.zip'  # extracted in a later cell to obtain orig_train_dir
test_path = 'test_stg1.zip'  # extracted in a later cell to obtain test_dir
data_base_dir = 'data'  # parent of the extracted folders and of train_val_split/

In [3]:
# Create the data root if it is missing. exist_ok=True replaces the separate
# exists() check, so there is no window between checking and creating.
os.makedirs(data_base_dir, exist_ok=True)

In [4]:
def extract_zip(zip_path, out_dir):
    """Extract a zip archive into ``out_dir`` and return the expected root folder.

    Args:
        zip_path: Path to the ``.zip`` archive to extract.
        out_dir: Directory the archive contents are extracted into.

    Returns:
        ``out_dir`` joined with the archive's file name up to its first dot.
        The path is only computed, not verified: it exists only if the
        archive really contains a top-level folder with that name.
    """
    name = os.path.basename(zip_path).split('.')[0]
    # The context manager closes the archive even when extraction raises.
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(out_dir)
    return os.path.join(out_dir, name)

In [5]:
# Unpack the training archive under data_base_dir; the returned path is
# data_base_dir joined with the archive's base name.
orig_train_dir = extract_zip(train_path, data_base_dir)

In [6]:
# Unpack the test archive under data_base_dir.
# NOTE(review): the prediction cells further down read from 'data/test/' rather
# than from test_dir — confirm which location is intended.
test_dir  = extract_zip(test_path, data_base_dir)

In [7]:
# Destination roots for the train/validation split, both under
# <data_base_dir>/train_val_split/.
training_dir, validation_dir = (
    os.path.join(data_base_dir, 'train_val_split', subset)
    for subset in ('training', 'validation')
)

In [8]:
# Each sub-directory of the extracted training folder is treated as one class;
# plain files at that level are ignored.
classes = [
    entry
    for entry in os.listdir(orig_train_dir)
    if os.path.isdir(os.path.join(orig_train_dir, entry))
]

In [9]:
# Display the discovered class names.
classes

['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']

In [10]:
# Copy every image of every class into either the training or the validation
# tree. The bucket is derived from a hash of the file name, so a given file
# always lands in the same subset no matter how often the cell is re-run.
for class_ in tqdm_notebook(classes):
    class_orig_dir = os.path.join(orig_train_dir, class_)
    class_training_dir = os.path.join(training_dir, class_)
    class_validation_dir = os.path.join(validation_dir, class_)

    # exist_ok=True keeps the cell re-runnable without a separate exists() check.
    os.makedirs(class_training_dir, exist_ok=True)
    os.makedirs(class_validation_dir, exist_ok=True)

    img_list = os.listdir(class_orig_dir)

    for img in img_list:
        # UTF-8 produces the same bytes as ASCII for ASCII names (so existing
        # splits are unchanged) but does not raise on non-ASCII file names.
        hash_name = hashlib.sha1(img.encode('utf-8'))
        # Buckets 0..100 (of 0..999) go to validation, the rest to training.
        if int(hash_name.hexdigest(), 16) % 1000 > 100:
            target_dir = class_training_dir
        else:
            target_dir = class_validation_dir
        shutil.copy(os.path.join(class_orig_dir, img), target_dir)

HBox(children=(IntProgress(value=0, max=8), HTML(value='')))




KeyboardInterrupt: 

In [11]:
from keras.applications.inception_resnet_v2 import InceptionResNetV2

In [12]:
from keras.applications.vgg16 import VGG16

In [13]:
# Convolutional base without its classification head (include_top=False); all
# other constructor arguments are left at the library defaults.
conv_base = InceptionResNetV2(include_top=False) # alternative tried earlier: VGG16(include_top=False)

In [14]:
from keras import layers, models, optimizers

In [15]:
# Empty sequential container; layers are appended in the next cell.
model = models.Sequential()

In [16]:
# Stack, in order: the convolutional base, global average pooling over the
# spatial dimensions, and an 8-way softmax (one unit per class listed above).
for stage in (
    conv_base,
    layers.GlobalAveragePooling2D(),
    layers.Dense(8, activation='softmax'),
):
    model.add(stage)

In [17]:
# Freeze the convolutional base for the first training phase so that only the
# layers added after it are updated. Set before compile() so it takes effect.
conv_base.trainable = False

In [18]:
# Multi-class setup: categorical cross-entropy loss, Adam at lr=1e-4, and
# accuracy reported as an additional metric.
model.compile(loss='categorical_crossentropy', optimizer=optimizers.Adam(lr=1e-4), metrics=['accuracy'])

In [19]:
from keras.callbacks import ModelCheckpoint, EarlyStopping

# Write the model to disk each time val_loss reaches a new best value.
checkpointer = ModelCheckpoint(
    filepath='quicksign_inception_resnet_512.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)
# Stop training once val_loss has failed to improve for 5 epochs.
earlystopper = EarlyStopping(patience=5, monitor='val_loss')


In [20]:
from keras.preprocessing.image import ImageDataGenerator

In [21]:
# Training images are rescaled to [0, 1] and randomly augmented; validation
# images are only rescaled.
# NOTE(review): the Keras InceptionResNetV2 `preprocess_input` scales inputs to
# [-1, 1]; rescale=1./255 gives [0, 1]. Confirm this mismatch with the
# pretrained base is intended (any change must be applied to train, validation
# and test generators together).
train_data_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
validation_data_gen = ImageDataGenerator(rescale=1./255)

In [22]:
# Stream batches straight from the split folders. Images are resized to
# 512x512 and labels are one-hot encoded ('categorical').
train_generator = train_data_gen.flow_from_directory(
    training_dir,
    target_size=(512, 512),
    batch_size=16,
    class_mode='categorical',
)
validation_generator = validation_data_gen.flow_from_directory(
    validation_dir,
    target_size=(512, 512),
    batch_size=32,
    class_mode='categorical',
)

Found 3393 images belonging to 8 classes.
Found 384 images belonging to 8 classes.


In [23]:
# Phase 1: train with the base frozen for up to 50 epochs. The callbacks save
# the best model by val_loss and stop early when val_loss stalls.
model.fit_generator(train_generator, epochs=50, validation_data=validation_generator, verbose=1,
                    callbacks=[checkpointer, earlystopper])

Epoch 1/50





Epoch 00001: val_loss improved from inf to 1.67566, saving model to quicksign_inception_resnet_512.h5
Epoch 2/50





Epoch 00002: val_loss improved from 1.67566 to 1.62451, saving model to quicksign_inception_resnet_512.h5
Epoch 3/50





Epoch 00003: val_loss improved from 1.62451 to 1.58095, saving model to quicksign_inception_resnet_512.h5
Epoch 4/50





Epoch 00004: val_loss improved from 1.58095 to 1.54704, saving model to quicksign_inception_resnet_512.h5
Epoch 5/50





Epoch 00005: val_loss did not improve from 1.54704
Epoch 6/50





Epoch 00006: val_loss improved from 1.54704 to 1.50690, saving model to quicksign_inception_resnet_512.h5
Epoch 7/50





Epoch 00007: val_loss did not improve from 1.50690
Epoch 8/50





Epoch 00008: val_loss did not improve from 1.50690
Epoch 9/50





Epoch 00009: val_loss improved from 1.50690 to 1.49133, saving model to quicksign_inception_resnet_512.h5
Epoch 10/50





Epoch 00010: val_loss did not improve from 1.49133
Epoch 11/50





Epoch 00011: val_loss improved from 1.49133 to 1.44322, saving model to quicksign_inception_resnet_512.h5
Epoch 12/50





Epoch 00012: val_loss did not improve from 1.44322
Epoch 13/50





Epoch 00013: val_loss did not improve from 1.44322
Epoch 14/50





Epoch 00014: val_loss did not improve from 1.44322
Epoch 15/50





Epoch 00015: val_loss did not improve from 1.44322
Epoch 16/50





Epoch 00016: val_loss did not improve from 1.44322


<keras.callbacks.History at 0x28134800908>

In [24]:
# Restore the weights from the file written by `checkpointer`, since training
# ended on a later epoch than the best one.
model.load_weights('quicksign_inception_resnet_512.h5')

In [25]:
# Partially unfreeze the base: layers stay frozen until the first layer whose
# name contains 'conv_7b'; that layer and every layer after it become trainable.
conv_base.trainable = True
set_trainable = False
for layer in conv_base.layers:
    set_trainable = set_trainable or 'conv_7b' in layer.name
    layer.trainable = set_trainable
    

In [26]:
# Recompile so the changed `trainable` flags take effect; the learning rate is
# lowered to 1e-5 for fine-tuning.
model.compile(loss='categorical_crossentropy', optimizer=optimizers.Adam(lr=1e-5), metrics=['accuracy'])

In [38]:
# Phase 2: fine-tune the partially unfrozen model for up to 100 epochs.
# NOTE(review): the execution count (38) and the logged starting best of
# 0.51281 suggest this cell was run more than once with the same
# `checkpointer` object — confirm the saved output reflects a clean run.
model.fit_generator(train_generator, epochs=100, validation_data=validation_generator, verbose=1,
                    callbacks=[checkpointer, earlystopper])

Epoch 1/100





Epoch 00001: val_loss did not improve from 0.51281
Epoch 2/100





Epoch 00002: val_loss did not improve from 0.51281
Epoch 3/100





Epoch 00003: val_loss did not improve from 0.51281
Epoch 4/100





Epoch 00004: val_loss improved from 0.51281 to 0.50711, saving model to quicksign_inception_resnet_512.h5
Epoch 5/100





Epoch 00005: val_loss did not improve from 0.50711
Epoch 6/100





Epoch 00006: val_loss improved from 0.50711 to 0.50376, saving model to quicksign_inception_resnet_512.h5
Epoch 7/100





Epoch 00007: val_loss did not improve from 0.50376
Epoch 8/100





Epoch 00008: val_loss did not improve from 0.50376
Epoch 9/100





Epoch 00009: val_loss improved from 0.50376 to 0.50288, saving model to quicksign_inception_resnet_512.h5
Epoch 10/100





Epoch 00010: val_loss improved from 0.50288 to 0.50098, saving model to quicksign_inception_resnet_512.h5
Epoch 11/100





Epoch 00011: val_loss improved from 0.50098 to 0.49431, saving model to quicksign_inception_resnet_512.h5
Epoch 12/100





Epoch 00012: val_loss improved from 0.49431 to 0.49411, saving model to quicksign_inception_resnet_512.h5
Epoch 13/100





Epoch 00013: val_loss did not improve from 0.49411
Epoch 14/100





Epoch 00014: val_loss did not improve from 0.49411
Epoch 15/100





Epoch 00015: val_loss improved from 0.49411 to 0.49234, saving model to quicksign_inception_resnet_512.h5
Epoch 16/100





Epoch 00016: val_loss improved from 0.49234 to 0.48841, saving model to quicksign_inception_resnet_512.h5
Epoch 17/100





Epoch 00017: val_loss improved from 0.48841 to 0.47931, saving model to quicksign_inception_resnet_512.h5
Epoch 18/100





Epoch 00018: val_loss did not improve from 0.47931
Epoch 19/100





Epoch 00019: val_loss did not improve from 0.47931
Epoch 20/100





Epoch 00020: val_loss improved from 0.47931 to 0.47635, saving model to quicksign_inception_resnet_512.h5
Epoch 21/100





Epoch 00021: val_loss did not improve from 0.47635
Epoch 22/100





Epoch 00022: val_loss did not improve from 0.47635
Epoch 23/100





Epoch 00023: val_loss improved from 0.47635 to 0.47450, saving model to quicksign_inception_resnet_512.h5
Epoch 24/100





Epoch 00024: val_loss improved from 0.47450 to 0.46828, saving model to quicksign_inception_resnet_512.h5
Epoch 25/100





Epoch 00025: val_loss did not improve from 0.46828
Epoch 26/100





Epoch 00026: val_loss did not improve from 0.46828
Epoch 27/100





Epoch 00027: val_loss did not improve from 0.46828
Epoch 28/100





Epoch 00028: val_loss did not improve from 0.46828
Epoch 29/100





Epoch 00029: val_loss did not improve from 0.46828


<keras.callbacks.History at 0x2828175a400>

In [None]:
# Unfreeze a larger tail of the base: everything from the first layer whose
# name contains 'block8_10' onward becomes trainable, earlier layers stay frozen.
# Keras names the InceptionResNetV2 block layers 'block8_10_mixed',
# 'block8_10_conv', 'block8_10' (no underscore after "block"); the previous
# pattern 'block_8_10' matched nothing and left every layer frozen.
conv_base.trainable = True
set_trainable = False
for layer in conv_base.layers:
    if 'block8_10' in layer.name:
        set_trainable = True
    layer.trainable = set_trainable
    



In [None]:
# Recompile after changing the `trainable` flags again; same loss, metric and
# learning rate (1e-5) as the previous fine-tuning phase.
model.compile(loss='categorical_crossentropy', optimizer=optimizers.Adam(lr=1e-5), metrics=['accuracy'])



In [None]:
# Fresh callbacks for this phase: a new checkpoint file name, so the earlier
# best model is not overwritten, and a new early-stopping instance.
checkpointer = ModelCheckpoint(
    filepath='quicksign_inception_resnet++.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)
earlystopper = EarlyStopping(patience=5, monitor='val_loss')

In [None]:
# Phase 3: continue fine-tuning for up to 50 epochs with the callbacks defined
# in the previous cell. (Not executed in the saved notebook.)
model.fit_generator(train_generator, epochs=50, validation_data=validation_generator, verbose=1,
                    callbacks=[checkpointer, earlystopper])

In [39]:
# Test images get the same rescaling as validation images and no augmentation.
test_data_gen = ImageDataGenerator(rescale=1./255)

# shuffle=False keeps the batches in a fixed file order so predictions can be
# matched back to file names afterwards.
test_generator = test_data_gen.flow_from_directory(
    'data/test/',
    target_size=(512, 512),
    batch_size=64,
    class_mode='categorical',
    shuffle=False,
)

Found 13153 images belonging to 1 classes.


In [41]:
# Run the model over every batch of the test generator; one row of class
# probabilities per image.
preds = model.predict_generator(test_generator, verbose=1)



In [42]:
# Sanity check: expect (number of test images, number of classes).
preds.shape

(13153, 8)

In [43]:
import numpy as np
# Take the file names from the generator itself: with shuffle=False its
# `filenames` list is in the same order as the rows of `preds`. os.listdir()
# returns entries in arbitrary order and would also include any files the
# generator skipped. This also removes the hard-coded Windows path separator.
im_names = np.array([os.path.basename(path) for path in test_generator.filenames])

In [44]:
# Names containing 'image' get the 'test_stg2/' prefix; all others are kept
# unchanged. The startswith() guard makes the cell idempotent: re-running it
# no longer prepends the prefix a second time.
im_names = [
    name if name.startswith('test_stg2/') or 'image' not in name else 'test_stg2/' + name
    for name in im_names
]

In [45]:
import pandas as pd

In [46]:
# Single-column frame holding the image identifiers for the submission.
df_names = pd.DataFrame({'image': im_names})

In [47]:
# One probability column per class.
# NOTE(review): the column order must match the model's output order, i.e.
# train_generator.class_indices — presumably alphabetical by folder name, which
# agrees with the `classes` listing above; confirm.
df_preds = pd.DataFrame(data=preds, columns=['ALB','BET','DOL','LAG','NoF','OTHER','SHARK','YFT'])

In [48]:
# Join names and probabilities column-wise; rows are aligned by positional index.
df_submission = pd.concat([df_names, df_preds], axis=1)

In [49]:
# Write the submission file without the DataFrame index column.
df_submission.to_csv('submission_512_2.csv', index=False)

In [50]:
# Preview the first rows of the submission.
df_submission.head()

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
0,test_stg2/image_00001.jpg,0.398164,0.0476,8.7e-05,0.001112,0.350479,0.012536,0.021999,0.168023
1,test_stg2/image_00002.jpg,0.488148,0.002543,0.003179,0.000232,0.450862,0.009787,0.018434,0.026816
2,test_stg2/image_00003.jpg,0.805502,0.001534,0.000842,7e-05,0.176604,0.01165,0.000154,0.003644
3,test_stg2/image_00004.jpg,0.329028,0.053449,0.121039,0.047917,0.050144,0.063259,0.051354,0.28381
4,test_stg2/image_00005.jpg,0.972795,0.004857,0.000636,2.8e-05,0.001979,0.004787,0.005128,0.009789


In [None]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

# Render the convolutional base's layer graph inline as SVG (uses the 'dot'
# program, so Graphviz must be installed).
SVG(model_to_dot(conv_base).create(prog='dot', format='svg'))