In [1]:
import os
from glob import glob
from subprocess import check_output

import numpy as np
import pandas as pd
import matplotlib.pylab as plt
from PIL import Image
from sklearn.model_selection import train_test_split
from imutils import paths

import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
from keras.applications.densenet import preprocess_input
#what

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [10]:
# load a single image to np array
def get_image(img_path):
    """Load one image file as a NumPy array with values divided by 255.

    The image is resized to 299x299 while loading, converted to an array
    with ``img_to_array`` and then scaled by 1/255.
    """
    resized = load_img(img_path, target_size=(299, 299))
    return img_to_array(resized) / 255

In [11]:
# Collect every training image path, then decode all of them into one array.
train_dir = 'C:/Users/jdu12/Desktop/humpback/train/'
trainPaths = list(paths.list_images(train_dir))
print(len(trainPaths))

# One get_image() call per path; the results are stacked by np.array.
train = np.array(list(map(get_image, trainPaths)))
print(train.shape)

9082
(9082, 299, 299, 3)


In [12]:
def to_one_hot(labels, num_classes=4251):
    """Encode a sequence of labels as one-hot rows.

    Every distinct label is given an integer class index in order of first
    appearance (0, 1, 2, ...); the index sequence is then expanded with
    ``to_categorical``.

    Parameters
    ----------
    labels : iterable of hashable
        One label per sample (e.g. the ``Id`` column of train.csv).
    num_classes : int or None, optional
        Width of the one-hot encoding, forwarded to ``to_categorical``.
        Defaults to 4251, the value that used to be hard-coded here.

    Returns
    -------
    one_hot_labels
        The array produced by ``to_categorical``, one row per input label.
    class2id : dict
        Integer class index -> original label (whale id).
    id2class : dict
        Original label (whale id) -> integer class index.
    """
    id2class = dict()
    class2id = dict()      # class (integer) to id (whale class)
    class_indices = []
    for label in labels:
        if label not in id2class:
            # First time this label is seen: give it the next free index.
            new_index = len(id2class)
            id2class[label] = new_index
            class2id[new_index] = label
        class_indices.append(id2class[label])
    one_hot_labels = to_categorical(class_indices, num_classes=num_classes)
    return one_hot_labels, class2id, id2class

In [13]:
df = pd.read_csv("C:/Users/jdu12/Desktop/humpback/train.csv")

# Build the label list in the SAME order as `trainPaths` (and therefore as the
# rows of `train`). Taking df['Id'] in CSV row order only matches the images
# if the directory listing happens to come back in CSV order, which nothing
# guarantees; a mismatch would silently pair images with the wrong whale id.
# NOTE(review): assumes train.csv has an 'Image' column holding the image
# file names -- confirm against the actual file.
image_to_id = dict(zip(df['Image'], df['Id']))
labels = [image_to_id[os.path.basename(img_path)] for img_path in trainPaths]

# One-hot encode; class2id / id2class translate between index and whale id.
labels, class2id, id2class = to_one_hot(labels)

In [14]:
# split the data into train and val
# The dataset is highly imbalanced: many classes contain only 1 image.
# As a result, the train/val split cannot be made until more data has been generated by augmentation.


In [15]:
# Data generator used for augmenting the training images on the fly.
# Only geometric augmentation is switched on; featurewise centering,
# featurewise std-normalisation and rescale are deliberately left unset.
augmentation_options = dict(
    rotation_range=15,
    width_shift_range=0.15,
    height_shift_range=0.15,
    horizontal_flip=True,
)
image_gen = ImageDataGenerator(**augmentation_options)

In [16]:
# Build InceptionResNetV2 with its classification head and no pretrained
# weights (weights=None). The number of output classes is read from the width
# of the one-hot label matrix so it cannot drift from the encoding produced by
# to_one_hot().
model = keras.applications.inception_resnet_v2.InceptionResNetV2(
    include_top=True,
    weights=None,
    classes=labels.shape[1],
)

In [None]:
batch_size = 32
epochs = 180

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model_dir = 'C:/Users/jdu12/Desktop/humpback/saved_model/InceptionResnetV2/'
# Make sure the checkpoint directory exists up front; otherwise the first
# checkpoint write fails only after several epochs of training.
os.makedirs(model_dir, exist_ok=True)

# Routine checkpoint: full model written every 10 epochs regardless of score.
routine_dir = model_dir + "routine-{epoch:02d}-{acc:.2f}.hdf5"
routine_save = ModelCheckpoint(routine_dir, monitor='acc', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=10)

# Best checkpoint: evaluated every 3 epochs, written only when the monitored
# 'acc' improves.
best_dir = model_dir + "best-{epoch:02d}-{acc:.2f}.hdf5"
best_save = ModelCheckpoint(best_dir, monitor='acc', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=3)

# Train on augmented batches drawn from the in-memory training array.
# steps_per_epoch uses floor division, so a trailing partial batch is not
# counted towards an epoch.
model.fit_generator(image_gen.flow(train, labels, batch_size=batch_size),
          steps_per_epoch=train.shape[0] // batch_size,
          epochs=epochs, verbose=1, callbacks=[routine_save, best_save])



Epoch 1/180


In [5]:
# loading test images
testPaths = list(paths.list_images('C:/Users/jdu12/Desktop/humpback/test/'))
print(len(testPaths))
# get_image() already divides pixel values by 255. Dividing a second time here
# would put the test inputs on a different scale from the training inputs, so
# the array is used as returned.
test_images = np.array([get_image(img_path) for img_path in testPaths])
print(test_images.shape)

15610
(15610, 299, 299, 3)


In [16]:
# Load a saved checkpoint by name; `model_name` is also used later to name the
# submission file.
model_name = 'routine-170-0.99'
checkpoint_path = 'C:/Users/jdu12/Desktop/humpback/saved_model/InceptionResnetV2/{}.hdf5'.format(model_name)
model = load_model(checkpoint_path)

In [17]:
# Run the loaded model on the whole test array; the result is kept in `pred`
# for the submission cell.
pred = model.predict(test_images)

In [18]:
# testing and generating submission file
# `pred` comes from the preceding predict cell; it is not recomputed here, to
# avoid running inference over the entire test set a second time.
import warnings
from os.path import split

pred_dir = "C:/Users/jdu12/Desktop/humpback/prediction/InceptionResnetV2/"
# Create the output directory if needed so open() below cannot fail on it.
os.makedirs(pred_dir, exist_ok=True)

# Column indices of the 5 highest values in each row of `pred`, highest first.
top_5 = np.argsort(pred)[:, -1:-6:-1]

with open(pred_dir + model_name + ".csv", "w") as f:
    f.write("Image,Id\n")
    for img_path, class_indices in zip(testPaths, top_5):
        # os.path.split handles whichever path separator the platform uses,
        # unlike a literal split on '/'.
        cur_image_name = split(img_path)[1]
        # Space-separated ids with no leading space before the first one.
        cur_tags = ' '.join(class2id[class_index] for class_index in class_indices)
        f.write("%s,%s\n" % (cur_image_name, cur_tags))