In [1]:
import numpy as np
import pandas as pd
#import seaborn as sns
from glob import glob
from PIL import Image
import matplotlib.pylab as plt
from sklearn.model_selection import train_test_split
from imutils import paths

from subprocess import check_output

import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
from keras.applications.densenet import preprocess_input


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
train_images = glob("C:/Users/jdu12/Desktop/humpback/train/*jpg")
test_images = glob("C:/Users/jdu12/Desktop/humpback/test/*jpg")
df = pd.read_csv("C:/Users/jdu12/Desktop/humpback/train.csv")


In [3]:
# load a single image to np array
def get_image(img_path):
    img = load_img(img_path, target_size=(224, 224))#.convert('L')
    img = img_to_array(img)
    img = preprocess_input(img)
    return img

In [4]:
# loading train images
trainPaths = list(paths.list_images('C:/Users/jdu12/Desktop/humpback/train/'))
print(len(trainPaths))
train_images = np.array([get_image(img_path) for img_path in trainPaths])
print(train_images.shape)

9850
(9850, 224, 224, 3)


In [6]:
def to_one_hot(labels):
    one_hot = []
    seen_id = set()
    id2class = dict()
    counter = 0
    for id in labels:
        if id not in seen_id:
            seen_id.add(id)
            one_hot.append(counter)
            id2class[id] = counter
            counter += 1
        else:
            one_hot.append(id2class[id])
    one_hot = to_categorical(one_hot, num_classes = 4251)
    return one_hot

In [7]:
labels = df['Id']
labels = to_one_hot(labels)
#print(labels[0:10])

In [8]:
# split the data into train and val
# The dataset is super unbalanced, as there are many classes that contains only 1 image
# As a result, train/val data cannot be split before generating more data by augmentation


In [9]:
# define data generator
#use of an image generator for preprocessing and data augmentation
x_train = train_images.astype("float32")
y_train = labels

# define data generator
image_gen = ImageDataGenerator(
    #featurewise_center=True,
    #featurewise_std_normalization=True,
    rescale=1./255,
    rotation_range=15,
    width_shift_range=.15,
    height_shift_range=.15,
    horizontal_flip=True)

In [10]:
# acquire the model
model = keras.applications.densenet.DenseNet121(include_top=True, weights=None, classes=4251)

In [11]:
batch_size = 32
epochs = 180

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model_dir = 'C:/Users/jdu12/Desktop/humpback/saved_model/DenseNet/'
routine_dir = model_dir + "routine-{epoch:02d}-{acc:.2f}.hdf5"
routine_save = ModelCheckpoint(routine_dir, monitor='acc', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=10)
best_dir = model_dir + "best-{epoch:02d}-{acc:.2f}.hdf5"
best_save = ModelCheckpoint(best_dir, monitor='acc', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)


model.fit_generator(image_gen.flow(x_train, y_train, batch_size=batch_size),
          steps_per_epoch=  x_train.shape[0]//batch_size,
          epochs=epochs, verbose=1, callback=[routine_save, best_save])
          #class_weight=class_weight_dic)



TypeError: fit_generator() got an unexpected keyword argument 'callback'

In [19]:
# save model
model.save(model_dir + 'first_DenseNet.hdf5')

In [2]:
# loading trained model
model = load_model('C:/Users/jdu12/Desktop/humpback/saved_model/DenseNet/' + 'first_DenseNet.hdf5')

In [7]:
# loading test images
testPaths = list(paths.list_images('C:/Users/jdu12/Desktop/humpback/test/'))
print(len(testPaths))
test_images = np.array([get_image(img_path) for img_path in trainPaths])
print(test_images.shape)

15610
(9850, 224, 224, 3)


In [121]:
preds = model.predict(preprocess_input(train_images[0:200]))
print(preds.shape)
#print(train_images[0:600].shape)
#print(preds.shape)
print(preds[0:600,7])

(200, 4251)
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1.]


In [13]:
# testing and generating submission file
import warnings
from os.path import split

with open("sample_submission.csv","w") as f:
    with warnings.catch_warnings():
        f.write("Image,Id\n")
        warnings.filterwarnings("ignore",category=DeprecationWarning)
        #for image in test_images:
            #img = ImportImage( image)
            #x = img.astype( "float32")
            #applying preprocessing to test images
            #x = image_gen.standardize( x.reshape(1,SIZE,SIZE))
            
        #x = image
        y = model.predict(test_images)#.reshape(1,SIZE,SIZE,1))
        predicted_args = np.argsort(y)[0][::-1][:5]
        predicted_tags = lohe.inverse_labels( predicted_args)
        image = split(image)[-1]
        predicted_tags = " ".join( predicted_tags)
        f.write("%s,%s\n" %(image, predicted_tags))

KeyboardInterrupt: 