In [1]:
import json
from itertools import chain
from math import ceil

from keras.preprocessing.image import ImageDataGenerator
import numpy as np
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
from keras.applications.resnet50 import ResNet50
from keras import losses
from keras.layers import Flatten, Dense, Dropout
from keras.layers.normalization import BatchNormalization

In [3]:
from keras import Model
from sklearn.model_selection import train_test_split

In [4]:
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

## Load ResNet model

In [5]:
# ImageNet-pretrained ResNet50 without its classification top, taking 224x224 RGB input.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

In [6]:
# print(base_model.summary())

In [7]:
# Number of units in the final sigmoid layer (one per label).
label_count = 227

In [8]:
# Classification head stacked on top of the ResNet50 output:
# flatten -> 4096-unit ReLU layer -> dropout (rate 0.5) -> batch normalisation.
x = Flatten()(base_model.output)
x = Dense(4096, activation='relu')(x)
x = Dropout(0.5)(x)
x = BatchNormalization()(x)
# One sigmoid unit per label; each unit's score is independent of the others.
predictions = Dense(label_count, activation = 'sigmoid')(x)

In [9]:
# Full network: ResNet50 input through to the sigmoid label scores.
# Keras 2 names these arguments `inputs` / `outputs`; the singular spellings
# `input` / `output` are deprecated aliases that trigger a warning.
head_model = Model(inputs=base_model.input, outputs=predictions)

  if __name__ == '__main__':


In [10]:
# Load the weights stored in results/best_weights_2.hdf5 into the model.
head_model.load_weights('results/best_weights_2.hdf5')

In [11]:
# Configure optimizer, loss and metric on the model.
# NOTE(review): the rest of this notebook only calls predict(); compiling may
# not be required for inference — confirm before removing.
head_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [12]:
head_model.output_shape

(None, 227)

## Predict on the test set in batches

In [13]:
batch_size = 32

# Unlabelled, unshuffled stream of test images resized to 224x224, so the
# batches come out in the same order as gen_test.filenames.
gen_test = ImageDataGenerator().flow_from_directory(
    'images_test',
    target_size=(224, 224),
    class_mode=None,
    shuffle=False,
    batch_size=batch_size,
)

Found 39706 images belonging to 1 classes.


In [14]:
# Number of test image files the generator found.
num_images_test = len(gen_test.filenames)
num_images_test

39706

In [None]:
# Integer image id parsed from every test filename, in generator order.
# NOTE(review): the [5:-4] slice assumes each filename carries a 5-character
# prefix (e.g. a "test/" sub-directory) and a 4-character extension
# (e.g. ".jpg") — confirm against the actual directory layout.
img_ids_test_all = np.array([int(fn[5:-4]) for fn in gen_test.filenames])
img_ids_test_all.shape

(39706,)

In [None]:
# Run inference one batch at a time, keeping one prediction array per batch.
num_batches = ceil(num_images_test / gen_test.batch_size)
chunked_predictions = []

# Rewind the generator so the first batch lines up with gen_test.filenames[0]
# even when this cell is re-run after an interrupted pass; otherwise the
# predictions would no longer match the image ids derived from the filenames.
gen_test.reset()

print(f'Batches: {num_batches}')
for i in range(num_batches):
    if i % 10 == 0:  # progress marker every 10 batches
        print(i)
    # Use a dedicated name so the global `predictions` (the model's output
    # tensor) is not overwritten by a NumPy array.
    batch_predictions = head_model.predict(gen_test.next())
    chunked_predictions.append(batch_predictions)
    


Batches: 1241
0
10
20
30
40
50


In [None]:
# Stack the per-batch arrays along the sample axis before inspecting the shape.
# The final batch can be shorter than the others, so np.array() over the list
# would be ragged (object array or ValueError depending on the NumPy version).
np.concatenate(chunked_predictions).shape

In [None]:
# np.array(chunked_img_ids).shape

In [None]:
# Stack all batch predictions into one (num_predicted_images, num_labels) array.
stacked_predictions = np.concatenate(chunked_predictions)

# Rows are keyed by image id (only as many ids as there are predictions, which
# covers a partial run); columns are numbered from 1; rows are sorted by id.
combined = pd.DataFrame(
    stacked_predictions,
    index=img_ids_test_all[:len(stacked_predictions)],
    columns=range(1, 1 + stacked_predictions.shape[1]),
).sort_index()

combined.to_csv('results/combined.csv')

# Last expression of the cell so the preview is actually rendered.
combined.head()

## Process combined df into submission format

In [None]:
# def process(row):
#     qualified_labels = []
#     for i in row.index:
#         # combined.iloc[2].sort_values(ascending=False)
#         if row[i] > 0.5:
#             qualified_labels.append(i)
    
#     # if nothing qualified, use the label with max proba
#     if len(qualified_labels) == 0:
#         qualified_labels.append(row.idxmax())
        
#     return qualified_labels

# processed = combined.apply(process, axis=1)

In [None]:
def process(row, threshold=0.5):
    """Turn one row of label scores into a space-separated label string.

    Args:
        row: pandas Series of scores indexed by label id.
        threshold: a label qualifies when its score is strictly greater than
            this value. Defaults to 0.5.

    Returns:
        The qualifying label ids joined by single spaces, highest score first.
        When no score exceeds the threshold, the single label id with the
        highest score.
    """
    ranked = row.sort_values(ascending=False)  # in case sorting matters
    qualified_labels = ranked[ranked > threshold].index.tolist()

    # if nothing qualified, use the label with max proba
    if not qualified_labels:
        qualified_labels.append(row.idxmax())

    return ' '.join(str(label) for label in qualified_labels)

# One label string per image, as a single-column frame with a named index
# ready for the submission file.
processed = (
    combined
    .apply(process, axis=1)
    .to_frame(name='label_id')
    .rename_axis('image_id')
)

In [None]:
processed.shape

In [None]:
# Write the submission table (index plus its single column) to CSV.
processed.to_csv('results/submission.csv')

In [None]:
processed