In [1]:
import json
import os
from itertools import chain
from math import ceil

import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import MultiLabelBinarizer

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
from keras.applications.resnet50 import ResNet50
from keras import losses
from keras.layers import Flatten, Dense, Dropout
from keras.layers.normalization import BatchNormalization

In [3]:
from keras import Model
from sklearn.model_selection import train_test_split

In [4]:
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

## Load ResNet model

In [5]:
# ResNet50 with ImageNet weights and without its classification top,
# taking 224x224 RGB images as input.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

In [14]:
# print(base_model.summary())

In [15]:
# Width of the model's output layer (one sigmoid unit per label).
label_count = 227

In [16]:
# Classification head stacked on the ResNet50 feature maps:
# flatten -> 4096-unit ReLU layer -> dropout -> batch normalisation,
# followed by one sigmoid output per label.
x = base_model.output
for layer in (
    Flatten(),
    Dense(4096, activation='relu'),
    Dropout(0.5),
    BatchNormalization(),
):
    x = layer(x)
predictions = Dense(label_count, activation='sigmoid')(x)

In [17]:
# Full network: ResNet50 input through to the sigmoid label outputs.
# Keras 2 names these keyword arguments `inputs` / `outputs`; the singular
# `input=` / `output=` spellings are the deprecated Keras 1 form.
# NOTE(review): the stray text recorded in this cell's output looks like the
# tail of that deprecation warning - confirm it disappears after this change.
head_model = Model(inputs=base_model.input, outputs=predictions)

  if __name__ == '__main__':


In [18]:
# Restore the saved weights into the assembled model.
weights_path = 'results/best_weights.hdf5'
head_model.load_weights(weights_path)

In [19]:
# Compile with a per-label binary cross-entropy loss, matching the sigmoid outputs.
head_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [20]:
# Output shape of the assembled model; the recorded result is (None, 227).
head_model.output_shape

(None, 227)

## Go through test images and predict

In [36]:
# Unshuffled generator over the test images, so batches come out in the same
# order as gen_test.filenames. class_mode=None makes it yield images only.
# NOTE(review): no rescaling or preprocessing function is configured here -
# confirm this matches how the model was trained.
batch_size = 100
test_datagen = ImageDataGenerator()
gen_test = test_datagen.flow_from_directory(
    directory='images_test',
    target_size=(224, 224),
    class_mode=None,
    shuffle=False,
    batch_size=batch_size,
)
num_images_test = len(gen_test.filenames)

Found 39706 images belonging to 1 classes.


In [37]:
# Numeric image id for every test file, in the order of gen_test.filenames.
# The id is the file name without its directory and extension; parsing it with
# os.path avoids hard-coding the length of the folder prefix and the extension,
# so a different sub-folder name or a ".jpeg" file still parses.
img_ids_test_all = np.array(
    [int(os.path.splitext(os.path.basename(fn))[0]) for fn in gen_test.filenames]
)
img_ids_test_all.shape

(39706,)

### First 1k test images

In [56]:
# Accumulators filled chunk by chunk: image arrays and their matching image ids.
test_chunks, test_id_chunks = [], []

In [57]:
# Load the first chunk of test images from the generator into memory.
# Assumes gen_test has not been advanced yet, so batch 0 is the next one.
num_images_test = 1000

# Index of the first generator batch in this chunk; the id cell multiplies it
# by batch_size to find the matching slice of img_ids_test_all.
start_batch = 0
batches = []
for i in range(ceil(num_images_test / gen_test.batch_size)):
    batches.append(gen_test.next())

# Join the batches along the image axis directly instead of going through a
# Python list of individual images.
img_arrays_test = np.concatenate(batches)
del batches
# Drop any surplus images when the chunk size is not a multiple of the batch size.
img_arrays_test = img_arrays_test[:num_images_test]

test_chunks.append(img_arrays_test)


In [58]:
# Leftover check of the last loop index from the chunk-loading cell (recorded value: 9).
i

9

In [60]:
# Store the ids that belong to the chunk just loaded: start at the chunk's
# first generator batch and take num_images_test consecutive ids.
chunk_start = start_batch * batch_size
test_id_chunks.append(img_ids_test_all[chunk_start:chunk_start + num_images_test])


In [61]:
# Displays every id collected so far (one array per loaded chunk).
test_id_chunks

[array([    1,    10,   100,  1000, 10000, 10001, 10002, 10003, 10004,
        10005, 10006, 10007, 10008, 10009,  1001, 10010, 10011, 10012,
        10013, 10014, 10015, 10016, 10017, 10018, 10019,  1002, 10020,
        10021, 10022, 10023, 10024, 10025, 10026, 10027, 10028, 10029,
         1003, 10030, 10031, 10032, 10033, 10034, 10035, 10036, 10037,
        10038, 10039,  1004, 10040, 10041, 10042, 10043, 10044, 10045,
        10046, 10047, 10048, 10049,  1005, 10050, 10051, 10052, 10053,
        10054, 10055, 10056, 10057, 10058, 10059,  1006, 10060, 10061,
        10062, 10063, 10064, 10065, 10066, 10067, 10068, 10069,  1007,
        10070, 10071, 10072, 10073, 10074, 10075, 10076, 10077, 10078,
        10079,  1008, 10080, 10081, 10082, 10083, 10084, 10085, 10086,
        10087, 10088, 10089,  1009, 10090, 10091, 10092, 10093, 10094,
        10095, 10096, 10097, 10098, 10099,   101,  1010, 10100, 10101,
        10102, 10103, 10104, 10105, 10106, 10107, 10108, 10109,  1011,
      

In [63]:
# Accumulator for the prediction array of each chunk.
prediction_chunks = list()

In [65]:
%%time
prediction_chunks.append(head_model.predict(test_chunks[0]))

CPU times: user 20min 54s, sys: 1min 45s, total: 22min 39s
Wall time: 6min 11s


In [67]:
# Shape of the first stored block of predictions.
prediction_chunks[0].shape

(100, 227)

In [None]:
# Load the next chunk of test images, continuing after the chunks already
# stored in test_chunks.
num_images_test = 1000

# Index of the first generator batch in this chunk. The id cell slices
# img_ids_test_all starting at start_batch * batch_size, so this must count
# generator batches, not chunks: a hard-coded 1 would select ids from position
# 100 while the images of the second chunk start at position 1000.
# Assumes every stored chunk used the same num_images_test and that gen_test
# has only been advanced by these chunk-loading cells.
batches_per_chunk = ceil(num_images_test / gen_test.batch_size)
start_batch = len(test_chunks) * batches_per_chunk
batches = []
for i in range(batches_per_chunk):
    batches.append(gen_test.next())

# Join the batches along the image axis directly instead of going through a
# Python list of individual images.
img_arrays_test = np.concatenate(batches)
del batches
# Drop any surplus images when the chunk size is not a multiple of the batch size.
img_arrays_test = img_arrays_test[:num_images_test]

test_chunks.append(img_arrays_test)


## Second attempt: predict batch by batch in a loop

In [83]:
# Recreate the unshuffled test generator with a smaller batch size, so that
# each batch can be predicted on its own.
batch_size = 32
test_datagen = ImageDataGenerator()
gen_test = test_datagen.flow_from_directory(
    directory='images_test',
    target_size=(224, 224),
    class_mode=None,
    shuffle=False,
    batch_size=batch_size,
)

Found 39706 images belonging to 1 classes.


In [84]:
# Total number of test files found by the generator.
num_images_test = len(gen_test.filenames)
num_images_test

39706

In [85]:
# Numeric image id for every test file, in the order of gen_test.filenames.
# The id is the file name without its directory and extension; parsing it with
# os.path avoids hard-coding the length of the folder prefix and the extension,
# so a different sub-folder name or a ".jpeg" file still parses.
img_ids_test_all = np.array(
    [int(os.path.splitext(os.path.basename(fn))[0]) for fn in gen_test.filenames]
)
img_ids_test_all.shape

(39706,)

In [88]:
# Predict one generator batch at a time so only a single batch of images is
# held in memory, keeping the matching image ids next to each prediction block.

# Restart the generator at the first file. The ids below are sliced from the
# start of img_ids_test_all, so a generator that has already been advanced
# (for example by re-running this cell) would pair predictions with wrong ids.
gen_test.reset()

chunked_predictions = []
chunked_img_ids = []

for i in range(ceil(num_images_test / gen_test.batch_size)):
    print(i)
    batch = gen_test.next()
    # Own name for the result: `predictions` is the model's output tensor that
    # head_model was built from, and must not be overwritten with an array.
    batch_predictions = head_model.predict(batch)
    img_ids = img_ids_test_all[i * gen_test.batch_size : (i + 1) * gen_test.batch_size]
    # Every prediction row must have exactly one id.
    assert len(img_ids) == len(batch_predictions)
    chunked_predictions.append(batch_predictions)
    chunked_img_ids.append(img_ids)
    # Trial-run limit: stop after the first six batches.
    if i >= 5:
        print('breaking')
        break


0
1
2
3
4
5
breaking


In [93]:
# Stacks the per-batch predictions to inspect their overall shape.
# NOTE(review): this presumes all batches have equal length; a shorter final
# batch on a full run would not stack cleanly - confirm before relying on it.
np.array(chunked_predictions).shape

(6, 32, 227)

In [91]:
# Shape of the stacked per-batch id arrays; should match the first two
# dimensions of the stacked predictions.
np.array(chunked_img_ids).shape

(6, 32)

In [95]:
# Predictions of the first batch (prints the whole array).
chunked_predictions[0]

array([0.01547124, 0.0450897 , 0.02162396, 0.03189637, 0.03580599,
       0.03269533, 0.03607636, 0.57041144, 0.2762965 , 0.01618437,
       0.02135538, 0.04706649, 0.01934328, 0.0767343 , 0.02671728,
       0.02297901, 0.01805874, 0.02229851, 0.0311577 , 0.71278363,
       0.0423255 , 0.04711476, 0.04254869, 0.02227439, 0.00928177,
       0.04010453, 0.17350438, 0.02189047, 0.03521808, 0.02693619,
       0.05884839, 0.03063242, 0.02176283, 0.0453779 , 0.02936278,
       0.03961569, 0.05542257, 0.03927505, 0.03291159, 0.04870011,
       0.04502208, 0.03378347, 0.11730143, 0.09574386, 0.03419983,
       0.02141596, 0.0201642 , 0.0469939 , 0.02543291, 0.03820044,
       0.06821832, 0.07181704, 0.01966202, 0.03039076, 0.06224285,
       0.02741141, 0.02015221, 0.06761385, 0.08629825, 0.07163592,
       0.6772691 , 0.02436218, 0.0249885 , 0.0388895 , 0.0323426 ,
       0.02882665, 0.03848581, 0.02242135, 0.04464923, 0.06055853,
       0.06898058, 0.5438623 , 0.05337121, 0.03407285, 0.08674

In [117]:
# One row per image and one column per label: stack the per-batch predictions,
# index the rows by image id and number the label columns starting from 1.
stacked_predictions = np.concatenate(chunked_predictions)
combined = pd.DataFrame(
    stacked_predictions,
    index=np.concatenate(chunked_img_ids),
    columns=range(1, 1 + stacked_predictions.shape[1]),
)
combined

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,218,219,220,221,222,223,224,225,226,227
1,0.015471,0.045090,0.021624,0.031896,0.035806,0.032695,0.036076,0.570411,0.276296,0.016184,...,0.060269,0.047812,0.055514,0.050397,0.061571,0.033242,0.025469,0.158317,0.275286,0.007419
10,0.014402,0.028376,0.011889,0.019857,0.012750,0.051179,0.023252,0.445014,0.996229,0.018676,...,0.015587,0.128992,0.056993,0.025327,0.039941,0.010555,0.009687,0.223370,0.013568,0.013763
100,0.011814,0.022332,0.016337,0.021459,0.006230,0.023783,0.060604,0.218199,0.999530,0.034978,...,0.004945,0.085319,0.049323,0.019049,0.028748,0.015164,0.012792,0.054249,0.006588,0.016076
1000,0.007304,0.012954,0.024639,0.013152,0.010992,0.027407,0.030604,0.304808,0.992281,0.012844,...,0.004481,0.061891,0.018643,0.133207,0.040261,0.013288,0.005247,0.059042,0.007067,0.008165
10000,0.005094,0.015215,0.016866,0.011703,0.010637,0.057631,0.013181,0.045106,0.074867,0.006803,...,0.006718,0.007608,0.009735,0.028190,0.021790,0.013633,0.010165,0.024751,0.010608,0.004563
10001,0.005691,0.012119,0.017751,0.008929,0.012027,0.021677,0.008019,0.833133,0.466322,0.005769,...,0.007569,0.011634,0.018596,0.034125,0.024686,0.011739,0.012391,0.121557,0.025003,0.003847
10002,0.004984,0.052278,0.045735,0.056658,0.032421,0.036652,0.034357,0.099196,0.035024,0.015605,...,0.030228,0.036386,0.027021,0.073901,0.130711,0.026000,0.026626,0.270617,0.130612,0.007358
10003,0.005563,0.014714,0.013420,0.027383,0.021803,0.028117,0.016118,0.714997,0.154954,0.008429,...,0.009178,0.062862,0.022761,0.051373,0.037268,0.010919,0.018502,0.154158,0.116654,0.005379
10004,0.010940,0.038539,0.019544,0.031600,0.030570,0.060299,0.018424,0.509056,0.189902,0.009048,...,0.012331,0.032474,0.030494,0.040431,0.083822,0.020542,0.010402,0.188388,0.127574,0.002305
10005,0.008421,0.031258,0.014648,0.017064,0.028346,0.070627,0.020682,0.509962,0.567542,0.011892,...,0.011441,0.029163,0.020785,0.056609,0.056024,0.026951,0.013334,0.093772,0.049762,0.004803


[array([    1,    10,   100,  1000, 10000, 10001, 10002, 10003, 10004,
        10005, 10006, 10007, 10008, 10009,  1001, 10010, 10011, 10012,
        10013, 10014, 10015, 10016, 10017, 10018, 10019,  1002, 10020,
        10021, 10022, 10023, 10024, 10025]),
 array([10026, 10027, 10028, 10029,  1003, 10030, 10031, 10032, 10033,
        10034, 10035, 10036, 10037, 10038, 10039,  1004, 10040, 10041,
        10042, 10043, 10044, 10045, 10046, 10047, 10048, 10049,  1005,
        10050, 10051, 10052, 10053, 10054]),
 array([10055, 10056, 10057, 10058, 10059,  1006, 10060, 10061, 10062,
        10063, 10064, 10065, 10066, 10067, 10068, 10069,  1007, 10070,
        10071, 10072, 10073, 10074, 10075, 10076, 10077, 10078, 10079,
         1008, 10080, 10081, 10082, 10083]),
 array([10084, 10085, 10086, 10087, 10088, 10089,  1009, 10090, 10091,
        10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099,   101,
         1010, 10100, 10101, 10102, 10103, 10104, 10105, 10106, 10107,
        10108

## Process the combined predictions into the wanted format

In [1]:
# NOTE(review): the execution count restarts at 1 here and the recorded result
# is a NameError - `combined` only exists after the earlier cells have been run
# in the current kernel.
combined

NameError: name 'combined' is not defined