In [1]:
from __future__ import division
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, Flatten, Merge, merge
from keras.layers.merge import Concatenate
from keras.layers import Input, Activation, Dense, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D, AveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.core import Lambda
from keras import backend as K
import os
from sklearn.utils import shuffle
import random
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.metrics import fbeta_score
from keras.optimizers import Adam, SGD
from keras import applications

import tensorflow as tf

Using TensorFlow backend.


In [2]:
tf

<module 'tensorflow' from '/usr/local/lib/python2.7/dist-packages/tensorflow/__init__.pyc'>

In [13]:
def make_parallel(model, gpu_count):
    def get_slice(data, idx, parts):
        shape = tf.shape(data)
        size = tf.concat([ shape[:1] // parts, shape[1:] ],axis=0)
        stride = tf.concat([ shape[:1] // parts, shape[1:]*0 ],axis=0)
        start = stride * idx
        return tf.slice(data, start, size)

    outputs_all = []
    for i in range(len(model.outputs)):
        outputs_all.append([])

    #Place a copy of the model on each GPU, each getting a slice of the batch
    for i in range(gpu_count):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('tower_%d' % i) as scope:

                inputs = []
                #Slice each input into a piece for processing on this GPU
                for x in model.inputs:
                    input_shape = tuple(x.get_shape().as_list())[1:]
                    slice_n = Lambda(get_slice, output_shape=input_shape, arguments={'idx':i,'parts':gpu_count})(x)
                    inputs.append(slice_n)                

                outputs = model(inputs)
                
                if not isinstance(outputs, list):
                    outputs = [outputs]
                
                #Save all the outputs for merging back together later
                for l in range(len(outputs)):
                    outputs_all[l].append(outputs[l])

    # merge outputs on CPU
    with tf.device('/cpu:0'):
        merged = []
        for outputs in outputs_all:
            merged.append(Concatenate(axis=0)(outputs))
            
        return Model(inputs=model.inputs, outputs=merged)

In [4]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1"

def fbeta_loss(y_true, y_pred):
    beta_squared = 4

    tp = K.sum(y_true * y_pred) + K.epsilon()
    fp = K.sum(y_pred) - tp
    fn = K.sum(y_true) - tp

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    result = 1 - (beta_squared + 1) * (precision * recall) / (beta_squared * precision + recall + K.epsilon())

    return result

def fbeta_score_K(y_true, y_pred):
    beta_squared = 4

    tp = K.sum(y_true * y_pred) + K.epsilon()
    fp = K.sum(y_pred) - tp
    fn = K.sum(y_true) - tp

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    result = (beta_squared + 1) * (precision * recall) / (beta_squared * precision + recall + K.epsilon())

    return result

def rotate(img):
    rows = img.shape[0]
    cols = img.shape[1]
    angle = np.random.choice((10, 20, 30))#, 40, 50, 60, 70, 80, 90))
    rotation_M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    img = cv2.warpAffine(img, rotation_M, (cols, rows))
    return img

def rotate_bound(image, size):
    #credits http://www.pyimagesearch.com/2017/01/02/rotate-images-correctly-with-opencv-and-python/
    (h, w) = image.shape[:2]
    (cX, cY) = (w // 2, h // 2)

    angle = np.random.randint(10,180)

    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    output = cv2.resize(cv2.warpAffine(image, M, (nW, nH)), (size, size))
    return output

def perspective(img):
    rows = img.shape[0]
    cols = img.shape[1]

    shrink_ratio1 = np.random.randint(low=85, high=110, dtype=int) / 100
    shrink_ratio2 = np.random.randint(low=85, high=110, dtype=int) / 100

    zero_point = rows - np.round(rows * shrink_ratio1, 0)
    max_point_row = np.round(rows * shrink_ratio1, 0)
    max_point_col = np.round(cols * shrink_ratio2, 0)

    src = np.float32([[zero_point, zero_point], [max_point_row-1, zero_point], [zero_point, max_point_col+1], [max_point_row-1, max_point_col+1]])
    dst = np.float32([[0, 0], [rows, 0], [0, cols], [rows, cols]])

    perspective_M = cv2.getPerspectiveTransform(src, dst)

    img = cv2.warpPerspective(img, perspective_M, (cols,rows))#, borderValue=mean_pix)
    return img

def shift(img):
    rows = img.shape[0]
    cols = img.shape[1]

    shift_ratio1 = (random.random() * 2 - 1) * np.random.randint(low=3, high=15, dtype=int)
    shift_ratio2 = (random.random() * 2 - 1) * np.random.randint(low=3, high=15, dtype=int)

    shift_M = np.float32([[1,0,shift_ratio1], [0,1,shift_ratio2]])
    img = cv2.warpAffine(img, shift_M, (cols, rows))#, borderValue=mean_pix)
    return img

In [67]:
def batch_generator_train(zip_list, img_size, batch_size, is_train=True, shuffle=True):
    number_of_batches = np.ceil(len(zip_list) / batch_size)
    if shuffle == True:
        random.shuffle(zip_list)
    counter = 0
    offsets = np.arange(0, 12)
    while True:
        if shuffle == True:
            random.shuffle(zip_list)
        batch_files = zip_list[batch_size*counter:batch_size*(counter+1)]
        image_list = []
        mask_list = []

        for file, mask in batch_files:

            image = cv2.imread(file) #cv2.resize(cv2.imread(file), (img_size,img_size)) / 255.
            x, y = np.random.choice(offsets, 2)
            image = image[x:x+img_size, y:y+img_size]
            image = image[:, :, [2, 1, 0]] - mean_pix

            rnd_flip = np.random.randint(2, dtype=int)
            rnd_rotate = np.random.randint(2, dtype=int)
            rnd_zoom = np.random.randint(2, dtype=int)
            rnd_shift = np.random.randint(2, dtype=int)

            if (rnd_flip == 1) & (is_train == True):
                rnd_flip = np.random.randint(3, dtype=int) - 1
                image = cv2.flip(image, rnd_flip)

            if (rnd_rotate == 1) & (is_train == True):
                image = rotate_bound(image, img_size)

            if (rnd_zoom == 1) & (is_train == True):
                image = perspective(image)

            if (rnd_shift == 1) & (is_train == True):
                image = shift(image)

            image_list.append(image)
            mask_list.append(mask)

        counter += 1
        image_list = np.array(image_list)
        mask_list = np.array(mask_list)

        yield (image_list, mask_list)

        if counter == number_of_batches:
            if shuffle == True:
                random.shuffle(zip_list)
            counter = 0

def batch_generator_test(zip_list, img_size, batch_size, shuffle=True):
    number_of_batches = int(np.ceil(len(zip_list)/2))
    print(len(zip_list), number_of_batches)
    counter = 0
    if shuffle:
        random.shuffle(zip_list)
    while True:
        batch_files = zip_list[2*counter:2*(counter+1)]
        image_list = []
        mask_list = []
        
        print 'counter: ' + str(counter)

        for file, mask in batch_files:
            
            print file

            image = cv2.resize(cv2.imread(file), (img_size, img_size))
            image = image[:, :, [2, 1, 0]] - mean_pix
            image_list.append(image)
            mask_list.append(mask)

        counter += 1
        image_list = np.array(image_list)
        mask_list = np.array(mask_list)

        yield (image_list, mask_list)

        if counter == number_of_batches:
            random.shuffle(zip_list)
            counter = 0

def predict_generator(files, img_size, batch_size):
    number_of_batches = np.ceil(len(files) / batch_size)
    print(len(files), number_of_batches)
    counter = 0
    int_counter = 0

    while True:
            beg = batch_size * counter
            end = batch_size * (counter + 1)
            batch_files = files[beg:end]
            image_list = []

            for file in batch_files:
                int_counter += 1
                image = cv2.resize(cv2.imread(file), (img_size, img_size))
                image = image[:, :, [2, 1, 0]] - mean_pix

                rnd_flip = np.random.randint(2, dtype=int)
                rnd_rotate = np.random.randint(2, dtype=int)
                rnd_zoom = np.random.randint(2, dtype=int)
                rnd_shift = np.random.randint(2, dtype=int)

                if rnd_flip == 1:
                    rnd_flip = np.random.randint(3, dtype=int) - 1
                    image = cv2.flip(image, rnd_flip)

                if rnd_rotate == 1:
                    image = rotate_bound(image, img_size)

                if rnd_zoom == 1:
                    image = perspective(image)

                if rnd_shift == 1:
                    image = shift(image)

                image_list.append(image)

            counter += 1

            image_list = np.array(image_list)

            yield (image_list)


def f2_score(y_true, y_pred):
    y_true, y_pred, = np.array(y_true), np.array(y_pred)
    score = fbeta_score(y_true, y_pred, beta=2, average='samples')
    return score


In [107]:
GLOBAL_PATH = '/home/user/data/amazon_planet/'
TRAIN_FOLDER = '/home/user/data/amazon_planet/train-jpg/' #All train files resized to 224*224
TEST_FOLDER = '/home/user/data/amazon_planet/test-jpg/' #All test files in one folder
F_CLASSES = GLOBAL_PATH + 'train_v2.csv'

df_train = pd.read_csv(F_CLASSES)
df_test = pd.read_csv(GLOBAL_PATH + 'sample_submission_v2.csv')

labels = ['blow_down',
          'bare_ground',
          'conventional_mine',
          'blooming',
          'cultivation',
          'artisinal_mine',
          'haze',
          'primary',
          'slash_burn',
          'habitation',
          'clear',
          'road',
          'selective_logging',
          'partly_cloudy',
          'agriculture',
          'water',
          'cloudy']
label_map = {'agriculture': 14,
             'artisinal_mine': 5,
             'bare_ground': 1,
             'blooming': 3,
             'blow_down': 0,
             'clear': 10,
             'cloudy': 16,
             'conventional_mine': 2,
             'cultivation': 4,
             'habitation': 9,
             'haze': 6,
             'partly_cloudy': 13,
             'primary': 7,
             'road': 11,
             'selective_logging': 12,
             'slash_burn': 8,
             'water': 15}

flatten = lambda l: [item for sublist in l for item in sublist]

In [7]:
x_train = []
x_test = []
y_train = []


for f, tags in tqdm(df_train.values, miniters=1000):
    img = TRAIN_FOLDER + '{}.jpg'.format(f)
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1
    x_train.append(img)
    y_train.append(targets)

100%|██████████| 40479/40479 [00:00<00:00, 146141.52it/s]


In [8]:
x_train, x_holdout, y_train, y_holdout = x_train[3000:-1], x_train[:3000], y_train[3000:-1], y_train[:3000]

x_train, y_train = shuffle(x_train, y_train, random_state = 24)

part = 0.85
split = int(round(part*len(y_train)))
x_train, x_valid, y_train, y_valid = x_train[:split], x_train[split:], y_train[:split], y_train[split:]
print('x tr: ', len(x_train))

('x tr: ', 31856)


In [9]:
#define callbacks
callbacks = [ModelCheckpoint('amazon_2007.hdf5', monitor='val_loss', save_best_only=True, verbose=2, save_weights_only=False),
             ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, verbose=1, mode='auto', epsilon=0.0001, cooldown=0, min_lr=0.0000001),
             EarlyStopping(monitor='val_loss', patience=5, verbose=0)]

In [10]:
BATCH = 128
IMG_SIZE = 224
mean_pix = np.array([102.9801, 115.9465, 122.7717]) #It is BGR

In [None]:
#Compile model and set non-top layets non-trainable (warm-up)
base_model = applications.resnet50.ResNet50(include_top=False, input_shape=(IMG_SIZE,IMG_SIZE,3), pooling='avg', weights='imagenet')
for layer in base_model.layers:
    layer.trainable = False

In [None]:
x = base_model.output
x = Dense(2048, activation='relu')(x)
x = Dropout(0.25)(x)
output = Dense(17, activation='sigmoid')(x)

In [None]:
model = Model(inputs=base_model.inputs, outputs=output)
model = make_parallel(model, 2)

In [None]:
model.summary()

In [None]:
optimizer = Adam(0.001, decay=0.0003)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy', fbeta_score_K])

In [None]:
model.fit_generator(generator=batch_generator_train(list(zip(x_train, y_train)), IMG_SIZE, BATCH),
                          steps_per_epoch=np.ceil(len(x_train)/BATCH),
                          epochs=1,
                          verbose=1,
                          validation_data=batch_generator_train(list(zip(x_valid, y_valid)), IMG_SIZE, 16),
                          validation_steps=np.ceil(len(x_valid)/16),
                          callbacks=callbacks,
                          initial_epoch=0,
                           use_multiprocessing=True,
                           workers=16)

In [None]:
#Compile model and set all layers trainable
optimizer = Adam(0.0001, decay=0.00000001)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy', fbeta_score_K])
model.load_weights('amazon_2007.hdf5', by_name=True)
for layer in base_model.layers:
    layer.trainable = True

In [None]:
BATCH = 32
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy', fbeta_score_K])
model.fit_generator(generator=batch_generator_train(list(zip(x_train, y_train)), IMG_SIZE, BATCH),
                          steps_per_epoch=np.ceil(len(x_train)/BATCH),
                          epochs=50,
                          verbose=1,
                          validation_data=batch_generator_train(list(zip(x_valid, y_valid)), IMG_SIZE, 16),
                          validation_steps=np.ceil(len(x_valid)/16),
                          callbacks=callbacks,
                          initial_epoch=0,
                           use_multiprocessing=True,
                           workers=16)

In [16]:
#Compile model and set all layers trainable
model=load_model('amazon_2007.hdf5', custom_objects={"tf": tf,"fbeta_score_K": fbeta_score_K})
# optimizer = Adam(1.6000000187e-07, decay=0.00000001)
# model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy', fbeta_score_K])
# model.load_weights('amazon_2007.hdf5', by_name=True)
# for layer in base_model.layers:
#     layer.trainable = True

In [17]:
optimizer = Adam(1.0e-06, decay=0.00000001)
model = make_parallel(model, 2)
# model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy', fbeta_score_K])

In [18]:
BATCH = 32
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy', fbeta_score_K])
model.fit_generator(generator=batch_generator_train(list(zip(x_train, y_train)), IMG_SIZE, BATCH),
                          steps_per_epoch=np.ceil(len(x_train)/BATCH),
                          epochs=50,
                          verbose=1,
                          validation_data=batch_generator_train(list(zip(x_valid, y_valid)), IMG_SIZE, 16),
                          validation_steps=np.ceil(len(x_valid)/16),
                          callbacks=callbacks,
                          initial_epoch=0,
                           use_multiprocessing=True,
                           workers=32)























































































































































































































Epoch 1/50



Epoch 00000: val_loss improved from inf to 0.09572, saving model to amazon_2007.hdf5
Epoch 2/50
Epoch 3/50
Epoch 4/50

Epoch 00003: reducing learning rate to 1.99999999495e-07.
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50

Epoch 00011: reducing learning rate to 1e-07.
Epoch 13/50
Epoch 14/50
Epoch 15/50


<keras.callbacks.History at 0x7f6d58757dd0>

In [46]:
model.load_weights('amazon_2007.hdf5')


x_val = []
y_val = []
x_hld = []
y_hld = []
x_test = []
y_test = []

In [47]:
#====================== validation set est =================================
for f, tags in tqdm(list(zip(x_valid, y_valid)), miniters=1000):
    y_val.append(tags)

p_valid = model.predict_generator(batch_generator_test(list(zip(x_valid, y_valid)), IMG_SIZE, 4, shuffle=False),
                                  steps=int(np.ceil(len(x_valid)/4)),
                                 verbose=1)

print('val_set: ', fbeta_score(np.array(y_val), np.array(p_valid) > 0.2, beta=2, average='samples'))
#===========================================================================


100%|██████████| 5622/5622 [00:00<00:00, 673859.83it/s]

(5622, 1406)
counter: 0
counter: 1
counter: 2
counter: 3
counter: 4
counter: 5
counter: 6
counter: 7
counter: 8
counter: 9
counter: 10
   1/1406 [..............................] - ETA: 222s




counter: 11
   2/1406 [..............................] - ETA: 175scounter: 12
   3/1406 [..............................] - ETA: 183scounter: 13
   4/1406 [..............................] - ETA: 181scounter: 14
   5/1406 [..............................] - ETA: 176scounter: 15
   6/1406 [..............................] - ETA: 171scounter: 16
   7/1406 [..............................] - ETA: 167scounter: 17
   8/1406 [..............................] - ETA: 160scounter: 18
   9/1406 [..............................] - ETA: 158scounter: 19
  10/1406 [..............................] - ETA: 156scounter: 20
  11/1406 [..............................] - ETA: 154scounter: 21
  12/1406 [..............................] - ETA: 152scounter: 22
  13/1406 [..............................] - ETA: 152scounter: 23
  14/1406 [..............................] - ETA: 150scounter: 24
  15/1406 [..............................] - ETA: 149scounter: 25
  16/1406 [..............................] - ETA: 147scounter: 2

 125/1406 [=>............................] - ETA: 126scounter: 135
 126/1406 [=>............................] - ETA: 126scounter: 136
 127/1406 [=>............................] - ETA: 126scounter: 137
 128/1406 [=>............................] - ETA: 126scounter: 138
 129/1406 [=>............................] - ETA: 126scounter: 139
 130/1406 [=>............................] - ETA: 126scounter: 140
 131/1406 [=>............................] - ETA: 125scounter: 141
 132/1406 [=>............................] - ETA: 125scounter: 142
 133/1406 [=>............................] - ETA: 125scounter: 143
 134/1406 [=>............................] - ETA: 125scounter: 144
 135/1406 [=>............................] - ETA: 125scounter: 145
 136/1406 [=>............................] - ETA: 125scounter: 146
 137/1406 [=>............................] - ETA: 125scounter: 147
 138/1406 [=>............................] - ETA: 125scounter: 148
 139/1406 [=>............................] - ETA: 125scounter:

 247/1406 [====>.........................] - ETA: 113scounter: 257
 248/1406 [====>.........................] - ETA: 113scounter: 258
 249/1406 [====>.........................] - ETA: 113scounter: 259
 250/1406 [====>.........................] - ETA: 112scounter: 260
 251/1406 [====>.........................] - ETA: 112scounter: 261
 252/1406 [====>.........................] - ETA: 112scounter: 262
 253/1406 [====>.........................] - ETA: 112scounter: 263
 254/1406 [====>.........................] - ETA: 112scounter: 264
 255/1406 [====>.........................] - ETA: 112scounter: 265
 256/1406 [====>.........................] - ETA: 112scounter: 266
 257/1406 [====>.........................] - ETA: 112scounter: 267
 258/1406 [====>.........................] - ETA: 111scounter: 268
 259/1406 [====>.........................] - ETA: 111scounter: 269
 260/1406 [====>.........................] - ETA: 111scounter: 270
 261/1406 [====>.........................] - ETA: 111scounter:



















ValueError: Found input variables with inconsistent numbers of samples: [5622, 5620]

In [76]:
p_valid2 = model.predict_generator(batch_generator_test(list(zip(x_valid, y_valid))[-4:], IMG_SIZE, 2, shuffle=False),
                                  steps=2, max_queue_size=1, workers=1, use_multiprocessing=False,
                                 verbose=1)

(4, 2)
counter: 0
/home/user/data/amazon_planet/train-jpg/train_24633.jpg
/home/user/data/amazon_planet/train-jpg/train_22857.jpg
counter: 1
/home/user/data/amazon_planet/train-jpg/train_17528.jpg
/home/user/data/amazon_planet/train-jpg/train_3899.jpg
/home/user/data/amazon_planet/train-jpg/train_24633.jpg
/home/user/data/amazon_planet/train-jpg/train_3899.jpg


In [77]:
p_valid2

array([], shape=(0, 17), dtype=float32)

In [None]:
def optimise_f2_thresholds(y, p, verbose=True, resolution=100):
    #credits https://www.kaggle.com/c/planet-understanding-the-amazon-from-space/discussion/32475
  def mf(x):
    p2 = np.zeros_like(p)
    for i in range(17):
      p2[:, i] = (p[:, i] > x[i]).astype(np.int)
    score = fbeta_score(y, p2, beta=2, average='samples')
    return score

  x = [0.2]*17
  for i in range(17):
    best_i2 = 0
    best_score = 0
    for i2 in range(resolution):
      i2 /= resolution
      x[i] = i2
      score = mf(x)
      if score > best_score:
        best_i2 = i2
        best_score = score
    x[i] = best_i2
    if verbose:
      print(i, best_i2, best_score)

  return x

X = optimise_f2_thresholds(np.array(y_val), np.array(p_valid))

In [78]:
#====================== holdout set est =================================
for f, tags in tqdm(list(zip(x_holdout, y_holdout)), miniters=1000):
    img = cv2.resize(cv2.imread(f), (IMG_SIZE, IMG_SIZE))
    x_hld.append(img)
    y_hld.append(tags)

if len(x_holdout) % 2 > 0:
    x_hld.append(x_hld[0])
    y_hld.append(y_hld[0])

x_hld = np.array(x_hld, np.float16)

p_valid = model.predict(x_hld, batch_size=28, verbose=2)
print('holdout set: ', f2_score(np.array(y_hld), np.array(p_valid) > 0.2))
print('holdout set w/ thresh: ', f2_score(np.array(y_hld), np.array(p_valid) > 0.19))
#===========================================================================

100%|██████████| 3000/3000 [00:05<00:00, 582.94it/s]


('holdout set: ', 0.4228732416922274)
('holdout set w/ thresh: ', 0.42908192056572214)


In [79]:
for f, tags in tqdm(df_test.values, miniters=1000):
    img = TEST_FOLDER + '{}.jpg'.format(f)
    x_test.append(img)

batch_size_test = 32
len_test = len(x_test)
x_tst = []
yfull_test = []

100%|██████████| 61191/61191 [00:00<00:00, 334137.89it/s]


In [80]:
TTA_steps = 11

for k in range(0, TTA_steps):
    print(k)
    probs = model.predict_generator(predict_generator(x_test,IMG_SIZE,batch_size_test), steps=np.ceil(len(x_test)/batch_size_test),verbose=1)
    yfull_test.append(probs)
    k += 1

result = np.array(yfull_test[0])

for i in range(1, TTA_steps):
    result += np.array(yfull_test[i])
result /= TTA_steps

res = pd.DataFrame(result, columns=labels)
preds = []

for i in tqdm(range(res.shape[0]), miniters=1000):
    a = res.ix[[i]]
    a = a.apply(lambda x: x > X, axis=1)
    a = a.transpose()
    a = a.loc[a[i] == True]
    ' '.join(list(a.index))
    preds.append(' '.join(list(a.index)))

print(len(preds))

0(61191, 1913.0)

1
(61191, 1913.0)
 151/1913 [=>............................] - ETA: 374s

KeyboardInterrupt: 

In [85]:
result = np.array(yfull_test[0])
result.shape
res = pd.DataFrame(result, columns=labels)
preds = []

for i in tqdm(range(res.shape[0]), miniters=1000):
    a = res.ix[[i]]
    a = a.apply(lambda x: x > 0.19, axis=1)
    a = a.transpose()
    a = a.loc[a[i] == True]
    ' '.join(list(a.index))
    preds.append(' '.join(list(a.index)))

print(len(preds))

100%|██████████| 61188/61188 [01:58<00:00, 518.05it/s]

61188





In [109]:
df_test['tags'] = preds
# df_test = df_test[:-57]
df_test.to_csv('submission.csv', index=False)

In [103]:
len(preds)

61191

In [108]:
df_test.shape

(61191, 2)

In [105]:
df_test = df_test[:-43]

In [106]:
df_test.shape

(61091, 2)