In [2]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import utils
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [3]:
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [11]:
import glob

# Read every test tile into memory. `test_filenames[i]` is the extension-less
# file name of the image stored at `X_test[i]`, so the two lists must stay
# index-aligned: both are appended to only after a successful read.
# The glob result is sorted so that row order is reproducible across runs.
X_test = []
test_filenames = []
test_file_list = sorted(glob.glob('../input/test-jpg/*.jpg'))
for path in tqdm(test_file_list):
    # Deliberately `img`, not `image`: rebinding `image` would shadow the
    # `keras.preprocessing.image` module imported earlier in the notebook.
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports an unreadable/corrupt file by returning None rather
        # than raising; fail here with the offending path instead of letting the
        # later np.stack call blow up with an opaque shape error.
        raise IOError('could not read image: {}'.format(path))
    # NOTE(review): cv2.imread returns channels in BGR order -- confirm the
    # checkpoint loaded below was trained on BGR input as well.
    # splitext removes only the trailing extension, whereas str.replace would
    # also strip a '.jpg' occurring in the middle of a name.
    test_filenames.append(os.path.splitext(os.path.basename(path))[0])
    X_test.append(img)

100%|███████████████████████████████████████████████████████████████████████████| 61191/61191 [01:49<00:00, 557.28it/s]


In [12]:
# Merge the per-image arrays into a single batch array; np.stack adds the new
# leading (image index) axis by default. The shape is displayed as a sanity check.
X_test = np.stack(X_test)
X_test.shape

(61191, 256, 256, 3)

In [13]:
# Rescale pixel intensities by 1/255 and store them as float32.
# The dtype guard makes the cell idempotent: once the array is float32 it has
# already been scaled, so re-running the cell must not divide by 255 again.
if X_test.dtype != np.float32:
    X_test = X_test.astype('float32')
    X_test /= 255

In [14]:
# Rebuild the network architecture so that the checkpoint loaded further down
# can be restored into it.
# Base: InceptionV3 with ImageNet weights and without its top classifier
# (include_top=False), so a custom head can be attached.
base_model = InceptionV3(weights='imagenet', include_top=False)

# add a global spatial average pooling layer on top of the convolutional output
x = base_model.output
x = GlobalAveragePooling2D()(x)
# fully-connected hidden layer of the custom head
x = Dense(1024, activation='relu')(x)
# output layer: 17 independent sigmoid units, one per tag listed in `labels_str`
# (several tags can be active for the same image, hence sigmoid per unit)
predictions = Dense(17, activation='sigmoid')(x)

# full model: InceptionV3 input -> 17 tag scores.
# In this notebook it is only used for inference (weights are loaded below).
model = Model(inputs=base_model.input, outputs=predictions)

# freeze all convolutional InceptionV3 layers so that only the new head is trainable.
# NOTE(review): presumably mirrors the setup used when the checkpoint was trained --
# the training code is not part of this notebook, so confirm.
for layer in base_model.layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer='adam', loss='binary_crossentropy')

In [None]:
# Restore the trained parameters from the saved checkpoint file.
weights_path = "weights.00-0.13995.hdf5"
model.load_weights(weights_path)

In [None]:
# Score every test image; the result holds one row of per-tag sigmoid outputs per image.
ytest = model.predict(X_test)

In [None]:
# Sanity check: display the dimensions of the prediction array.
np.shape(ytest)

In [None]:
data_dir = '../input'

# Training label table; loaded here but not referenced again in the cells shown.
train_label = pd.read_csv(os.path.join(data_dir, 'train_v2.csv'))

# Tag vocabulary. The position of a tag in `labels` is the index of the model
# output column it is paired with when predictions are decoded.
# NOTE(review): this order presumably has to match the one used in training -- confirm.
labels_str = 'agriculture, artisinal_mine, bare_ground, blooming, blow_down, clear, cloudy, conventional_mine, cultivation, habitation, haze, partly_cloudy, primary, road, selective_logging, slash_burn, water'
labels = labels_str.split(', ')

# Reverse lookup: tag name -> column index.
label_map = {name: position for position, name in enumerate(labels)}

In [None]:
def map_predictions(predictions, labels_map, thresholds=None):
    """Turn per-class scores into lists of label names.

    A label is selected for a sample when its score is strictly greater than
    the threshold of that class.

    Parameters
    ----------
    predictions : iterable of 1-D score sequences
        One row per sample, one score per class.
    labels_map : sequence or mapping
        Indexed with the integer class position to obtain the label, i.e.
        ``labels_map[i]`` is the label of score column ``i``.
    thresholds : None, scalar, or indexable of per-class thresholds
        ``None`` (default) applies 0.2 to every class, whatever the number of
        classes. A scalar applies that single value to every class. Anything
        else is indexed per class as ``thresholds[i]``.

    Returns
    -------
    list of list
        For each sample, the selected labels in class-index order.
    """
    if thresholds is None:
        thresholds = 0.2
    # A scalar is shared by all classes. It is kept as float64 so comparisons
    # promote exactly as they did with the former np.ones(17) * 0.2 default.
    shared = np.float64(thresholds) if np.isscalar(thresholds) else None

    predictions_labels = []
    for prediction in predictions:
        # `tags` rather than `labels`, to avoid shadowing the notebook-level `labels` list
        tags = [
            labels_map[i]
            for i, value in enumerate(prediction)
            if value > (thresholds[i] if shared is None else shared)
        ]
        predictions_labels.append(tags)

    return predictions_labels

In [None]:
# Decode the score matrix into tag lists using the default thresholds.
# `labels` (the index -> name list) is the lookup here, not `label_map`,
# which maps in the opposite direction.
predicted_labels = map_predictions(predictions=ytest, labels_map=labels)

In [None]:
# Collapse each image's tag list into a single space-separated string.
predicted_labels_str = list(map(' '.join, predicted_labels))

In [None]:
# Write one row per test image: its name and its space-separated predicted tags.
output_path = '../output/keras_pred1.csv'

# to_csv does not create missing directories, so make sure the target exists first.
output_dir = os.path.dirname(output_path)
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

# `columns` pins the column order explicitly instead of relying on dict ordering.
df = pd.DataFrame({'image_name': test_filenames, 'tags': predicted_labels_str},
                  columns=['image_name', 'tags'])
df.to_csv(output_path, index=False)