In [40]:
# Show the installed Keras version so the environment used for this run is recorded.
import keras
keras.__version__

'2.1.2'

# Making predictions on test data

In [51]:
from keras.models import load_model

# Saved model file produced by the training run on AWS.
model_path = './weights/name_that_whale_702.h5'

# Restore the saved model and print its layer-by-layer summary.
model = load_model(model_path)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               9437312   
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 4250)              548250    
Total params: 9,985,562
Trainable params: 9,985,562
Non-trainable params: 0
_________________________________________________________________


**Loading the test images**

In [42]:
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

# Test images are read from the "test/" folder under the current working directory.
home_dir = os.getcwd()
test_dir = os.path.join(home_dir, 'test/')

# Every entry of that folder is treated as one test image.
test_list = os.listdir(test_dir)
test_count = len(test_list)

print(test_count)


15610


In [43]:
from keras.preprocessing import image

# Model was trained using images of this size
image_size = (180,180)

def load_image(img_path):
    """Load one image file as a scaled array.

    The image at ``img_path`` is read and resized to ``image_size``, converted
    to an array with ``image.img_to_array`` and divided by 255.

    Returns the scaled array.
    """
    resized = image.load_img(img_path, target_size=image_size)
    pixels = image.img_to_array(resized)
    return pixels / 255

In [44]:
import os
import numpy as np

home_dir = os.getcwd()
fname = os.path.join(home_dir, 'targets.csv') # targets for both train and validation

# The context manager closes the file even if reading raises.
with open(fname) as f:
    data = f.read()

# First row is the CSV header; each remaining non-blank row is "<image>,<id>".
# splitlines() copes with a missing trailing newline and with Windows line
# endings, so no real row is dropped and no '\r' ends up inside an id.
all_rows = data.splitlines()
header = all_rows[0].split(',') if all_rows else []
lines = [row for row in all_rows[1:] if row.strip()]

ids = [line.split(',')[1] for line in lines]

# Unique ids in sorted order, without the catch-all 'new_whale' label.
# Set difference does not raise when 'new_whale' is absent from the file.
# NOTE(review): the position of an id in this list is used as the column index
# into the model output, so it presumably has to match the ordering used
# during training - confirm.
whale_ids = sorted(set(ids) - {'new_whale'})
ids_count = len(whale_ids)

print(header)
print(len(lines))
print(ids_count)

['Image', 'Id']
9850
4250


# Baseline
Extracting baseline info: get the top 5 most frequently occurring ids in the training data.

In [68]:
from collections import Counter

# How often each id occurs in the targets file.
id_counts = Counter(ids)
top_5 = id_counts.most_common(5)

# (id, share of all target rows) for each of the most frequent ids.
top_5_base = [(whale_id, count / len(lines)) for whale_id, count in top_5]

# Share of rows labelled 'new_whale', looked up by name instead of assuming
# that 'new_whale' happens to be the single most frequent id.
p_new_whale = id_counts['new_whale'] / len(lines)

# Last expression of the cell, so the baseline table is actually displayed.
top_5_base

## Predicting labels

In [50]:
from keras.applications import Xception

# Adjustable size (height, width) of the processed images fed to the network.
image_size = (180,180)

# Xception convolutional base with ImageNet weights and without its top layers;
# it takes 3-channel inputs of the size chosen above.
conv_base = Xception(include_top=False,
                     weights='imagenet',
                     input_shape=image_size + (3,))

In [94]:
batch_size = 10

def predict_labels(augmentation):
    """Predict a score vector for every image listed in ``test_list``.

    Images are processed ``batch_size`` at a time. The number of batches is
    derived from ``test_count`` and the last batch may be smaller, so the
    function works for any number of test images and any batch size.

    Parameters
    ----------
    augmentation : bool
        True if the model was trained in train_aug.ipynb and takes images
        directly. False if the model takes flattened ``conv_base`` features.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(test_count, ids_count)``; row ``k`` holds the model
        output for ``test_list[k]``.
    """
    labels = np.zeros(shape=(test_count, ids_count))
    for start in range(0, test_count, batch_size):
        stop = min(start + batch_size, test_count)
        # Stack the images of this batch along a new leading axis.
        batch = np.stack([load_image(os.path.join(test_dir, file_name))
                          for file_name in test_list[start:stop]])
        if augmentation:
            labels_batch = model.predict(batch)
        else:
            features_batch = conv_base.predict(batch)
            # Flatten each feature map to one vector per image, whatever the
            # spatial size produced by conv_base is.
            features_batch = np.reshape(features_batch, (len(features_batch), -1))
            labels_batch = model.predict(features_batch)
        labels[start:stop] = labels_batch
    return labels
              

In [95]:
# Run the feature-based path: images go through conv_base before the model.
label_prediction = predict_labels(augmentation=False)

In [65]:
# The context manager closes the file even if the write fails.
# NOTE(review): str() of a large NumPy array is abbreviated with "...", so this
# file is only a human-readable preview and cannot be used to restore the
# predictions. Use np.save / np.load if the full array must be kept.
with open('predictions/label_prediction_1.txt','w') as f:
    f.write(str(label_prediction))

In [97]:
# This function takes in a label vector, and returns the top 5 whale ids in order
import copy  # not needed by get_ids itself; kept because other cells may rely on it

def get_ids(label_vec, id_names=None):
    """Return the ids with the five highest scores as one space-separated string.

    Parameters
    ----------
    label_vec : array-like of float
        One score per id; ``label_vec[k]`` belongs to ``id_names[k]``.
        The input is not modified.
    id_names : sequence of str, optional
        Id for every position of ``label_vec``. Defaults to the notebook-level
        ``whale_ids`` list.

    Returns
    -------
    str
        Up to five ids, highest score first, separated by single spaces.
        Equal scores are ordered by their position in ``label_vec``.
    """
    if id_names is None:
        id_names = whale_ids
    scores = np.asarray(label_vec, dtype=float)
    # Stable sort of the negated scores: highest score first, ties keep their
    # original order. 'mergesort' is the stable kind available in every NumPy.
    best = np.argsort(-scores, kind='mergesort')[:5]
    return ' '.join(id_names[k] for k in best)

In [118]:
# This function takes in a label vector, and returns the top 5 whale ids in order, or inserts
# new_whale if the probability of a guessed ID is less than p_new
p_new = .95
def get_ids_or_new_whale(label_vec, threshold=None, id_names=None):
    """Return five guesses, inserting 'new_whale' before the first weak guess.

    Candidates are taken in order of decreasing score. The first time the
    next candidate scores below ``threshold``, 'new_whale' is emitted in its
    place (at most once) and the remaining slots are filled with the
    candidates in order.

    Parameters
    ----------
    label_vec : array-like of float
        One score per id; ``label_vec[k]`` belongs to ``id_names[k]``.
        The input is not modified.
    threshold : float, optional
        Minimum score for an id to be placed ahead of 'new_whale'. Defaults to
        the notebook-level ``p_new``.
    id_names : sequence of str, optional
        Id for every position of ``label_vec``. Defaults to the notebook-level
        ``whale_ids`` list.

    Returns
    -------
    str
        Up to five space-separated guesses. Equal scores are ordered by their
        position in ``label_vec``.
    """
    if threshold is None:
        threshold = p_new
    if id_names is None:
        id_names = whale_ids
    scores = np.asarray(label_vec, dtype=float)
    # Highest score first; the stable sort keeps tied ids in their original order.
    ranked = np.argsort(-scores, kind='mergesort')[:5]

    picks = []
    new_whale_guessed = False
    pos = 0  # index into ranked of the next unused candidate
    while len(picks) < 5:
        exhausted = pos >= len(ranked)
        if not new_whale_guessed and (exhausted or scores[ranked[pos]] < threshold):
            # No sufficiently confident known id is left for this slot.
            picks.append('new_whale')
            new_whale_guessed = True
        elif exhausted:
            break
        else:
            picks.append(id_names[ranked[pos]])
            pos += 1
    return ' '.join(picks)

In [83]:
def top_5_prediction(label_vec, id_names=None):
    """Return the five highest-scoring ids together with their scores.

    Parameters
    ----------
    label_vec : array-like of float
        One score per id; ``label_vec[k]`` belongs to ``id_names[k]``.
        The input is not modified.
    id_names : sequence of str, optional
        Id for every position of ``label_vec``. Defaults to the notebook-level
        ``whale_ids`` list.

    Returns
    -------
    list of (str, float)
        Up to five ``(id, score)`` pairs, highest score first. Equal scores
        are ordered by their position in ``label_vec``.
    """
    if id_names is None:
        id_names = whale_ids
    values = np.asarray(label_vec)
    # Stable sort of the negated scores: highest first, ties keep input order.
    ranked = np.argsort(-values.astype(float), kind='mergesort')[:5]
    return [(id_names[k], values[k]) for k in ranked]
    

In [84]:
# Spot-check: show the guesses produced for the second test image (row 1).
get_ids_or_new_whale(label_prediction[1])

'new_whale w_b0362e2 w_5a2075e w_cae7677 w_c00534d'

In [112]:
# For the first 20 test images, print the highest score and, indented below
# it, the guesses derived from that row of predictions.
for row in range(20):
    row_scores = label_prediction[row]
    print(max(row_scores))
    print('  ' + get_ids_or_new_whale(row_scores))

0.790890276432
  w_eb0a6ed new_whale w_886257d w_b0e05b1 w_b0362e2
0.188661500812
  new_whale w_6556c5c w_a254eb0 w_392bee3 w_540fd73
0.0436196923256
  new_whale w_98baff9 w_74adf0b w_97f5054 w_ddbf533
0.999880671501
  w_7c7a78c new_whale w_9ca943b w_26edeb8 w_2fe43c7
0.130795732141
  new_whale w_1da7080 w_8459e39 w_41fa033 w_680e011
0.35473921895
  new_whale w_89e159a w_95874a5 w_dfbfe10 w_bb2d34d
0.333799034357
  new_whale w_b0e05b1 w_cd70e8b w_1287fbc w_0e737d0
0.730777025223
  w_ed117b3 new_whale w_a254eb0 w_d141590 w_3027b8f
0.374759882689
  new_whale w_f4e03d4 w_6c803bf w_a524549 w_9c5ed68
0.253903359175
  new_whale w_89e159a w_abe383e w_32a920b w_1ecfe96
0.17209880054
  new_whale w_95874a5 w_12c3d3d w_1287fbc w_25871da
0.225434482098
  new_whale w_78f2e92 w_3e1ba5b w_987a36f w_71c7322
0.178572177887
  new_whale w_b0e05b1 w_c10ffe9 w_0e737d0 w_4e68ddc
0.999999761581
  w_43b50e5 new_whale w_95874a5 w_97f5054 w_3f365f3
0.0816478431225
  new_whale w_cae7677 w_fe49bc4 w_1287fbc w_b0a

In [119]:
# Build the submission text: a header row, then one row per test image holding
# the file name and its guesses. Iterating over the data itself (instead of a
# hard-coded 15610) keeps this correct for any number of test images, and a
# single join avoids repeatedly growing one large string.
# NOTE(review): rows are separated with ', ' while the header uses ','; the
# separator is kept unchanged here - confirm the consumer tolerates the space.
rows = ['Image,Id\n']
for file_name, label_vec in zip(test_list, label_prediction):
    rows.append(file_name + ', ' + get_ids_or_new_whale(label_vec) + '\n')
prediction = ''.join(rows)

In [120]:
# Write the submission file; the context manager closes it even if the write fails.
with open('predictions/prediction_p_new=.95.csv','w') as f:
    f.write(prediction)