In [71]:
import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.densenet import preprocess_input, DenseNet121
from tensorflow.keras.layers import GlobalAveragePooling2D, Input, Lambda, AveragePooling1D
import tensorflow.keras.backend as K
from PIL import Image
import os
from parse import parse
from matplotlib import pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook
import pickle

In [63]:
# Directories holding the raw train / test images.
train_path = 'data/train_images'
test_path = 'data/test_images'

# Full paths of every entry in each directory. Each listing is joined with the
# directory it was read from, so the cell does not rely on any name left over
# in the kernel from an earlier session.
train_img_paths = [os.path.join(train_path, d) for d in os.listdir(train_path)]
test_img_paths = [os.path.join(test_path, d) for d in os.listdir(test_path)]

In [64]:
# Spatial size handed to `Image.resize`, and the per-image array shape
# (spatial size plus a trailing axis of length 3) used for the network input
# and the batch buffers.
size = [256, 256]
shape = size + [3]
def load_image(img_path):
    """Load one image file as a preprocessed float32 array.

    The image is converted to RGB, resized to the module-level `size`, cast to
    float32 and passed through the DenseNet `preprocess_input` function.

    Args:
        img_path: Path of the image file to open.

    Returns:
        The preprocessed image as a float32 numpy array with three channels.

    Raises:
        Propagates whatever PIL raises when the file is missing or cannot be
        decoded; callers decide whether that is fatal.
    """
    # The context manager releases the underlying file handle even when
    # decoding fails part-way through.
    with Image.open(img_path) as raw:
        # Force three channels: grayscale, palette or RGBA files would
        # otherwise produce arrays that do not fit the 3-channel model input.
        img = raw.convert('RGB').resize(size)
    arr = np.array(img).astype(np.float32)
    return preprocess_input(arr)

In [34]:
# Backbone: DenseNet121 initialised with ImageNet weights and built without its
# classification top, fed from an explicit input tensor of shape `shape`.
input_tensor = Input(shape=shape)
densenet = DenseNet121(
    include_top=False,
    weights='imagenet',
    input_tensor=input_tensor,
)

In [56]:
# Feature head applied on top of the DenseNet output, in order:
#   1. global average pooling over the spatial axes,
#   2. append a trailing axis so the pooled vector can go through a 1D pool,
#   3. average pooling with pool size 4 along the pooled-feature axis,
#   4. drop the trailing axis again.
# NOTE(review): the resulting feature length should be a quarter of the
# backbone's channel count — confirm with `model.output_shape`.
head_layers = [
    GlobalAveragePooling2D(),
    Lambda(lambda t: K.expand_dims(t, axis=-1)),
    AveragePooling1D(4),
    Lambda(lambda t: t[:, :, 0]),
]

out = densenet.output
for head_layer in head_layers:
    out = head_layer(out)

model = Model(input_tensor, out)

In [54]:
# Load the train / test tables and pull the 'PetID' column of each into a
# plain Python list; the last line displays both list lengths.
train_df = pd.read_csv('data/train/train.csv')
test_df = pd.read_csv('data/test/test.csv')
train_ids, test_ids = (frame['PetID'].tolist() for frame in (train_df, test_df))
len(train_ids), len(test_ids)

(14993, 3948)

In [74]:
# Will use only profile images
# Predict the features of train and test images
def get_profile_img_path(petid, absp):
    """Build the path of a pet's profile image.

    Args:
        petid: Pet identifier; formatted into the file name.
        absp: Directory that contains the image files.

    Returns:
        `absp` joined with the file name '<petid>-1.jpg'.
    """
    filename = '{}-1.jpg'.format(petid)
    return os.path.join(absp, filename)

# Feature stores keyed by PetID. `test_features` is only initialised here; it
# is filled by the test-set loop.
train_features = {}
test_features = {}
batch_size = 32

# PetIDs whose profile image could not be loaded, kept so that silent load
# failures can be inspected after the run.
train_failed_ids = []

num_batches = int(np.ceil(len(train_ids) / batch_size))
for it in tqdm_notebook(range(num_batches)):
    ids = train_ids[it * batch_size: (it + 1) * batch_size]
    # Size the buffer to the actual number of ids so the final, partial batch
    # does not push unused padding rows through the network.
    batch = np.zeros(shape=[len(ids), *shape], dtype=np.float32)
    err_inds = []
    for i, petid in enumerate(ids):
        try:
            batch[i] = load_image(get_profile_img_path(petid, train_path))
        except Exception:
            # Best effort: the row stays all-zero and the pet still receives a
            # feature vector below. `Exception` (rather than a bare except)
            # lets KeyboardInterrupt / SystemExit stop the loop.
            err_inds.append(i)
            train_failed_ids.append(petid)
    f = model.predict(batch)
    for i, petid in enumerate(ids):
        train_features[petid] = f[i]

train_features

{}

In [None]:
# Same extraction as for the training set, applied to the test ids; results go
# into the `test_features` dict created in the previous cell.

# PetIDs whose profile image could not be loaded, kept so that silent load
# failures can be inspected after the run.
test_failed_ids = []

num_batches = int(np.ceil(len(test_ids) / batch_size))
for it in tqdm_notebook(range(num_batches)):
    ids = test_ids[it * batch_size: (it + 1) * batch_size]
    # Size the buffer to the actual number of ids so the final, partial batch
    # does not push unused padding rows through the network.
    batch = np.zeros(shape=[len(ids), *shape], dtype=np.float32)
    err_inds = []
    for i, petid in enumerate(ids):
        try:
            batch[i] = load_image(get_profile_img_path(petid, test_path))
        except Exception:
            # Best effort: the row stays all-zero and the pet still receives a
            # feature vector below. `Exception` (rather than a bare except)
            # lets KeyboardInterrupt / SystemExit stop the loop.
            err_inds.append(i)
            test_failed_ids.append(petid)
    f = model.predict(batch)
    for i, petid in enumerate(ids):
        test_features[petid] = f[i]