In [24]:
import cv2
import numpy as np
import os
import pandas as pd
from sklearn.preprocessing import LabelBinarizer
from glob import glob
from keras.applications import VGG16
from keras.models import Sequential
from keras.layers import Flatten, Dropout, Dense
from keras.optimizers import SGD
from concurrent.futures import ProcessPoolExecutor

In [17]:
def resize_img(file_path, size=(150, 150)):
    """Read an image file in colour mode and resize it with bicubic interpolation.

    Args:
        file_path: Path of the image file, passed to ``cv2.imread``.
        size: Target size handed to ``cv2.resize`` as its ``dsize`` argument.
            Defaults to ``(150, 150)``.

    Returns:
        The resized image as returned by ``cv2.resize``.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for ``file_path``.
    """
    img = cv2.imread(file_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals an unreadable file by returning None rather than
        # raising; fail here with the offending path instead of letting
        # cv2.resize blow up with an opaque error.
        raise ValueError("could not read image: {!r}".format(file_path))
    return cv2.resize(img, size, interpolation=cv2.INTER_CUBIC)


def prep_data(images, max_workers=4):
    """Load and resize a collection of image files into one uint8 array.

    Args:
        images: Iterable of image file paths; each one is passed to
            ``resize_img`` in a worker process.
        max_workers: Number of worker processes in the pool. Defaults to 4.

    Returns:
        ``np.ndarray`` of shape ``(len(images), 150, 150, 3)`` and dtype
        ``uint8``; row ``i`` holds the resized image for the ``i``-th path.
        An empty input yields an array with zero rows.
    """
    images = list(images)
    data = np.empty((len(images), 150, 150, 3), dtype=np.uint8)
    if not images:
        # Nothing to load: return the zero-row array without starting a pool
        # (assigning an empty list into the array would fail to broadcast).
        return data

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in input order, so rows stay aligned
        # with `images`. Writing each row as it arrives avoids holding a
        # second full copy of every image in an intermediate list.
        for row, img in enumerate(executor.map(resize_img, images)):
            data[row] = img

    return data


# Load every path matched by ../data/test/* into a single uint8 array of
# 150x150 3-channel images.
test = prep_data(glob('../data/test/*'))

# VGG16 with ImageNet weights and without its classification top, used here
# purely as a fixed feature extractor. The input shape is taken from the
# loaded array, i.e. (150, 150, 3).
# NOTE(review): the images are fed as raw uint8 pixels with no
# `preprocess_input` step -- confirm this matches how the training features
# were extracted.
model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=test.shape[1:],
)

# Features produced by the convolutional base for each test image.
test_feature = model.predict(test, verbose=1)



In [19]:
# Classifier head that consumes the extracted VGG16 features: flatten, one
# hidden ReLU layer with dropout, then a 120-unit output layer.
# NOTE(review): the output layer uses `sigmoid` together with
# `categorical_crossentropy`; `softmax` is the usual pairing for mutually
# exclusive classes -- left unchanged so the definition keeps matching any
# weights trained elsewhere. Confirm before changing.
# NOTE(review): no fit()/load_weights() call is visible in this part of the
# notebook -- confirm trained weights are loaded before predicting.
model = Sequential([
    Flatten(input_shape=test_feature.shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(120, activation='sigmoid'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(lr=0.001, momentum=0.9, decay=1e-6, nesterov=False),
    metrics=['accuracy'],
)

In [26]:
# Score the extracted test features with the classifier head.
# NOTE(review): no fit()/load_weights() call is visible between the model
# definition and this predict -- confirm trained weights are in place.
predict = model.predict(test_feature, verbose=1)

# LabelBinarizer is fitted only to obtain `classes_`, the distinct labels it
# derived from the entries of ../data/train (presumably one entry per class --
# verify against the directory layout).
lb = LabelBinarizer()
lb.fit(os.listdir("../data/train"))

# Fail loudly on a class-count mismatch instead of silently dropping
# prediction columns (or hitting an IndexError halfway through).
if predict.shape[1] != len(lb.classes_):
    raise ValueError(
        "model outputs {} columns but ../data/train yields {} classes".format(
            predict.shape[1], len(lb.classes_)))

# One column of predicted scores per class, in `classes_` order.
data = {name: predict[:, k] for k, name in enumerate(lb.classes_)}

dt = pd.DataFrame(data)

# Build the ids from the same glob pattern that was used to load the test
# images: os.listdir also returns dot-files that glob('*') skips, which would
# leave the id column out of step with the prediction rows.
test_ids = [
    os.path.basename(path).split(".")[0]
    for path in glob('../data/test/*')
]
dt.insert(0, "id", test_ids)
dt.to_csv('predictions.csv', index=False)

