In [None]:
import os
import csv
import pickle
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import preprocess_input
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
import xgboost as xgb
from constants import *

In [None]:
# Load every saved network found in FINAL_MODELS_DIR.
# os.listdir() returns entries in arbitrary order, and a model's position in
# `models` decides which m{i}_* feature columns it fills when the ensemble CSV
# is written, so iterate in sorted order to keep that mapping stable across
# runs and machines. Any code that later feeds the stacked classifier must
# build its feature vector in the same (sorted) model order.
models = []
for model_file in sorted(os.listdir(FINAL_MODELS_DIR)):
    models.append(load_model(os.path.join(FINAL_MODELS_DIR, model_file)))
    print('loaded model {}'.format(model_file))


# Shuffled batch iterator over the images under DATA_DIR/train. Images are
# resized to INPUT_SIZE and passed through the ResNet50 preprocess_input
# function; class_mode='sparse' asks for one class index per image as label.
gen = ImageDataGenerator(
    preprocessing_function=preprocess_input
).flow_from_directory(
    os.path.join(DATA_DIR, 'train'),
    target_size=INPUT_SIZE,
    class_mode='sparse',
    shuffle=True,
    batch_size=BATCH_SIZE)

In [None]:
# Build the stacking data set: for every batch drawn from `gen`, run all
# loaded models, write the concatenated per-model prediction vectors plus the
# true label as one CSV row per image, and track the accuracy of simple
# soft voting (argmax of the summed predictions) as a running sanity check.
os.makedirs('ensemble', exist_ok=True)
# newline='' is what the csv module asks for when handing it a file object;
# without it, platforms that translate '\n' end up with blank lines between rows.
with open(os.path.join('ensemble', 'ensemble.csv'), mode='w',
          newline='') as csv_file:
    writer = csv.writer(csv_file, delimiter=',', quotechar='"',
                        quoting=csv.QUOTE_MINIMAL)

    # Width of each model's prediction vector; named once instead of
    # repeating the literal in the header and in the accumulator shape.
    n_classes = 42

    # Column names are m<model index>_<class index>, followed by the label.
    models_header = ['m{}_{}'.format(i, j)
                     for i in range(len(models)) for j in range(n_classes)]
    writer.writerow(models_header + ['y'])

    total_tested, num_correct = 0, 0
    for test_images, test_labels in gen:
        # Size everything from the batch actually received. A batch can be
        # shorter than BATCH_SIZE (e.g. the tail of a pass over the data);
        # sizing the row buffers with BATCH_SIZE then indexes past the end of
        # test_labels and over-counts total_tested.
        n_in_batch = test_images.shape[0]
        total_tested += n_in_batch

        p_sum = np.zeros((n_in_batch, n_classes))
        p_all = [[] for _ in range(n_in_batch)]

        for model in models:
            preds = model.predict(test_images)
            p_sum += preds
            # Append this model's vector to each image's feature row.
            for i, p in enumerate(preds):
                p_all[i].extend(p)

        # Soft-voting accuracy: predicted class is the argmax of the sum.
        for i, p in enumerate(np.argmax(p_sum, axis=1)):
            if p == int(test_labels[i]):
                num_correct += 1

        for row, label in zip(p_all, test_labels):
            writer.writerow(row + [label])

        print("{} correct out of {} ({:.5%})".format(
            num_correct, total_tested, num_correct/total_tested))

        # Explicit cap on the number of collected rows; this is what ends
        # the loop.
        if total_tested > 88888:
            break

In [None]:
# Train the stacked classifier on 80% of the collected prediction rows and
# report its accuracy on the remaining 20%.
df = pd.read_csv(os.path.join('ensemble', 'ensemble.csv'))
# Every column but the last is a model output; the last column ('y') is the label.
X, y = df.iloc[:, :-1], df.iloc[:, -1]
# Kept for compatibility; nothing visible in this notebook reads it, since the
# estimators below are fitted directly on X / y.
data_dmatrix = xgb.DMatrix(data=X, label=y)
# Fixed seed so the split, and therefore the accuracy shown below, is the
# same on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)

# NOTE(review): 'reg:logistic' is a binary objective, while the labels here
# span many classes. The scikit-learn wrapper is expected to substitute a
# multi-class objective in that case - confirm against the installed xgboost.
xg_reg = xgb.XGBClassifier(
    objective ='reg:logistic',
    eta=0.05,
    max_depth=1,
    min_child_weight=9,
)

xg_reg.fit(X_train,y_train,verbose=1)
preds = xg_reg.predict(X_test)
# Last expression of the cell: the hold-out accuracy is displayed as output.
accuracy_score(y_test, preds)

In [None]:
# Refit on every available row (no hold-out) using the same hyper-parameters
# as the evaluation run; this is the estimator that gets persisted.
final_model_params = {
    'objective': 'reg:logistic',
    'eta': 0.05,
    'max_depth': 1,
    'min_child_weight': 9,
}
final_model = xgb.XGBClassifier(**final_model_params)
final_model.fit(X, y, verbose=1)

In [None]:
# Persist the fitted ensemble model. The context manager guarantees the file
# is flushed and closed even if pickling raises; a bare open() left the
# handle to be closed whenever the interpreter got around to it.
with open(os.path.join('ensemble', "model.pickle.dat"), "wb") as model_file:
    pickle.dump(final_model, model_file)