In [1]:
# Preparation
import os
import sys
import shutil
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

sys.path.append('common')
sys.path.append('external')
import util
from domain_modules import *
from domain_models import *

DATAROOT = os.path.join(os.path.expanduser('~'), '.kaggle/competitions/acoustic-scene-2018')

Using TensorFlow backend.


In [2]:
y_labels_train = pd.read_csv(os.path.join(DATAROOT, 'y_train.csv'), sep=',')['scene_label'].tolist()
labels = sorted(list(set(y_labels_train)))
label2int = {l:i for i, l in enumerate(labels)}
num_classes = len(labels)
y_train_org = np.array([label2int[l] for l in y_labels_train])
splitlist = pd.read_csv(os.path.join(DATAROOT, 'crossvalidation_train.csv'), sep=',')['set'].tolist()
y_valid_ref = np.array([y for i, y in enumerate(y_train_org) if splitlist[i] == 'test'])

In [3]:
from Simple import SimpleTuner

def weighted_arithmetic_mean_preds(preds, weights):
    '''Calculates weighted ensembled predictions'''
    norm_weights = weights / np.sum(weights)
    return np.average(preds, axis=0, weights=norm_weights)

def weighted_geometric_mean_preds(preds, weights):
    '''Calculates weighted ensembled predictions'''
    norm_weights = weights / np.sum(weights)
    predictions = np.ones_like(preds[0])
    for pred, weight in zip(preds, weights):
        predictions = (1.0 - weight)*predictions + weight*predictions*pred
    predictions = predictions**(1./len(preds))
    return predictions

def weighted_mean_preds(preds, weights, weighted_mean_fn=weighted_arithmetic_mean_preds):
    return weighted_mean_fn(preds, weights)

def weighted_mean_pred_cls(preds, weights, weighted_mean_fn=weighted_arithmetic_mean_preds):
    '''Calculates weighted ensembled prediction results'''
    return np.argmax(weighted_mean_preds(preds, weights, weighted_mean_fn), axis=1)

def weighted_mean_acc(preds, weights, ref_cls,
                                 weighted_mean_fn=weighted_arithmetic_mean_preds):
    '''Calculates accuracy for a set of weights'''
    y_ensemble_pred_cls = weighted_mean_pred_cls(preds, weights, weighted_mean_fn)
    n_eq = [result == ref for result, ref in zip(y_ensemble_pred_cls, ref_cls)]
    return np.sum(n_eq) / len(y_ensemble_pred_cls)

class EnsembleOptimizer:
    def __init__(self, pred_apps, labels, y_train_ref, mean_type='geometric'):
        self.labels = labels
        self.mean_type = mean_type
        self.weighted_mean_fn = weighted_arithmetic_mean_preds \
            if mean_type == 'arithmetic' else weighted_geometric_mean_preds
        # List of train/test preds
        self.train_preds = []
        self.test_preds = []
        for app in pred_apps:
            self.train_preds.append(np.load(app + '/valid_preds.npy'))
            self.test_preds.append(np.load(app + '/test_preds.npy'))
        # Load y_train reference
        self.y_train_ref = y_train_ref
    def _train_weighted_mean_acc(self, weights):
        acc = weighted_mean_acc(self.train_preds, weights, self.y_train_ref, self.weighted_mean_fn)
        #print(weights, acc)
        return acc
    def optimize(self, submission_filename):
        weight_dim = len(self.train_preds)
        optimization_domain_vertices = [[0] * weight_dim for _ in range(weight_dim + 1)]
        for i in range(1, weight_dim + 1):
            optimization_domain_vertices[i][i - 1] = 1
        number_of_iterations = 3000
        exploration = 0.01 # optional, default 0.15
        # Optimize weights
        tuner = SimpleTuner(optimization_domain_vertices, self._train_weighted_mean_acc,
                            exploration_preference=exploration)
        tuner.optimize(number_of_iterations)
        self.best_objective_value, self.best_weights = tuner.get_best()
        print('Ensembled Accuracy (Best objective value) =', self.best_objective_value)
        print('Optimum weights =', self.best_weights)
        self._write_weighted_test_result(submission_filename)
    def _write_weighted_test_result(self, submission_filename):
        #train_preds, test_preds, y_train_ref, labels
        predictions = weighted_mean_preds(self.test_preds, self.best_weights, self.weighted_mean_fn)
        # Make a ensembled submission file
        top_labels = [self.labels[np.argmax(pred)] for pred in predictions]
        with open(submission_filename, 'w') as f:
            f.writelines(['Id,Scene_label\n'])
            f.writelines(['%d,%s\n' % (i, label) for i, label in enumerate(top_labels)])
        print('Wrote %s mean preds to:' % self.mean_type, submission_filename)

pred_apps = ['base_clr_randers_mixup', 
             'event_pattern',
             'timewise_mean']
ensemble_optimizer = EnsembleOptimizer(pred_apps, labels=labels, y_train_ref=y_valid_ref, mean_type='arithmetic')
ensemble_optimizer.optimize('base_event_time_opt_ens_arith_mean_submission.csv')
ensemble_optimizer = EnsembleOptimizer(pred_apps, labels=labels, y_train_ref=y_valid_ref, mean_type='geometric')
ensemble_optimizer.optimize('base_event_time_opt_ens_geo_mean_submission.csv')

Ensembled Accuracy (Best objective value) = 0.841720779221
Optimum weights = [ 0.35817796  0.292873    0.31946865]
Wrote arithmetic mean preds to: base_event_time_opt_ens_arith_mean_submission.csv
Ensembled Accuracy (Best objective value) = 0.841720779221
Optimum weights = [ 0.3530285   0.31018993  0.29398109]
Wrote geometric mean preds to: base_event_time_opt_ens_geo_mean_submission.csv


In [24]:
pred_apps = ['base_clr_randers_mixup',
             'event_pattern',
             'event_pattern_alexnet',
             'timewise_mean',
]
ensemble_optimizer = EnsembleOptimizer(pred_apps, labels=labels, y_train_ref=y_valid_ref, mean_type='arithmetic')
ensemble_optimizer.optimize('base_event2_time_opt_ens_arith_mean_submission.csv')
ensemble_optimizer = EnsembleOptimizer(pred_apps, labels=labels, y_train_ref=y_valid_ref, mean_type='geometric')
ensemble_optimizer.optimize('base_event2_time_opt_ens_geo_mean_submission.csv')

Ensembled Accuracy (Best objective value) = 0.858766233766
Optimum weights = [ 0.40899306  0.17953915  0.14747649  0.21167794]
Wrote arithmetic mean preds to: base_event2_time_opt_ens_arith_mean_submission.csv
Ensembled Accuracy (Best objective value) = 0.859577922078
Optimum weights = [ 0.3213005   0.18376267  0.20541213  0.26858041]
Wrote geometric mean preds to: base_event2_time_opt_ens_geo_mean_submission.csv


In [26]:
pred_apps = ['base_clr_randers_mixup',
             'event_pattern_alexnet',
]
ensemble_optimizer = EnsembleOptimizer(pred_apps, labels=labels, y_train_ref=y_valid_ref, mean_type='arithmetic')
ensemble_optimizer.optimize('base_eventalex_opt_ens_arith_mean_submission.csv')
ensemble_optimizer = EnsembleOptimizer(pred_apps, labels=labels, y_train_ref=y_valid_ref, mean_type='geometric')
ensemble_optimizer.optimize('base_eventalex_opt_ens_geo_mean_submission.csv')

Ensembled Accuracy (Best objective value) = 0.849837662338
Optimum weights = [ 0.60389057  0.31677588]
Wrote arithmetic mean preds to: base_eventalex_opt_ens_arith_mean_submission.csv
Ensembled Accuracy (Best objective value) = 0.852272727273
Optimum weights = [ 0.56429143  0.40923397]
Wrote geometric mean preds to: base_eventalex_opt_ens_geo_mean_submission.csv


In [21]:
pred_apps = ['base_clr_randers_mixup', 
             #'event_pattern',
             'timewise_mean'
]
ensemble_optimizer = EnsembleOptimizer(pred_apps, labels=labels, y_train_ref=y_valid_ref, mean_type='arithmetic')
ensemble_optimizer.optimize('base_time_opt_ens_arith_mean_submission.csv')
ensemble_optimizer = EnsembleOptimizer(pred_apps, labels=labels, y_train_ref=y_valid_ref, mean_type='geometric')
ensemble_optimizer.optimize('base_time_opt_ens_geo_mean_submission.csv')

Ensembled Accuracy (Best objective value) = 0.82711038961
Optimum weights = [ 0.51733185  0.4104946 ]
Wrote arithmetic mean preds to: base_time_opt_ens_arith_mean_submission.csv
Ensembled Accuracy (Best objective value) = 0.828733766234
Optimum weights = [ 0.49658243  0.40668303]
Wrote geometric mean preds to: base_time_opt_ens_geo_mean_submission.csv


In [22]:
pred_apps = ['base_clr_randers_mixup', 
             'event_pattern',
             #'timewise_mean'
]
ensemble_optimizer = EnsembleOptimizer(pred_apps, labels=labels, y_train_ref=y_valid_ref, mean_type='arithmetic')
ensemble_optimizer.optimize('base_event_opt_ens_arith_mean_submission.csv')
ensemble_optimizer = EnsembleOptimizer(pred_apps, labels=labels, y_train_ref=y_valid_ref, mean_type='geometric')
ensemble_optimizer.optimize('base_event_opt_ens_geo_mean_submission.csv')

Ensembled Accuracy (Best objective value) = 0.833603896104
Optimum weights = [ 0.55645813  0.27725353]
Wrote arithmetic mean preds to: base_event_opt_ens_arith_mean_submission.csv
Ensembled Accuracy (Best objective value) = 0.833603896104
Optimum weights = [ 0.48243248  0.32977935]
Wrote geometric mean preds to: base_event_opt_ens_geo_mean_submission.csv
