# Multilabel

In [1]:
import os
import sys
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

In [2]:
sys.path.append('..')
from kernel import KernelComputer, KernelRegressor
from dataloader import MULANLoader, FoldsGenerator
from multilabel import DF, IL

In [28]:
# Fix the seed of NumPy's global random generator so that re-running the
# notebook reproduces the same random draws.
np.random.seed(seed=0)

### Choose datasets to play with

In [29]:
# Dataset identifiers; each entry is handed to `MULANLoader` in the
# simulation cell below.
names = [
    'CAL500',
]

### Choose hyperparameters for the simulations

In [41]:
# --- Experiment configuration -------------------------------------------

# Kernel family handed to `KernelComputer`.
kernel_type = 'Gaussian'

# Forwarded to `loader.synthetic_corruption(..., skewed=skewed)`; also decides
# whether the saved figure gets the '_skewed' suffix.
skewed = False

# Number of cross-validation folds.
nb_folds = 8

# Corruption levels applied to the training labels (0 means no corruption).
corruptions = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# Bandwidth coefficients; the simulation multiplies each by the input dimension.
# Alternative grids tried previously:
#   sigmas = [1e1, 5e0, 1e0, 5e-1, 1e-1, 1e-2]
#   sigmas = [1]
sigmas = [3, 1, 0.7, 0.5]

# Regularisation coefficients; the simulation divides each by sqrt(n_train).
lambdas = [1000.0, 10.0, 1.0, 0.1, 0.001, 0.000001]

In [None]:
# Cross-validated comparison of the DF and IL methods: for every dataset,
# fold, bandwidth, regularisation strength and corruption level, record the
# fraction of label entries predicted incorrectly on the held-out fold.
# sigma=1 is only an initial value; it is overwritten through `update_sigma`.
kernel = KernelComputer(kernel_type, sigma=1)
kernel_reg = KernelRegressor(kernel)

# Per-dataset error arrays indexed as [corruption, fold, sigma, lambda].
err_df, err_il = {}, {}
shape_err = (len(corruptions), nb_folds, len(sigmas), len(lambdas))

for name in names:
    print(name)
    
    loader = MULANLoader(name)
    x, y = loader.get_trainset()
    
    # Keep only the label columns whose sum over samples is non-zero.
    ind = y.sum(axis=0) != 0
    y = y[:, ind]
    # Top-k size: average number of labels equal to 1 per sample, plus 10.
    k = int(np.sum(y == 1, axis=1).mean()) + 10
    
    met_df, met_il = DF(kernel, k), IL(kernel, k)
    err_df[name], err_il[name] = np.empty(shape_err), np.empty(shape_err)
    
    # One corrupted copy of `y` per corruption level, stacked on the last axis.
    S = np.empty((*y.shape, len(corruptions)))
    for i_c, corruption in enumerate(corruptions):
        S[..., i_c] = loader.synthetic_corruption(y, corruption, skewed=skewed)
        
    floader = FoldsGenerator(x, y, S, nb_folds=nb_folds)
    for fold in range(nb_folds):
        # NOTE(review): each call presumably returns the next train/test split -- confirm.
        (x_train, S_trains), (x_test, y_test) = floader()
        kernel_reg.set_support(x_train)
        n_train, dim = x_train.shape
        
        for i_s, c_sigma in enumerate(sigmas):
            # Bandwidth scales with the input dimension.
            sigma = c_sigma * dim
            kernel_reg.update_sigma(sigma)

            for i_l, c_lambda in enumerate(lambdas):
                # Regularisation scales as 1 / sqrt(number of training samples).
                lambd = c_lambda / np.sqrt(n_train)
                kernel_reg.update_lambda(lambd)
                # NOTE(review): presumably weights of each query point on the
                # training samples (they are right-multiplied by training labels
                # below) -- confirm against KernelRegressor.
                alpha_train = kernel_reg(x_train)
                alpha = kernel_reg(x_test)

                # The weights do not depend on the corruption level, so they are
                # reused for every corrupted label set.
                for i_c, corruption in enumerate(corruptions):

                    S_train = S_trains[..., i_c]
                    # DF first disambiguates the training labels; IL uses the
                    # corrupted labels directly.
                    y_dis = met_df.disambiguation(alpha_train, S_train, k)
                    pred_df = alpha @ y_dis
                    pred_il = alpha @ S_train

                    for pred in [pred_df, pred_il]:
                        # Indices of the k largest scores in each row.
                        idx = np.argsort(pred, axis=1)[:, -k:]
                        # Overwrite the scores in place: everything becomes -1 ...
                        pred[:] = -1
                        # ... then `fill_topk_pred` presumably marks the `idx`
                        # entries as positive -- TODO confirm.
                        met_il.fill_topk_pred(pred, idx)

                    # Fraction of label entries where the prediction differs from y_test.
                    err_df[name][i_c, fold, i_s, i_l] = ((pred_df - y_test) != 0).mean()
                    err_il[name][i_c, fold, i_s, i_l] = ((pred_il - y_test) != 0).mean()
            # Progress marker: one dot per bandwidth value processed.
            print(end='.')

CAL500
.....

### Show results

In [None]:
# Summarise the cross-validation errors of both methods (index 0: DF, index 1: IL).
# For every dataset and corruption level, the (sigma, lambda) pair with the lowest
# fold-averaged error is selected; its mean error, its standard deviation across
# folds and the corresponding sigma / lambda coefficients are stored.
mus = np.empty((len(names), 2, len(corruptions)))
stds = np.empty((len(names), 2, len(corruptions)))
best_lambdas = np.empty((len(names), 2, len(corruptions)))
best_sigmas = np.empty((len(names), 2, len(corruptions)))

for i, name in enumerate(names):
    for j, err in enumerate((err_df, err_il)):
        # Flatten the (sigma, lambda) grid so that a single argmin picks the best pair.
        tmp = err[name].reshape(len(corruptions), nb_folds, -1)
        mu = tmp.mean(axis=1)
        ind = mu.argmin(axis=-1)
        # The loop index is deliberately not called `k`: that name holds the
        # top-k size in the simulation cell and must not be overwritten here.
        for c_idx, best in enumerate(ind):
            best = int(best)
            mus[i, j, c_idx] = mu[c_idx, best]
            stds[i, j, c_idx] = tmp[c_idx, :, best].std()
            # The flat index is sigma-major / lambda-minor (row-major reshape).
            sigma_idx, lambda_idx = divmod(best, len(lambdas))
            best_lambdas[i, j, c_idx] = lambdas[lambda_idx]
            best_sigmas[i, j, c_idx] = sigmas[sigma_idx]

In [None]:
# Plot, for each dataset, the selected error of DF and IL against the corruption
# level (in percent), with error bars of half the stored standard deviation, and
# save the figure as a PDF under savings/ml/.
out_dir = os.path.join('savings', 'ml')
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)
suffix = '_skewed' if skewed else ''
corruption_percent = [100 * level for level in corruptions]

for i, name in enumerate(names):
    fig, ax = plt.subplots()
    line_df = ax.errorbar(corruption_percent, mus[i, 0], .5 * stds[i, 0],
                          capsize=5, linewidth=3, capthick=3)
    line_il = ax.errorbar(corruption_percent, mus[i, 1], .5 * stds[i, 1],
                          capsize=5, linewidth=3, capthick=3)
    ax.legend([line_df, line_il], ['DF', "IL"], prop={'size': 15})
    ax.grid()
    ax.set_title(name, size=25)
    ax.set_ylabel('Loss', size=20)
    ax.set_xlabel('Corruption (in %)', size=20)
    ax.tick_params(axis='both', which='major', labelsize=12)
    plt.tight_layout()
    fig.savefig(os.path.join(out_dir, name + suffix + '.pdf'))

In [None]:
# Display the selected lambda and sigma coefficients as the cell's output.
(best_lambdas, best_sigmas)