# Multi-label classification -- p-classification loss

In [None]:
%load_ext line_profiler
%load_ext autoreload
%autoreload 2

import os, sys, time
import numpy as np

from scipy.optimize import check_grad
from scipy.sparse import issparse, csr_matrix

In [None]:
# Make the project sources importable. The paths are relative, so the notebook
# has to be run from the directory that contains `src/`.
sys.path.extend(['src/', 'src/models'])
from MLC import objective, risk_pclassification, DataHelper
from tools import create_dataset, dataset_names, nLabels_dict

In [None]:
# Display the available dataset names (imported from `tools`).
dataset_names

In [None]:
# Position in `dataset_names` of the dataset used by the rest of the notebook.
data_ix = 0

In [None]:
# Resolve the selected dataset and look up its entry in `nLabels_dict`
# (presumably the number of labels of that dataset -- verify in `tools`).
dataset_name = dataset_names[data_ix]
nLabels = nLabels_dict[dataset_name]
print(dataset_name, nLabels)

In [None]:
# NOTE(review): `data_dir` is not referenced by any other visible cell.
data_dir = 'data'
# Passed as `random_state` when the training split is shuffled.
SEED = 918273645

Load data.

In [None]:
# Training split: shuffled, with a fixed `random_state` so the order is repeatable.
X_train, Y_train = create_dataset(dataset_name, train_data=True, shuffle=True, random_state=SEED)
# Test split: loaded with `create_dataset`'s defaults for `shuffle` / `random_state`.
X_test,  Y_test  = create_dataset(dataset_name, train_data=False)

Feature normalisation.

In [None]:
# Standardise the features with statistics computed on the training split only;
# the very same shift and scale are then applied to the test split.
X_train_mean = np.mean(X_train, axis=0).reshape(1, -1)
# The small constant keeps the divisor non-zero for constant features.
X_train_std = np.std(X_train, axis=0).reshape(1, -1) + 10 ** (-6)
# Both arrays are modified in place (train first, then test).
for _split in (X_train, X_test):
    _split -= X_train_mean
    _split /= X_train_std
del _split

In [None]:
def print_dataset_info(X_train, Y_train, X_test, Y_test):
    """Print a short summary of a multi-label train/test split.

    Reports the number of training/test examples, features and labels, the
    average number of positive labels per example (absolute and relative to
    the number of labels), and the percentage of non-zero entries in each
    label matrix (printed under the caption "Sparsity (percent)").

    Parameters
    ----------
    X_train, X_test : 2-D arrays; only their shapes are used.
    Y_train, Y_test : 2-D label matrices with one column per label
        (assumed to hold 0/1 indicators -- the sums are read as counts).

    Returns
    -------
    None. Everything is written to standard output.
    """
    n_train, n_features = X_train.shape
    n_labels = Y_train.shape[1]
    n_test = X_test.shape[0]

    # Plain counts, rendered with thousands separators.
    counts = (
        ('Number of training examples:', n_train),
        ('Number of test examples:', n_test),
        ('Number of features:', n_features),
        ('Number of labels:', n_labels),
    )
    for caption, value in counts:
        print('%-45s %s' % (caption, '{:,}'.format(value)))

    # Mean number of positive labels per example, computed for both splits
    # before either line is printed.
    mean_positives = [np.mean(np.sum(Y, axis=1)) for Y in (Y_train, Y_test)]
    captions = ('Average number of positive labels (train):',
                'Average number of positive labels (test):')
    for caption, avg in zip(captions, mean_positives):
        print('%-45s %.3f (%.2f%%)' % (caption, avg, 100*avg / n_labels))

    #print('%-45s %.4f%%' % ('Average label occurrence (train):', np.mean(np.sum(Y_train, axis=0)) / N_train))
    #print('%-45s %.4f%%' % ('Average label occurrence (test):', np.mean(np.sum(Y_test, axis=0)) / N_test))

    # Percentage of entries of the label matrix that are set.
    for caption, Y in (('Sparsity (percent) (train):', Y_train),
                       ('Sparsity (percent) (test):', Y_test)):
        print('%-45s %.3f%%' % (caption, 100 * np.sum(Y) / np.prod(Y.shape)))

In [None]:
# Print the dataset name in the same column layout as the statistics below it.
print('%-45s %s' % ('Dataset:', dataset_name))
print_dataset_info(X_train, Y_train, X_test, Y_test)

Check the gradient numerically (finite differences via `check_grad`).

In [None]:
%%script false
# Disabled cell: the `%%script false` cell magic hands the cell body to the
# `false` command, so none of the Python below is executed.
# NOTE(review): `obj_pclassification` is not among the names imported from MLC
# in the visible cells (objective, risk_pclassification, DataHelper) --
# presumably a leftover from an earlier API; verify before re-enabling.
# PU has one row per training example and 3 columns; four entries are set to 1.
PU = np.zeros((Y_train.shape[0], 3), dtype=Y_train.dtype)
PU[[0, 1, 2, 10], [0, 1, 1, 2]] = 1
upl_ix = [[2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13, 14, 15]]
# Small random start point of length (n_labels + 3) * n_features + 1.
w0 = 0.001 * np.random.randn((Y_train.shape[1] + 3) * X_train.shape[1] + 1)
loss = 'both'
# Element [0] of the returned value is used as the function value and
# element [1] as the gradient.
check_grad(\
lambda w: obj_pclassification(w, X_train, Y_train, C1=10, C2=1, C3=2, p=3, loss_type=loss,
                              PU=PU, user_playlist_indices=upl_ix)[0], 
lambda w: obj_pclassification(w, X_train, Y_train, C1=10, C2=1, C3=2, p=3, loss_type=loss,
                              PU=PU, user_playlist_indices=upl_ix)[1],w0)

In [None]:
%%script false
# Disabled cell: the `%%script false` cell magic hands the cell body to the
# `false` command, so none of the Python below is executed.
# NOTE(review): the keyword arguments used here (C2, loss_type,
# data_helper_example, data_helper_label) and the length of w0
# (n_labels * n_features + 1) differ from the other `objective` gradient check
# in this notebook -- presumably an older signature; verify before re-enabling.
cliques = [[2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13]]
#cliques = None
w0 = 0.001 * np.random.randn(Y_train.shape[1] * X_train.shape[1] + 1)
#w0 = np.zeros(Y_train.shape[1] * X_train.shape[1] + 1)
# Buffer returned as the gradient below; `objective` is presumably expected to
# fill it in place -- confirm against MLC.
dw = np.zeros_like(w0)
loss = 'example'
bs=5 if loss == 'label' else 100
# Rebinds the global Y_train to a CSR sparse matrix.
Y_train = csr_matrix(Y_train)
# Only the helper(s) matching the selected loss type are constructed.
data_helper_example = None if loss == 'label' else DataHelper(Y_train, ax=0, batch_size=bs)
data_helper_label = None if loss == 'example' else DataHelper(Y_train, ax=1, batch_size=bs)
#%lprun -f accumulate_risk \
#%lprun -f objective \
check_grad(lambda w: objective(w, dw, X_train, Y_train, C1=10, C2=1, C3=2, p=3, loss_type=loss, cliques=cliques, \
                               data_helper_example=data_helper_example, data_helper_label=data_helper_label), \
           lambda w: dw, w0)

In [None]:
# Numerical check of the analytic gradient of `objective` on the training data.
# The value shown by this cell is the 2-norm of the difference between the
# analytic gradient and a finite-difference approximation at w0 (small is good).
#
# Signature for reference:
# objective(w, dw, X, Y, C1=1, C3=1, p=1, cliques=None, data_helper=None, fnpy=None)

cliques = [[2, 3, 4], [5, 6, 7, 8, 9], [10, 11], [12, 13]]
#cliques = None
# Small random start point of length n_labels * (n_features + 1).
w0 = 0.001 * np.random.randn(Y_train.shape[1] * (X_train.shape[1] + 1))
#w0 = np.zeros(Y_train.shape[1] * (X_train.shape[1] + 1))
# Gradient buffer handed to `objective`, which is assumed to overwrite it in
# place on every call (TODO confirm against MLC.objective).
dw = np.zeros_like(w0)
bs = 5
# Rebinds the global Y_train to a CSR sparse matrix.
Y_train = csr_matrix(Y_train)
data_helper = DataHelper(Y_train, ax=1, batch_size=bs)


def _objective_value(w):
    """Return the objective at `w`; `dw` is refreshed as a side effect."""
    return objective(w, dw, X_train, Y_train, C1=10, C3=2, p=3,
                     cliques=cliques, data_helper=data_helper)


def _objective_gradient(w):
    """Return a private copy of the gradient evaluated at `w`."""
    # check_grad asks for the analytic gradient once and then evaluates the
    # objective at many perturbed points, each of which overwrites `dw`.
    # Handing back the shared buffer itself would therefore compare against
    # whatever gradient was written last, so evaluate at `w` and snapshot it.
    _objective_value(w)
    return dw.copy()


#%lprun -f accumulate_risk \
#%lprun -f objective \
check_grad(_objective_value, _objective_gradient, w0)