# Evaluate Playlist generation / augmentation

In [None]:
# Notebook setup: draw matplotlib figures inline in the cell output.
%matplotlib inline
# Load the autoreload extension and use mode 2, so edits to imported modules
# (e.g. the project sources imported further down) are picked up without a restart.
%load_ext autoreload
%autoreload 2

import os, sys, time
import pickle as pkl
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Make the project modules under the relative directory 'src' importable.
# NOTE(review): the path is relative, so this presumably relies on the notebook
# being run from the repository root -- confirm.
sys.path.append('src')
from PClassificationMLC import PClassificationMLC
from BinaryRelevance import BinaryRelevance
from evaluate import f1_score_nowarn, calc_F1, calc_precisionK, evaluate_minibatch

In [None]:
# Directory holding the pickled train/dev/test splits.
data_dir = 'data/aotm-2011/setting2'

# One path per split file; the order of the names on the left matches the
# order of the file names listed below.
fxtrain, fytrain, fxdev, fydev, fxtest, fytest = (
    os.path.join(data_dir, basename)
    for basename in (
        'X_train_audio.pkl',
        'Y_train_audio.pkl',
        'X_dev_audio.pkl',
        'Y_dev_audio.pkl',
        'X_test_audio.pkl',
        'Y_test_audio.pkl',
    )
)

## Data loading

In [None]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes (the previous ``pkl.load(open(...))`` form left all six
    handles open until garbage collection).
    """
    with open(path, 'rb') as fd:
        return pkl.load(fd)


# Feature (X) and label (Y) objects for the train / dev / test splits.
X_train = _load_pickle(fxtrain)
Y_train = _load_pickle(fytrain)
X_dev   = _load_pickle(fxdev)
Y_dev   = _load_pickle(fydev)
X_test  = _load_pickle(fxtest)
Y_test  = _load_pickle(fytest)

In [None]:
# Report the shapes of the feature and label objects, one line per split.
print('\n'.join(
    fmt % (X.shape, Y.shape)
    for fmt, X, Y in (
        ('Train: %15s %15s', X_train, Y_train),
        ('Dev  : %15s %15s', X_dev, Y_dev),
        ('Test : %15s %15s', X_test, Y_test),
    )
))

In [None]:
# Empty results table: one row per method, one column per metric.
# Cells are filled in by later cells once the test-set scores are known.
rows, cols = ['LR', 'PC', 'LR-2017'], ['F1', 'Precision@K']
df = pd.DataFrame(columns=cols, index=rows)
df.head()

## Evaluate on dev set -- BR

In [None]:
# Regularisation value identifying which pickled BR model to load.
C = 1
# Alternative location used previously:
#fname = os.path.join('data', 'aotm2011-params-br/br-aotm2011-C-%s.pkl' % str(C))
fname = os.path.join(data_dir, 'br-aotm2011-C-%g.pkl' % C)
# Open inside a `with` block so the file handle is closed once unpickling is done.
with open(fname, 'rb') as fd:
    br = pkl.load(fd)

Evaluate F1: threshold for logistic regression is 0 for logits, 0.5 for probabilities.

In [None]:
# Score the BR model on the dev split with calc_F1, using a decision threshold
# of 0 and mini-batches of 1500.
# NOTE(review): presumably `F1` holds one score per dev example -- confirm
# against evaluate_minibatch.
F1 = evaluate_minibatch(br, calc_F1, X_dev, Y_dev, threshold=0, batch_size=1500, verbose=1)
avgF1 = np.mean(F1)
# The former `F1_all.append(avgF1)` was dropped: `F1_all` is not defined in any
# cell of this notebook, so that line raised NameError before the result was printed.
print('\nF1: %g' % avgF1)

In [None]:
# Display the mean of the most recently computed F1 values as the cell output.
np.mean(F1)

`C: 0.1, Threshold: 0.05, F1: 0.00254648`  
`C:   1, Threshold: 0.05, F1: 0.0121401`

Evaluate Precision@K.

In [None]:
# Score the BR model on the dev split with calc_precisionK; no threshold is
# passed (threshold=None) for this metric.
pak = evaluate_minibatch(br, calc_precisionK, X_dev, Y_dev, threshold=None, batch_size=1500, verbose=1)
print('\nPrecision@K: %g' % np.mean(pak))

`C: 0.1, Precision@K: 0.0884917`  
`C:   1, Precision@K: 0.0943461`

## Evaluate on test set -- BR

In [None]:
# Hyper-parameters chosen for the final BR evaluation on the test split.
best_C = 1
best_TH = 0.05
# NOTE(review): this loads from 'data/aotm2011-params-br/', whereas the dev-set
# cell loads 'br-aotm2011-C-%g.pkl' from `data_dir` -- confirm both point at the
# intended model file.
fname = os.path.join('data', 'aotm2011-params-br/br-aotm2011-C-%s.pkl' % str(best_C))
# Open inside a `with` block so the file handle is closed once unpickling is done.
with open(fname, 'rb') as fd:
    best_br = pkl.load(fd)

In [None]:
# F1 of the selected BR model on the test split, thresholded at best_TH.
F1_test_br = evaluate_minibatch(best_br, calc_F1, X_test, Y_test, threshold=best_TH, batch_size=1500, verbose=1)
print('\nTest F1: %g' % np.mean(F1_test_br))

In [None]:
# Precision@K of the selected BR model on the test split (no threshold passed).
pak_test_br = evaluate_minibatch(best_br, calc_precisionK, X_test, Y_test,threshold=None,batch_size=1500,verbose=1)
print('\nTest Precision@K: %g' % np.mean(pak_test_br))

In [None]:
# Store the mean BR test-set scores in the 'LR' row of the results table.
df.loc['LR', 'F1'] = np.mean(F1_test_br)
df.loc['LR', 'Precision@K'] = np.mean(pak_test_br)

In [None]:
# Display the results table as filled in so far.
df

## Evaluate on dev set -- PC

In [None]:
# Scan the (C, p) grid of saved PC models. For every parameter file that exists:
#   1. print and collect the metrics stored in the pickle, and
#   2. rebuild the classifier from the same file and score it on the test split.
# NOTE(review): the section heading says "dev set", but the scoring below uses
# X_test / Y_test; the metrics stored in the pickle are presumably the dev-set
# numbers -- confirm.
C_set = [0.1, 0.3, 1, 3, 10, 30, 100, 300, 1000, 3000, 10000, 30000]
p_set = [1, 2, 3, 4, 5, 6, 7, 8]
metrics_pc = []

# Keys read from each pickle; they double as the printed column headers.
metric_keys = ('C', 'p', 'Threshold', 'F1', 'Precision@K')
row_fmt = '%15s %15s %15s %15s %15s'
print(row_fmt % metric_keys)

for C in C_set:
    for p in p_set:
        # Alternative location used previously:
        #fname = os.path.join('data', 'aotm2011-params-pc/pc-aotm2011-C-%g-p-%g.pkl' % (C, p))
        fname = os.path.join(data_dir, 'pc-aotm2011-C-%g-p-%g.pkl' % (C, p))
        if not os.path.exists(fname):
            continue  # this (C, p) combination has no saved file

        # `with` closes the handle; previously one handle leaked per grid point.
        with open(fname, 'rb') as fd:
            pc_dict = pkl.load(fd)

        record = tuple(pc_dict[key] for key in metric_keys)
        print(row_fmt % tuple('%g' % value for value in record))
        metrics_pc.append(record)

        # Rebuild the model from the same file and score it on the test split,
        # using the threshold stored alongside the parameters.
        clf = PClassificationMLC()
        clf.load_params(fname)
        th = pc_dict['Threshold']
        F1 = evaluate_minibatch(clf, calc_F1, X_test, Y_test, threshold=th, batch_size=1500, verbose=1)
        print('\nTest F1: %g' % np.mean(F1))
        pak = evaluate_minibatch(clf, calc_precisionK, X_test, Y_test, threshold=None, batch_size=1500, verbose=1)
        print('\nTest Precision@K: %g' % np.mean(pak))

In [None]:
# Rank the collected (C, p, Threshold, F1, Precision@K) tuples by the column at
# `keyix`, best first.
keyix = 3  # F1
sorted_metrics_pc = sorted(metrics_pc, key=lambda rec: rec[keyix], reverse=True)

# The grid scan skips missing parameter files, so the list can be empty;
# report that instead of failing with IndexError on `[0]`.
if sorted_metrics_pc:
    print('Best hyper-param:\n(C, p, Threshold, F1, Precision@K):', sorted_metrics_pc[0])
else:
    print('No PC parameter files were found; nothing to rank.')

## Evaluate on test set -- PC

In [None]:
# Hyper-parameters used for the final PC evaluation.
# Values tried earlier -- C: 10000, 300, 3000; p: 2, 3, 6; threshold: 0.15, 0.1.
best_C = 30000
best_p = 2
best_TH = 0.1

# Create the classifier, then load the saved parameters for (best_C, best_p).
best_pc = PClassificationMLC()
# Alternative location used previously:
#fname = os.path.join('data', 'aotm2011-params-pc/pc-aotm2011-C-%g-p-%g.pkl' % (best_C, best_p))
fname = os.path.join(
    data_dir,
    'pc-aotm2011-C-%g-p-%g.pkl' % (best_C, best_p),
)
best_pc.load_params(fname)

In [None]:
# Load the raw pickled contents of the best PC parameter file; `with` closes
# the file handle once unpickling is done.
with open(fname, 'rb') as fd:
    bestdict = pkl.load(fd)

In [None]:
# Plot the 'cost' entry stored in the parameter file.
# NOTE(review): presumably the training objective per iteration -- confirm.
plt.plot(bestdict['cost'])

In [None]:
# F1 of the selected PC model on the test split, thresholded at best_TH.
F1_test_pc = evaluate_minibatch(best_pc, calc_F1, X_test, Y_test, threshold=best_TH, batch_size=1500, verbose=1)
print('\nTest F1: %g' % np.mean(F1_test_pc))

In [None]:
# Precision@K of the selected PC model on the *training* split (not test).
pak = evaluate_minibatch(best_pc, calc_precisionK, X_train, Y_train, threshold=None, batch_size=1500, verbose=1)

In [None]:
# Report the mean of whatever `pak` currently holds, labelled as training Precision@K.
print('\nTrain P@K: %g' % np.mean(pak))

In [None]:
# Decision-function output of the selected PC model for the first 10 training rows.
# NOTE(review): presumably one score per label for each row -- confirm.
preds = best_pc.decision_function(X_train[:10])

In [None]:
# Inspect the scores of a single training example (index into `preds`).
test_ex_idx = 2
example_scores = preds[test_ex_idx]

# Distribution of this example's scores.
plt.hist(example_scores, bins=50)

# Densify the example's label row and take the column indices of its non-zero
# entries, i.e. the positive labels.
y_true = Y_train[test_ex_idx].toarray()
pos_idx = np.nonzero(y_true)[1]

print('prediction of true positives')
print(example_scores[pos_idx])
print('top predictions')
# Cell output: the 20 largest scores, in ascending order.
np.sort(example_scores)[-20:]

In [None]:
# Precision@K of the selected PC model on the test split (no threshold passed).
pak_test_pc = evaluate_minibatch(best_pc, calc_precisionK, X_test, Y_test,threshold=None,batch_size=1500,verbose=1)
print('\nTest Precision@K: %g' % np.mean(pak_test_pc))

In [None]:
# Store the mean PC test-set scores in the 'PC' row of the results table.
df.loc['PC', 'F1'] = np.mean(F1_test_pc)
df.loc['PC', 'Precision@K'] = np.mean(pak_test_pc)

## Result table

In [None]:
# Hard-coded F1 for the 'LR-2017' row; nothing in this notebook computes it.
# NOTE(review): presumably a previously reported figure -- confirm the source.
# The row's Precision@K cell is never assigned in this notebook.
df.loc['LR-2017', 'F1'] = 0.031

In [None]:
# Render the results table as LaTeX: floats become '$0.1234$'-style math with
# four decimals, missing cells are written as '-', and escape=False leaves
# cell text unescaped.
tab_str = df.to_latex(float_format=lambda x: '$%.4f$' % x, na_rep='-', multirow=False, escape=False)

In [None]:
# Wrap the rendered tabular in a LaTeX table environment and print it.
# Raw strings keep the LaTeX backslashes literal; the previous '\centering'
# relied on the invalid escape sequence '\c', which triggers a
# DeprecationWarning (SyntaxWarning on Python 3.12+). Output is unchanged.
print(r'\begin{table}[!h]')
print(r'\centering')
print(r'\caption{Performance on test set}')
print(r'\label{tab:perf}')
print(tab_str)
print(r'\end{table}')