In [2]:
import numpy as np
import paper

n_folds = 4

# Per-fold arrays are read from '<prefix><fold>.npy' files in the working
# directory: TS* feed `test_sets`, HO* feed `held_outs`, MAE* feed `maes`.
test_sets = [np.load('TS{}.npy'.format(fold)) for fold in range(n_folds)]
held_outs = [np.load('HO{}.npy'.format(fold)) for fold in range(n_folds)]
maes = [np.load('MAE{}.npy'.format(fold)) for fold in range(n_folds)]

In [3]:
# Test set of the first fold; used further down to index the rows of the
# per-row rating counts (presumably one row per user -- verify against `paper`).
ts = test_sets[0]
ts

array([   1,    2,   11, ..., 6034, 6036, 6038])

In [4]:
# Held-out selection of the first fold; used further down to pick the entries
# of the rating matrix that get replaced by NaN.
ho = held_outs[0]
ho

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ..., 
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]], dtype=bool)

In [5]:
# MAE array of the first fold (presumably one value per test user, in the same
# order as the fold's test set -- TODO confirm against the code that saved it).
mae = maes[0]
mae

array([ 0.79286365,  0.78426155,  0.75405791, ...,  1.09939118,
        0.72314558,  0.54463607])

In [6]:
# Rating matrix for fold 0 with the held-out entries replaced by NaN.
# A new array is built, so paper.Rnan itself is never modified.
R_test = np.where(ho, np.nan, paper.Rnan)
R_test

array([[  5.,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       ..., 
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [ nan,  nan,  nan, ...,  nan,  nan,  nan],
       [  3.,  nan,  nan, ...,  nan,  nan,  nan]])

In [7]:
# Number of strictly positive entries left in each test-set row of R_test.
# R_test contains NaN, and comparing NaN with `>` makes some NumPy versions
# emit an "invalid value encountered" RuntimeWarning; NaN compares as False
# either way, so the warning is silenced for this single comparison only.
with np.errstate(invalid='ignore'):
    n_available = np.sum(R_test > 0, axis = 1)[ts]
n_available

  if __name__ == '__main__':


array([13,  6,  3, ..., 28, 21, 13])

In [8]:
# Fold-0 summary over every entry of `mae`.
print('Num users:', len(mae))
print('Average MAE:', mae.mean())

Num users: 1510
Average MAE: 0.792713801177


In [9]:
# D1 group of fold 0: at most 3 available ratings (the counts are integers,
# so this is the same selection as "fewer than 4").
d1_inds = n_available <= 3
d1_maes = mae[d1_inds]
print('Num users:', len(d1_maes))
print('Average MAE:', d1_maes.mean())

Num users: 211
Average MAE: 0.885348087689


In [10]:
# D2 group of fold 0: between 4 and 9 available ratings, both inclusive
# (integer counts, so identical to "> 3 and < 10").
d2_inds = (n_available >= 4) & (n_available <= 9)
d2_maes = mae[d2_inds]
print('Num users:', len(d2_maes))
print('Average MAE:', d2_maes.mean())

Num users: 514
Average MAE: 0.822263555908


In [11]:
# D3 group of fold 0: 10 or more available ratings (integer counts, so
# identical to "> 9").
d3_inds = n_available >= 10
d3_maes = mae[d3_inds]
print('Num users:', len(d3_maes))
print('Average MAE:', d3_maes.mean())

Num users: 785
Average MAE: 0.748466147182


In [12]:
def get_maes(R, mae, test_set, held_outs, d1_max=4, d2_max=10):
    """Split one fold's MAE values into three groups by available-rating count.

    A float copy of ``R`` is made, the entries selected by ``held_outs`` are
    set to NaN, and for every row listed in ``test_set`` the number of
    strictly positive entries is counted (NaN never counts).  Each value of
    ``mae`` is then assigned to a group according to the count of the
    matching row.

    Parameters
    ----------
    R : array-like, 2-D
        Rating matrix.  It is never modified; integer input is accepted
        because the working copy is converted to float.
    mae : array-like, 1-D
        One error value per entry of ``test_set``, in the same order.
    test_set : index array
        Rows of ``R`` that belong to this fold.
    held_outs : boolean mask or index
        Selects the entries of ``R`` to blank out before counting.
    d1_max : int, optional
        Rows with fewer than ``d1_max`` available ratings form group D1.
    d2_max : int, optional
        Rows with at least ``d1_max`` but fewer than ``d2_max`` available
        ratings form group D2; rows with ``d2_max`` or more form group D3.

    Returns
    -------
    tuple of four ndarrays
        ``(d1_maes, d2_maes, d3_maes, mae)`` where the last element is the
        full ``mae`` input as an array.

    Raises
    ------
    ValueError
        If ``d1_max`` is greater than ``d2_max``.
    """
    if d1_max > d2_max:
        raise ValueError('d1_max must not exceed d2_max')

    # asarray is a no-op for ndarray input but lets a plain list be
    # indexed with the boolean group masks below.
    mae = np.asarray(mae)

    # Float working copy: NaN cannot be stored in an integer array.
    R_test = np.array(R, dtype=float)
    R_test[held_outs] = np.nan

    # NaN > 0 is False; some NumPy versions additionally emit an
    # "invalid value" RuntimeWarning for it, which is noise here.
    with np.errstate(invalid='ignore'):
        n_available = np.sum(R_test > 0, axis=1)[test_set]

    # The three masks partition the rows: every count falls in exactly one.
    d1_inds = n_available < d1_max
    d2_inds = (n_available >= d1_max) & (n_available < d2_max)
    d3_inds = n_available >= d2_max
    return mae[d1_inds], mae[d2_inds], mae[d3_inds], mae

def get_cumul_maes(R, maes, test_sets, held_outs):
    """Pool the per-fold MAE groups produced by ``get_maes`` over all folds.

    Folds are formed by zipping ``test_sets``, ``held_outs`` and ``maes``.
    The result is a 4-tuple of arrays in the order ``get_maes`` returns
    them; each array is the ``np.hstack`` of that group's per-fold arrays.
    """
    per_fold = [
        get_maes(R, fold_mae, fold_test, fold_held)
        for fold_test, fold_held, fold_mae in zip(test_sets, held_outs, maes)
    ]
    # Transpose "fold -> groups" into "group -> folds" and concatenate.
    return tuple(
        np.hstack([groups[k] for groups in per_fold]) for k in range(4)
    )

# Pool the D1/D2/D3/all MAE groups over every fold.
d1_maes, d2_maes, d3_maes, all_maes = get_cumul_maes(
    R=paper.Rnan,
    maes=maes,
    test_sets=test_sets,
    held_outs=held_outs,
)



In [13]:
# Report size and mean MAE for each pooled group.
print('D1 (extremely cold start) users:')
print('Num users:', len(d1_maes))
print('Average MAE:', d1_maes.mean())

print('D2 (cold start) users:')
print('Num users:', len(d2_maes))
print('Average MAE:', d2_maes.mean())

print('D3 (warm start) users:')
print('Num users:', len(d3_maes))
print('Average MAE:', d3_maes.mean())

print('All users:')
print('Num users:', len(all_maes))
print('Average MAE:', all_maes.mean())

D1 (extremely cold start) users:
Num users: 809
Average MAE: 0.88852710922
D2 (cold start) users:
Num users: 2117
Average MAE: 0.817068082276
D3 (warm start) users:
Num users: 3114
Average MAE: 0.749377326288
All users:
Num users: 6040
Average MAE: 0.791740489338


In [14]:
def k_avg(R, maes, test_sets, held_outs):
    """Average group sizes and group mean MAEs over the folds.

    For every fold (formed by zipping ``test_sets``, ``held_outs`` and
    ``maes``) ``get_maes`` is called, and the size and the mean of each of
    the four arrays it returns are recorded.  Both tables are then
    averaged over the folds.

    Parameters
    ----------
    R : array-like
        Rating matrix, passed through to ``get_maes``.
    maes, test_sets, held_outs : sequences
        Per-fold inputs, passed element-wise to ``get_maes``.

    Returns
    -------
    (n_users, ms) : tuple of two float ndarrays of length 4
        Mean group size and mean of the per-fold group means, in the order
        ``get_maes`` returns its groups.  A group that is empty in some
        fold has a NaN mean for that fold, which carries into ``ms``.
    """
    # Size the tables from the folds actually supplied rather than from a
    # notebook-level global, so the function agrees with its arguments.
    folds = list(zip(test_sets, held_outs, maes))
    n_groups = 4  # get_maes returns four arrays

    # Builtin `int` replaces the removed `np.int` alias (same dtype).
    n_users = np.zeros((len(folds), n_groups), dtype=int)
    ms = np.zeros((len(folds), n_groups))
    for i, (test_set, held_out, mae) in enumerate(folds):
        for j, m in enumerate(get_maes(R, mae, test_set, held_out)):
            n_users[i, j] = m.shape[0]
            ms[i, j] = np.mean(m)
    return np.mean(n_users, axis=0), np.mean(ms, axis=0)

# Fold-averaged group sizes and mean MAEs, displayed as the cell result.
k_avg(
    R=paper.Rnan,
    maes=maes,
    test_sets=test_sets,
    held_outs=held_outs,
)



(array([  202.25,   529.25,   778.5 ,  1510.  ]),
 array([ 0.88733087,  0.81733478,  0.74945678,  0.79174049]))