In [43]:
import matplotlib
# Font type 42 makes matplotlib embed TrueType fonts in PDF/PS output
# (instead of the default Type 3), which keeps figure text editable.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] =  42

# Larger default font size for every figure drawn in this notebook.
matplotlib.rcParams.update({'font.size': 18})

In [44]:
import os
import shutil
import sys

import numpy as np
from scipy import sparse

import matplotlib.pyplot as plt
%matplotlib inline
#
import seaborn as sn
#sn.set()

import pandas as pd

import bottleneck as bn

In [45]:
import datetime
from copy import deepcopy

In [46]:
import pickle

def save_pkl(obj, filename ):
    """Serialize `obj` into the file `filename` using pickle's highest protocol.

    The file is opened in binary write mode, so an existing file of the same
    name is overwritten. Returns None.
    """
    out_file = open(filename, 'wb')
    try:
        pickle.dump(obj, out_file, protocol=pickle.HIGHEST_PROTOCOL)
    finally:
        # Always release the handle, even if pickling fails part-way.
        out_file.close()
    
def load_pkl(filename ):
    """Open `filename` in binary mode and return the object unpickled from it.

    Unpickling can execute arbitrary code, so only use this on files that come
    from a trusted source.
    """
    with open(filename, 'rb') as in_file:
        loaded = pickle.load(in_file)
    return loaded
 

In [47]:

### Change `DATA_DIR` to the location where the movielens-20m dataset sits.
DATA_DIR = '/efs/users/hsteck/public/data_for_ease/movielens20mio/'
# Alternative dataset locations (disabled); uncomment one to switch datasets.
#DATA_DIR = '/root/projects/data/netflix_prize_data/download/'
#msd data in dawens paper
#DATA_DIR = '/root/projects/data/echonest_taste/'

In [48]:
# Sub-directory of DATA_DIR from which this notebook reads its pre-processed files
# (unique_sid.txt, train.csv, test_tr.csv, test_te.csv).
pro_dir = os.path.join(DATA_DIR, 'pro_sg')


# Training/validation data, hyperparameters
Load the pre-processed training and validation data

In [49]:
# Read the item ids, one per line with surrounding whitespace stripped.
with open(os.path.join(pro_dir, 'unique_sid.txt'), 'r') as f:
    unique_sid = [line.strip() for line in f]

# Number of items; used as the column count of every interaction matrix below.
n_items = len(unique_sid)

In [50]:
def load_train_data(csv_file):
    """Build the user-by-item interaction matrix from a CSV file.

    The CSV must provide `uid` and `sid` columns; every row contributes a 1 at
    position (uid, sid). The result is a float64 CSR matrix with
    ``max(uid) + 1`` rows and `n_items` columns, where `n_items` is the
    module-level item count.
    """
    interactions = pd.read_csv(csv_file)
    user_idx = interactions['uid']
    item_idx = interactions['sid']

    shape = (user_idx.max() + 1, n_items)
    values = np.ones_like(user_idx)
    return sparse.csr_matrix((values, (user_idx, item_idx)),
                             dtype='float64', shape=shape)

In [51]:
def load_xtx_binary():
    """Load the training interactions and their dense item-item Gram matrix.

    Reads `train.csv` from `pro_dir`, prints the shape of the interaction
    matrix X, and returns the list ``[XtX, n_users, X]`` where XtX is the dense
    float32 array X^T X, n_users is the number of rows of X, and X is the
    sparse interaction matrix itself.
    """
    X = load_train_data(os.path.join(pro_dir, 'train.csv'))
    # Optional per-user normalization, currently disabled:
    #   nn = np.array(np.sum(X, axis=1))[:, 0]
    #   nn = 1.0 / np.sqrt(nn)
    #   X = sparse.spdiags(nn, 0, len(nn), len(nn)) * X
    print (X.shape)
    # The product is formed sparse, densified, and only then cast to float32.
    gram = X.T.dot(X).toarray()
    return [gram.astype('float32'), X.shape[0], X]

In [52]:
# XtX: dense float32 item-item matrix X^T X; userCnt: number of rows of X;
# X: the sparse training interaction matrix.
XtX, userCnt , X =load_xtx_binary()
# Keep an independent copy of the original diagonal: later cells overwrite the
# diagonal of XtX in place and use XtXdiag to rebuild it.
XtXdiag=deepcopy(np.diag(XtX))

(116677, 20108)


In [53]:
def load_tr_te_data(csv_file_tr, csv_file_te):
    """Load a pair of interaction CSVs into two row-aligned CSR matrices.

    Both files must provide `uid` and `sid` columns. User ids are shifted so
    that the smallest uid found in either file maps to row 0, and both matrices
    get ``max_uid - min_uid + 1`` rows and `n_items` (module-level) columns.
    Every CSV row contributes a 1 at (uid - min_uid, sid).

    Returns the tuple ``(data_tr, data_te)`` of float64 CSR matrices.
    """
    frames = [pd.read_csv(csv_file_tr), pd.read_csv(csv_file_te)]
    uid_lo = min(frame['uid'].min() for frame in frames)
    uid_hi = max(frame['uid'].max() for frame in frames)
    shape = (uid_hi - uid_lo + 1, n_items)

    def _to_csr(frame):
        # Same construction for both splits so that their rows line up.
        user_idx = frame['uid'] - uid_lo
        item_idx = frame['sid']
        return sparse.csr_matrix((np.ones_like(user_idx), (user_idx, item_idx)),
                                 dtype='float64', shape=shape)

    return _to_csr(frames[0]), _to_csr(frames[1])

In [54]:
# Two row-aligned splits of the test users: evaluate() multiplies `test_data_tr`
# by the model and scores the resulting ranking against `test_data_te`.
test_data_tr, test_data_te = load_tr_te_data(
    os.path.join(pro_dir, 'test_tr.csv'),
    os.path.join(pro_dir, 'test_te.csv'))

In [55]:
# Number of test rows, and the row-index sequence that evaluate() slices into batches.
N_test = test_data_tr.shape[0]
idxlist_test = range(N_test)


Evaluation functions: normalized discounted cumulative gain (NDCG@k) and Recall@k

In [56]:
def NDCG_binary_at_k_batch(X_pred, heldout_batch, k=100):
    '''
    NDCG@k with binary relevance, computed for every row of a batch.

    X_pred is a dense 2-D score array (one row per user); heldout_batch is a
    sparse matrix with the same rows whose stored entries mark the relevant
    items. Every entry that is not stored in heldout_batch counts as relevance 0.

    Returns a 1-D array with one NDCG value per row. A row without any held-out
    entry yields 0/0, i.e. NaN.
    '''
    n_rows = X_pred.shape[0]
    row_sel = np.arange(n_rows)[:, np.newaxis]

    # Pick k candidate columns per row (unordered), then order just those k by
    # descending score.
    # NOTE(review): assumes bn.argpartition puts the k best-scoring columns of
    # -X_pred first, like numpy's argpartition -- confirm against bottleneck docs.
    candidates = bn.argpartition(-X_pred, k, axis=1)[:, :k]
    order = np.argsort(-X_pred[row_sel, candidates], axis=1)
    ranked_items = candidates[row_sel, order]

    # Positional discounts 1/log2(rank + 1) for ranks 1..k.
    discounts = 1. / np.log2(np.arange(2, k + 2))

    hits = heldout_batch[row_sel, ranked_items].toarray()
    DCG = (hits * discounts).sum(axis=1)
    # Ideal DCG: all of a row's held-out items (at most k) ranked first.
    ideal = [discounts[:min(n, k)].sum() for n in heldout_batch.getnnz(axis=1)]
    return DCG / np.array(ideal)

In [57]:
def Recall_at_k_batch(X_pred, heldout_batch, k=100):
    """Recall@k for every row of a batch.

    X_pred is a dense 2-D score array; heldout_batch is a sparse matrix with
    the same shape whose positive entries mark the relevant items. For each row
    the number of relevant items among the k selected columns is divided by
    ``min(k, number of relevant items)``.

    Returns a 1-D array with one recall value per row. A row without relevant
    items divides by zero and yields NaN.
    """
    n_rows = X_pred.shape[0]

    # Unordered top-k columns per row; ordering is irrelevant for recall.
    topk_cols = bn.argpartition(-X_pred, k, axis=1)[:, :k]
    recommended = np.zeros_like(X_pred, dtype=bool)
    recommended[np.arange(n_rows)[:, np.newaxis], topk_cols] = True

    relevant = (heldout_batch > 0).toarray()
    hits = (relevant & recommended).sum(axis=1).astype(np.float32)
    return hits / np.minimum(k, relevant.sum(axis=1))

In [58]:
def evaluate(BB, batch_size_test=5000):
    """Score the item-item weight matrix `BB` on the test split.

    Test users are processed in batches. For each batch the rows of the
    module-level `test_data_tr` are densified (float32) and multiplied by `BB`;
    items already present in those rows are masked with -inf so that they cannot
    be ranked. NDCG@100, Recall@20 and Recall@50 are then computed against the
    matching rows of `test_data_te`.

    Parameters
    ----------
    BB : array
        Weight matrix applied as ``Xtest.dot(BB)``.
    batch_size_test : int, optional
        Number of test users scored per batch. Defaults to 5000, the value that
        used to be hard-coded.

    Returns
    -------
    list
        ``[mean NDCG@100, mean Recall@20, mean Recall@50]`` over all test users.

    Side effects: prints start/end timestamps, the bounds of every batch, and
    each metric's mean together with its standard error (std / sqrt(n)).
    """
    print(datetime.datetime.now())

    n100_list, r20_list, r50_list = [], [], []

    for st_idx in range(0, N_test, batch_size_test):
        end_idx = min(st_idx + batch_size_test, N_test)
        batch_rows = idxlist_test[st_idx:end_idx]
        Xtest = test_data_tr[batch_rows]
        # Slice the held-out rows once; all three metrics share them.
        heldout = test_data_te[batch_rows]
        print (str(st_idx)+' ... '+str(end_idx))
        if sparse.isspmatrix(Xtest):
            Xtest = Xtest.toarray()
        Xtest = Xtest.astype('float32')

        pred_val = Xtest.dot(BB)

        # exclude examples from training and validation (if any)
        pred_val[Xtest.nonzero()] = -np.inf
        n100_list.append(NDCG_binary_at_k_batch(pred_val, heldout, k=100))
        r20_list.append(Recall_at_k_batch(pred_val, heldout, k=20))
        r50_list.append(Recall_at_k_batch(pred_val, heldout, k=50))

    n100_list = np.concatenate(n100_list)
    r20_list = np.concatenate(r20_list)
    r50_list = np.concatenate(r50_list)

    print("Test NDCG@100=%.5f (%.5f)" % (np.mean(n100_list), np.std(n100_list) / np.sqrt(len(n100_list))))
    print("Test Recall@20=%.5f (%.5f)" % (np.mean(r20_list), np.std(r20_list) / np.sqrt(len(r20_list))))
    print("Test Recall@50=%.5f (%.5f)" % (np.mean(r50_list), np.std(r50_list) / np.sqrt(len(r50_list))))

    print(datetime.datetime.now())
    return [np.mean(n100_list), np.mean(r20_list), np.mean(r50_list)]

# train 0 diag model

In [59]:
# zero diag, full rank
# Closed-form training: invert the regularized XtX, rescale its columns, and zero
# the diagonal of the resulting weight matrix BB.
print(datetime.datetime.now())
L2reg=300.0  # constant added to every diagonal entry of XtX
pdrop=0.125
boost= pdrop/(1.0-pdrop)  # multiplier applied to the original diagonal XtXdiag
# With diagStrength = 1.0 the blend two lines below leaves eta = 1/diag(inv(XtX));
# with 0.0 it would reduce to L2reg + boost*XtXdiag.
diagStrength =1.0
ii_diag=np.diag_indices(XtX.shape[0])
# NOTE: this overwrites the diagonal of the shared XtX array in place.
XtX[ii_diag]=L2reg+boost*XtXdiag  +XtXdiag 
BB=np.linalg.inv(XtX)
eta = 1.0 / np.diag(BB)
eta = L2reg+boost*XtXdiag + (eta - L2reg-boost*XtXdiag)*diagStrength
# Broadcasting scales column j of BB by -eta[j].
BB*=- eta
BB[ii_diag]=0.0 #incorrect diag, but irrelevant
print(datetime.datetime.now())

2024-06-18 23:17:08.155311
2024-06-18 23:17:25.902687


In [60]:
evaluate(BB)

2024-06-18 23:17:25.909285
0 ... 5000
5000 ... 10000
Test NDCG@100=0.42245 (0.00215)
Test Recall@20=0.39232 (0.00268)
Test Recall@50=0.52273 (0.00282)
2024-06-18 23:17:34.710133


[0.4224484206963344, 0.3923214242839204, 0.5227278221083802]

In [61]:
# zero diag, full rank
# Sweep over dropout probabilities: for each value, rebuild the regularized
# diagonal of XtX (in place), train the closed-form zero-diagonal model, and
# evaluate it on the test split.
L2reg=700.0
diagStrength =1.0
# Loop-invariant: indices of the diagonal of XtX.
ii_diag=np.diag_indices(XtX.shape[0])

for pdrop in [0.2, 0.15, 0.125]:
    print("===================================================================")
    print(datetime.datetime.now())
    boost= pdrop/(1.0-pdrop)
    # XtXdiag holds the original diagonal, so this does not accumulate across iterations.
    XtX[ii_diag]=L2reg+boost*XtXdiag  +XtXdiag
    BB=np.linalg.inv(XtX)
    eta = 1.0 / np.diag(BB)
    eta = L2reg+boost*XtXdiag + (eta - L2reg-boost*XtXdiag)*diagStrength
    # Broadcasting scales column j of BB by -eta[j].
    BB*=- eta
    BB[ii_diag]=0.0 #incorrect diag, but irrelevant
    print(datetime.datetime.now())
    evaluate(BB)
    print(datetime.datetime.now())

2024-06-18 23:17:34.720774
2024-06-18 23:17:52.311051
2024-06-18 23:17:52.311357
0 ... 5000
5000 ... 10000
Test NDCG@100=0.42304 (0.00214)
Test Recall@20=0.39101 (0.00268)
Test Recall@50=0.52363 (0.00282)
2024-06-18 23:18:01.010673
2024-06-18 23:18:01.013015
2024-06-18 23:18:01.013033
2024-06-18 23:18:18.856044
2024-06-18 23:18:18.856341
0 ... 5000
5000 ... 10000
Test NDCG@100=0.42267 (0.00214)
Test Recall@20=0.39086 (0.00268)
Test Recall@50=0.52322 (0.00282)
2024-06-18 23:18:27.599002
2024-06-18 23:18:27.601343
2024-06-18 23:18:27.601380
2024-06-18 23:18:45.139126
2024-06-18 23:18:45.139424
0 ... 5000
5000 ... 10000
Test NDCG@100=0.42228 (0.00214)
Test Recall@20=0.39082 (0.00268)
Test Recall@50=0.52293 (0.00282)
2024-06-18 23:18:53.913245
2024-06-18 23:18:53.915464


In [63]:
nflx

optimal L2 

-------------------------------------- boost 0.3
-- L2=1000  and  boost= 0.3
Test NDCG@100=0.39744 (0.00099)
Test Recall@20=0.36504 (0.00128)
Test Recall@50=0.44913 (0.00124)

-- L2=700  and  boost= 0.3
Test NDCG@100=0.39754 (0.00099)
Test Recall@20=0.36533 (0.00128)
Test Recall@50=0.44927 (0.00124)

-- L2=300  and  boost= 0.3
Test NDCG@100=0.39737 (0.00099)
Test Recall@20=0.36524 (0.00128)
Test Recall@50=0.44873 (0.00124)

---------------------------------------boost 0.5
-- L2=700  and  boost= 0.5
Test NDCG@100=0.39767 (0.00099)
Test Recall@20=0.36536 (0.00128)
Test Recall@50=0.44923 (0.00124)

-- L2=300  and  boost= 0.5
Test NDCG@100=0.39753 (0.00099)
Test Recall@20=0.36533 (0.00128)
Test Recall@50=0.44907 (0.00124)

--------------------------------------boost 1.0

-- L2=700  and  boost= 1.0
Test NDCG@100=0.39536 (0.00098)
Test Recall@20=0.36269 (0.00127)
Test Recall@50=0.44700 (0.00124)

-- L2=300  and  boost= 1.0
Test NDCG@100=0.39551 (0.00098)
Test Recall@20=0.36272 (0.00127)
Test Recall@50=0.44697 (0.00124)

=================================== L2=0
-- L2=0  and  boost= 0.3
Test NDCG@100=0.39091 (0.00100)
Test Recall@20=0.35941 (0.00127)
Test Recall@50=0.44085 (0.00125)


-- L2=0  and  boost= 0.5
Test NDCG@100=0.39264 (0.00100)
Test Recall@20=0.36054 (0.00127)
Test Recall@50=0.44276 (0.00125)

-- L2=0  and  boost= 1.0
Test NDCG@100=0.39256 (0.00099)
Test Recall@20=0.35996 (0.00127)
Test Recall@50=0.44324 (0.00124)


-- L2=0  and  boost= 2.0
Test NDCG@100=0.38765 (0.00097)
Test Recall@20=0.35395 (0.00126)
Test Recall@50=0.43822 (0.00124)

-- L2=0  and  boost= 10.0
Test NDCG@100=0.35408 (0.00093)
Test Recall@20=0.32016 (0.00122)
Test Recall@50=0.40299 (0.00123)





SyntaxError: invalid syntax (3717840567.py, line 5)

# train denoising AE

In [64]:
# Sanity check of the ratio prob_drop / prob_present for a given dropout probability.
prob_drop=0.3333334
prob_present=1.0-prob_drop

# Displayed as the cell output.
prob_drop / prob_present


0.500000150000015

In [65]:

# Closed-form training of the denoising model for each L2 value in L2regList,
# followed by evaluation on the test split.
L2regList = [0.0]

for kk in range(len(L2regList)):
    print ("==========================================================")
    prob_drop=0.05
    prob_present=1.0-prob_drop

    L2reg= L2regList[kk]
    L2_drop = prob_drop / prob_present


    #train precompute
    print(datetime.datetime.now())
    ii_diag=np.diag_indices(XtX.shape[0])
    # Overwrites the diagonal of the shared XtX array in place.
    XtX[ii_diag]=  XtXdiag *(1.0+L2_drop) + L2reg
    CC=np.linalg.inv(XtX)
    print(datetime.datetime.now())

    # train iterate
    beta=0.0
    for _ in range(1):
        eta = (1.0 - prob_present *beta ) / np.diag(CC)
        # Broadcasting scales column j of CC by -eta[j].
        BB = CC * (-eta)
        BB[ii_diag]= 0.0 # prob_present *beta
        # Only the diagonal of XtX.dot(BB) is needed: diag(A.dot(B))[i] equals
        # sum_j A[i, j] * B[j, i], so einsum avoids forming the full n x n product.
        beta=   1.0-  np.einsum('ij,ji->i', XtX, BB)   / XtXdiag
        print("{} .  {}".format(np.mean(beta), np.std(beta)))

    #evaluate
    print("---- prob_drop={}    L2reg={}     L2_drop={}".format(prob_drop, L2reg, L2_drop))
    evaluate(BB)

2024-06-18 23:20:33.666514
2024-06-18 23:20:51.261806
0.27624794840812683 .  0.21409182250499725
---- prob_drop=0.05    L2reg=0.0     L2_drop=0.052631578947368425
2024-06-18 23:20:55.124279
0 ... 5000
5000 ... 10000
Test NDCG@100=0.39417 (0.00219)
Test Recall@20=0.36874 (0.00270)
Test Recall@50=0.48960 (0.00294)
2024-06-18 23:21:04.351404


In [66]:
====== pop drop =0.3333333333

SyntaxError: invalid syntax (2487073303.py, line 1)

In [None]:
# Closed-form expression for beta, plotted against the iteratively computed `beta` below.
# Relies on prob_present, prob_drop and CC as left in the kernel by the denoising-AE
# training cell.
beta_closedform= 1.0/prob_present  * ( 1.0- prob_drop/prob_present * XtXdiag * np.diag(CC)   )

In [None]:
plt.plot(beta, beta_closedform,'x')

In [None]:
vec=XtXdiag * np.diag(CC)


In [None]:
plt.hist(vec, bins=200, alpha=0.8,  density=True,histtype='step', color='black', linewidth=2.0,  label='calibrated')
plt.show()

In [None]:
plt.plot(np.log(XtXdiag) , np.log(vec),'x')

In [None]:
============ drop pop =0.75

In [None]:
# Same closed-form beta as computed earlier; uses whatever prob_present, prob_drop, CC
# and beta are currently in the kernel.
# NOTE(review): presumably meant to follow a re-run of the training cell with a
# different prob_drop -- confirm.
beta_closedform= 1.0/prob_present  * ( 1.0- prob_drop/prob_present * XtXdiag * np.diag(CC)   )

# Scatter of the iterative beta against the closed-form beta.
plt.plot(beta, beta_closedform,'x')
plt.show()

# Distribution of XtXdiag * diag(CC) over items.
vec=XtXdiag * np.diag(CC)
plt.hist(vec, bins=200, alpha=0.8,  density=True,histtype='step', color='black', linewidth=2.0,  label='calibrated')
plt.show()



In [None]:
# Snapshot of the current beta_closedform.
# NOTE(review): the suffix suggests prob_drop = 0.75 was active when this ran; on a
# plain top-to-bottom run it copies the same array as beta_closedform_033 -- confirm.
beta_closedform_075=deepcopy(beta_closedform)

In [None]:
# Snapshot of the current beta_closedform.
# NOTE(review): the suffix suggests prob_drop = 0.33 was active when this ran; on a
# plain top-to-bottom run it copies the same array as beta_closedform_075 -- confirm.
beta_closedform_033=deepcopy(beta_closedform)

In [None]:
plt.plot(beta_closedform_033,beta_closedform_075,'x')

In [None]:
# Closed-form beta from the prob_present, prob_drop and CC currently in the kernel.
# NOTE(review): the suffix suggests the training cell was re-run with prob_drop = 0.1
# before this cell -- confirm.
beta_closedform_01= 1.0/prob_present  * ( 1.0- prob_drop/prob_present * XtXdiag * np.diag(CC)   )

In [None]:
plt.plot(beta_closedform_033,beta_closedform_01,'x')

In [None]:
# Closed-form beta from the prob_present, prob_drop and CC currently in the kernel.
# NOTE(review): the suffix suggests the training cell was re-run with prob_drop = 0.05
# before this cell -- confirm.
beta_closedform_005= 1.0/prob_present  * ( 1.0- prob_drop/prob_present * XtXdiag * np.diag(CC)   )

In [None]:
plt.plot(beta_closedform_01,beta_closedform_005,'x')

In [None]:
# NOTE(review): no visible cell in this notebook assigns `beta_closedform_05`, so on a
# fresh kernel this raises NameError -- confirm whether this cell should be dropped.
del beta_closedform_05

In [None]:
---- prob_drop=0.5    L2reg=700.0     L2_drop=1.0
Test NDCG@100=0.39211 (0.00098)
Test Recall@20=0.35892 (0.00127)
Test Recall@50=0.44253 (0.00124)
---- prob_drop=0.5    L2reg=200.0     L2_drop=1.0   ... best L2
Test NDCG@100=0.39241 (0.00098)
Test Recall@20=0.35962 (0.00127)
Test Recall@50=0.44256 (0.00124)
---- prob_drop=0.5    L2reg=100.0     L2_drop=1.0
Test NDCG@100=0.39208 (0.00098)
Test Recall@20=0.35928 (0.00127)
Test Recall@50=0.44204 (0.00124)

In [None]:
evaluate(BB)

In [None]:
nflx
---drop 0.3
0.8734440207481384 .  0.07434113323688507
Test NDCG@100=0.38795 (0.00099)
Test Recall@20=0.35616 (0.00128)
Test Recall@50=0.43691 (0.0012)

-- drop 0.5
0.8447370529174805 .  0.08548472821712494
Test NDCG@100=0.38996 (0.00099)
Test Recall@20=0.35728 (0.00127)
Test Recall@50=0.43953 (0.00124)


-- drop 0.7
Test NDCG@100=0.38360 (0.00097)
Test Recall@20=0.34970 (0.00126)
Test Recall@50=0.43369 (0.00124)

-- drop 0.7 with l2_drop=0.5
Test NDCG@100=0.39155 (0.00099)
Test Recall@20=0.35898 (0.00127)
Test Recall@50=0.44142 (0.00124)

-- drop 0.999 with optimal l2: 700 and ratio=0.5
Test NDCG@100=0.39767 (0.00099)
Test Recall@20=0.36536 (0.00128)
Test Recall@50=0.44923 (0.00124)

-- drop 0.7 with optimal l2: 700 and ratio=0.5
Test NDCG@100=0.39661 (0.00099)
Test Recall@20=0.36417 (0.00128)
Test Recall@50=0.44775 (0.00124)
    
-- drop 0.5 with optimal l2: 700 and ratio=0.5    ... also a regular dropout with addl L2 700
Test NDCG@100=0.39510 (0.00099)
Test Recall@20=0.36292 (0.00128)
Test Recall@50=0.44599 (0.00124)

-- drop 0.25 with optimal l2: 700 and ratio=0.5
Test NDCG@100=0.39011 (0.00098)
Test Recall@20=0.35725 (0.00128)
Test Recall@50=0.44089 (0.00124)


In [None]:
# Per-item beta from the current XtX and BB. Only the diagonal of XtX.dot(BB) is
# needed: diag(A.dot(B))[i] equals sum_j A[i, j] * B[j, i], so einsum avoids forming
# the full n x n product.
beta=1.0-  np.einsum('ij,ji->i', XtX, BB)   / XtXdiag

In [None]:
print(np.median(beta))
print(np.mean(beta))
print(np.max(beta))
print(np.min(beta))


In [None]:
plt.plot(np.log(XtXdiag), beta,'x')

In [None]:
del BB

# low rank with 0diag and rho-admm 

In [None]:

# Low-rank model BB = EE.dot(FFt), trained by alternating closed-form updates of
# FFt (given EE) and EE (given FFt), with an extra per-item term rhoVec coupling
# each update to the current product EE.dot(FFt). XtX's diagonal is overwritten in
# place several times; it is reset to XtXdiag inside every iteration.
rhoadd=200

#print("precomputing")
#precompute
#ii_diag=np.diag_indices(XtX.shape[0])
#XtX[ii_diag]=L2reg+boost*XtXdiag  +XtXdiag 
#PP=np.linalg.inv(XtX)

for prob_dropout in [  0.1]:
    boost= prob_dropout/(1.0-prob_dropout)
    print("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb prob_dropout= {}".format(prob_dropout))
    for L2reg in [ 50.0 ]:
      for dim in  [   1000 ] : #[ 10 , 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]:
        print("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa dim={}".format(dim))

        print(datetime.datetime.now())
        # EE starts as small unseeded Gaussian noise (results vary run to run); FFt starts at zero.
        EE=np.random.randn(XtX.shape[0], dim) * 0.0001 #* sparsityMask
        FFt=np.zeros((dim,XtX.shape[0]),dtype=float) 
        print(datetime.datetime.now())

        print("precomputing")
        #precompute
        # PP is the inverse of XtX with diagonal XtXdiag + L2reg + boost*XtXdiag + rhoVec.
        rhoVec = L2reg+boost*XtXdiag + rhoadd
        ii_diag=np.diag_indices(XtX.shape[0])
        XtX[ii_diag]=L2reg+boost*XtXdiag  +XtXdiag +rhoVec
        PP=np.linalg.inv(XtX)


        for itercnt in range(40):
            print("================= iterCnt: {}".format(itercnt))
            # ---- update FFt given EE
            #print("  --- train FF")
            #print(datetime.datetime.now())
            ii_diag=np.diag_indices(XtX.shape[0])
            XtX[ii_diag]= L2reg+boost*XtXdiag  +XtXdiag 
            HH=EE.T.dot(XtX).dot(EE)
            #print("          10")
            HH= np.linalg.inv(HH)  .dot(EE.T)
            #print("          11")
            # Restore the original diagonal before forming the right-hand side.
            XtX[ii_diag]= XtXdiag
            # Note: the argument is a dense n_items x n_items array.
            GG= HH.dot(XtX + (rhoVec[:,None]*EE).dot(FFt)  )
            #print("          12")
            diag_up = np.diag(EE.dot(GG))
            diag_down = np.diag(EE.dot(HH))
            # eta[i] = diag_up[i] / diag_down[i] makes diag(EE.dot(GG - HH*eta)) zero,
            # i.e. the new FFt gives EE.dot(FFt) a zero diagonal.
            eta= diag_up / diag_down 
            #print("          13")
            FFt = GG - HH * eta 
            del GG
            del HH
            #print("avg. diag value1: {}".format(np.mean(np.abs(np.diag(EE.dot(FFt))))))
            #print("          14")
            ## make sparse
            #FFt *= sparsityMask.T
            #print(datetime.datetime.now())
            # ---- update EE given FFt
            #print("  --- train EE")
            #print(datetime.datetime.now())
            #XtX[ii_diag]=L2reg+boost*XtXdiag  +XtXdiag 
            #HH=np.linalg.inv(XtX)
            #print("          1")
            GG= FFt.dot(FFt.T)
            #print("          2")
            GG=np.linalg.inv(GG)
            # GG becomes FFt.T.dot(inv(FFt.dot(FFt.T))).
            GG=FFt.T.dot(GG)
            #print("          3")
            #if diagStrength>0.0:
            #print("          4")
            #KK = FFt.T.dot(GG).dot(FFt)
            #print("          5")
            HH= PP.dot(  XtX+(rhoVec[:,None]*EE).dot(FFt)  ).dot(GG)
            # eta solves the linear system (PP * (GG.dot(FFt))) eta = diag(HH.dot(FFt)).
            # NOTE(review): presumably this is what keeps diag(EE.dot(FFt)) at zero
            # after the EE update below -- confirm.
            eta= np.linalg.solve( PP * (GG.dot(FFt))  , np.diag( HH.dot(FFt)    ))
            #eta= np.linalg.pinv(HH * KK ).dot(np.diag(KK))
            #print("          6")
            #eta= L2reg  +boost*XtXdiag+ (eta-L2reg-boost*XtXdiag)*diagStrength
            #print("          7")
            #else:
            #if diagStrength==0.0:
            #        eta= L2reg +boost*XtXdiag
            EE = HH -  (PP *eta).dot(GG) 
            del GG
            del HH
            #print("avg. diag value2: {}".format(np.mean(np.abs(np.diag(EE.dot(FFt))))))
            print(datetime.datetime.now())
            ###### eval
            # Evaluate the current low-rank product at selected iteration counts only.
            if (itercnt+1) in [1,5,10,15,20,30,40]:
                print("========================= eval:")
                BB= EE.dot(FFt)
                evaluate(BB)
                del BB
                #print("%d %d\t%.3f\t%.3f\t%.3f" %(dim, L2reg, np.mean(n100_list), np.mean(r20_list), np.mean(r50_list)))

                print(datetime.datetime.now())

# low-rank

In [None]:
########## orig old
# Low-rank model BB = EE.dot(FFt), trained by alternating closed-form updates of
# FFt (given EE) and EE (given FFt). XtX's diagonal is overwritten in place; it
# holds XtXdiag at the end of every FFt update.

# With diagStrength = 0.0 the eta correction in the FFt update is multiplied by
# zero and the zero-diagonal branch of the EE update never runs.
diagStrength =0.0
# This initial value is overwritten inside the prob_dropout loop below.
boost=0.00

#print("precomputing")
#precompute
#ii_diag=np.diag_indices(XtX.shape[0])
#XtX[ii_diag]=L2reg+boost*XtXdiag  +XtXdiag 
#PP=np.linalg.inv(XtX)

for prob_dropout in [  0.66666667]:
    boost= prob_dropout/(1.0-prob_dropout)
    print("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb prob_dropout= {}".format(prob_dropout))
    for L2reg in [ 200.0 ]:
      for dim in  [   1000 ] : #[ 10 , 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]:
        print("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa dim={}".format(dim))

        print(datetime.datetime.now())
        # EE starts as small unseeded Gaussian noise, so results vary run to run.
        EE=np.random.randn(XtX.shape[0], dim) * 0.0001 #* sparsityMask
        print(datetime.datetime.now())

        print("precomputing")
        #precompute
        # PP is the inverse of XtX with diagonal XtXdiag + L2reg + boost*XtXdiag.
        ii_diag=np.diag_indices(XtX.shape[0])
        XtX[ii_diag]=L2reg+boost*XtXdiag  +XtXdiag 
        PP=np.linalg.inv(XtX)


        for itercnt in range(40):
            print("================= iterCnt: {}".format(itercnt))
            # ---- update FFt given EE
            #print("  --- train FF")
            #print(datetime.datetime.now())
            ii_diag=np.diag_indices(XtX.shape[0])
            XtX[ii_diag]= L2reg+boost*XtXdiag  +XtXdiag 
            HH=EE.T.dot(XtX).dot(EE)
            #print("          10")
            HH= np.linalg.inv(HH)  .dot(EE.T)
            #print("          11")
            # Restore the original diagonal before forming the right-hand side.
            XtX[ii_diag]= XtXdiag
            GG= HH.dot(XtX )
            #print("          12")
            diag_up = np.diag(EE.dot(GG))
            diag_down = np.diag(EE.dot(HH))
            eta= diag_up / diag_down 
            #print("          13")
            # The eta term is scaled by diagStrength (0.0 here, so FFt equals GG).
            FFt = GG - HH * eta * diagStrength
            del GG
            #print("          14")
            ## make sparse
            #FFt *= sparsityMask.T
            #print(datetime.datetime.now())
            # ---- update EE given FFt
            #print("  --- train EE")
            #print(datetime.datetime.now())
            #XtX[ii_diag]=L2reg+boost*XtXdiag  +XtXdiag 
            #HH=np.linalg.inv(XtX)
            #print("          1")
            GG= FFt.dot(FFt.T)
            #print("          2")
            GG=np.linalg.inv(GG)
            #print("          3")
            #if diagStrength>0.0:
            #print("          4")
            #KK = FFt.T.dot(GG).dot(FFt)
            #print("          5")
            
            eta= L2reg +boost*XtXdiag #if diag unconstraint
            # From the fifth iteration on, and only when diagStrength > 0, eta is
            # replaced by the solution of (PP * KK) eta = diag(KK).
            if itercnt>3:  
             if diagStrength>0.0:
                print("------------------------------- zero diagonsl enforced !")
                KK = FFt.T.dot(GG).dot(FFt)
                eta= np.linalg.solve( PP * KK   , np.diag(KK))
            #eta= np.linalg.pinv(HH * KK ).dot(np.diag(KK))
            #print("          6")
            #eta= L2reg  +boost*XtXdiag+ (eta-L2reg-boost*XtXdiag)*diagStrength
            #print("          7")
            #else:
            #if diagStrength==0.0:
            #        eta= L2reg +boost*XtXdiag
            # HH = I - PP*eta (column j of PP scaled by eta[j]).
            HH=PP *(-eta) 
            HH[ii_diag]+=1
            #print("          8")
            EE=HH.dot( FFt.T.dot(GG) )
            #print("          9")

            ## make sparse
            #EE*=sparsityMask
            print(datetime.datetime.now())
            ###### eval
            # Evaluate the current low-rank product at selected iteration counts only.
            if (itercnt+1) in [1,5,10,15,20,30,40]:
                print("========================= eval:")
                BB= EE.dot(FFt)
                evaluate(BB)
                del BB
                #print("%d %d\t%.3f\t%.3f\t%.3f" %(dim, L2reg, np.mean(n100_list), np.mean(r20_list), np.mean(r50_list)))

                print(datetime.datetime.now())

In [None]:
EEnn= (1.0 /np.sqrt(np.sum(EE*EE,1)))[:,None]  *EE

In [None]:
FFtnn= FFt / np.sqrt(np.sum(FFt*FFt,0))

In [None]:
# Cosine similarity between row i of EE and column i of FFt, for every item i.
# EEnn has unit-norm rows, FFtnn has unit-norm columns.
EEnn= (1.0 /np.sqrt(np.sum(EE*EE,1)))[:,None]  *EE
FFtnn= FFt / np.sqrt(np.sum(FFt*FFt,0))
# Only the diagonal of EEnn.dot(FFtnn) is needed: diag(A.dot(B))[i] equals
# sum_j A[i, j] * B[j, i], so einsum avoids forming the full n_items x n_items product.
cosineUnconstr=np.einsum('ij,ji->i', EEnn, FFtnn)
del EEnn
del FFtnn
plt.hist(cosineUnconstr, bins=100, alpha=0.8,  density=True,histtype='step', color='black', linewidth=2.0,  label='calibrated')
plt.show()

In [None]:
plt.hist(cosineUnconstr, bins=100, alpha=0.8,  density=True,histtype='step', color='black', linewidth=2.0,  label='calibrated')
plt.show()

In [None]:
save_pkl(cosineUnconstr, "/root/projects/mySLIM/paper/msd_cosineUnconstr.pkl" )

In [None]:

######### NEW ######### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Low-rank model BB = EE.dot(FFt), trained by alternating closed-form updates of
# FFt (given EE) and EE (given FFt). After every iteration the per-item vector
# betavec is refreshed from the current product and fed into the next iteration
# through XtildeDiag.
#
# Fix: a leftover placeholder `XtildeDiag = ...` used to overwrite the computed
# XtildeDiag with Ellipsis right before its first use, which made
# `XtX * XtildeDiag` raise a TypeError. The placeholder is removed so the value
# computed at the top of each iteration is the one actually used.
#
# Note: XtX's diagonal is overwritten in place; it holds XtXdiag when the cell ends.

aaaa=1.0
bbbb=1.0

for prob_dropout in [0.25]: #, 0.3333, 0.5, 0.66667, 0.75]:
    boost= prob_dropout/(1.0-prob_dropout)
    print("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb prob_dropout= {}".format(prob_dropout))
    for L2reg in [ 0.0  ]:
        for dim in  [   1000 ] : #[ 10 , 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]:
            print("aaaaaaa dim={}".format(dim))

            print(datetime.datetime.now())
            # EE starts as small unseeded Gaussian noise, so results vary run to run.
            EE=np.random.randn(XtX.shape[0], dim) * 0.0001 #* sparsityMask
            print(datetime.datetime.now())

            print("precomputing")
            # PP is the inverse of XtX with diagonal XtXdiag + L2reg + boost*XtXdiag.
            ii_diag=np.diag_indices(XtX.shape[0])
            XtX[ii_diag]=L2reg+boost*XtXdiag  +XtXdiag
            PP=np.linalg.inv(XtX)

            betavec=0.0 #init

            for itercnt in range(40):
                print("================= iterCnt: {}".format(itercnt))
                # Scalar 1.0 in the first iteration (betavec = 0.0), per-item vector afterwards.
                XtildeDiag = 1.0+betavec*(1.0-bbbb/(aaaa*prob_dropout+bbbb*(1.0-prob_dropout)))

                # ---- update FFt given EE
                XtX[ii_diag]= L2reg+boost*XtXdiag  +XtXdiag
                HH=EE.T.dot(XtX).dot(EE)
                HH= np.linalg.inv(HH)  .dot(EE.T)
                # Restore the original diagonal before forming the right-hand side.
                XtX[ii_diag]= XtXdiag
                # Broadcasting scales column j of XtX by XtildeDiag[j].
                GG= HH.dot(XtX * XtildeDiag)
                diag_up = np.diag(EE.dot(GG))-betavec
                diag_down = np.diag(EE.dot(HH))
                # eta makes diag(EE.dot(GG - HH*eta)) equal to betavec.
                eta= diag_up / diag_down
                FFt = GG - HH * eta

                # ---- update EE given FFt (eta from the FFt update is reused)
                GG= FFt.dot(FFt.T)
                GG=np.linalg.inv(GG)
                # Alternatives that were considered for eta here:
                #   KK = FFt.T.dot(GG).dot(FFt)
                #   eta= np.linalg.solve( PP * KK   , np.diag(KK))
                #   eta= np.linalg.pinv(HH * KK ).dot(np.diag(KK))
                HH = XtX * XtildeDiag
                HH[ii_diag] -= eta
                HH=PP.dot(HH)
                EE=HH.dot( FFt.T.dot(GG) )

                # ---- refresh betavec from the current product with its diagonal zeroed
                XtX[ii_diag]=XtXdiag
                BB= EE.dot(FFt)
                BB[ii_diag]=0.0
                betavec= (1.0-prob_dropout)*(1.0- (L2reg+np.diag(XtX.dot(BB)))  / (L2reg+XtXdiag))
                print(datetime.datetime.now())

                ###### eval
                # Evaluate the full product (diagonal not zeroed) at selected iteration counts only.
                if (itercnt+1) in [1,5,10,15,20,30,40]:
                    print("========================= eval:")
                    BB= EE.dot(FFt)
                    evaluate(BB)

                    print(datetime.datetime.now())

In [None]:
--- boost=1, l2=0, full diag
Test NDCG@100=0.37212 (0.00098)
Test Recall@20=0.34091 (0.00126)
Test Recall@50=0.42247 (0.00124)

--- boost=0.33333, l2=0, zero diag
Test NDCG@100=0.38136 (0.00099)
Test Recall@20=0.35009 (0.00127)
Test Recall@50=0.42930 (0.00125)


# tensorflow DAE

In [None]:
import tensorflow as tf
from tensorflow.contrib.layers import apply_regularization, l2_regularizer

In [None]:
def train_TF_DAE(weightsList, X,  hidden_dim, L2reg_total, dropout_prob, lam_W, train_epochs, bsize = 256):
    """Train a two-layer linear denoising autoencoder with TensorFlow 1.x / Adam.

    Model: y_hat = dropout(x) . W0 . W1, with no bias terms and no
    non-linearity. Each mini-batch of rows of ``X`` is both the input and the
    reconstruction target.

    Per-batch loss:
        mean((x - y_hat)^2)
        + l2_regularizer(L2reg_total) applied to the product W0 . W1
        + l2_regularizer(lam_W) applied to W0 and W1 individually

    Parameters
    ----------
    weightsList : list
        Empty list: both weight matrices are Xavier-initialised.
        Otherwise ``weightsList[0]`` / ``weightsList[1]`` are used as the
        initial values of W0 / W1 (warm start).
    X : matrix indexable by an array of row indices
        ``X.shape[1]`` fixes the input and output width. scipy sparse batches
        are densified and cast to float32 before being fed.
    hidden_dim : int
        Width of the hidden layer.
    L2reg_total : float
        Regularisation scale for the product W0 . W1, fed unchanged on every
        batch.
    dropout_prob : float
        Passed as ``rate`` to ``tf.nn.dropout`` on the input.
    lam_W : float
        Regularisation scale for the individual weight matrices.
    train_epochs : int
        Number of passes over the rows of ``X``.
    bsize : int
        Mini-batch size (the last batch of an epoch may be smaller).

    Returns
    -------
    list
        ``[UU, VV, loss_list]``: the trained W0 and W1 as numpy arrays and the
        mean training loss of every epoch.

    Side effects: resets the default TF graph, prints progress and timestamps,
    and calls the notebook-level ``evaluate`` on ``UU.dot(VV)`` after epochs
    1, 5, 10, 20, 30, 40 and 50 (1-based).
    """
    in_dim = X.shape[1]
    out_dim = in_dim

    # Start from a clean graph so the function can be called repeatedly.
    tf.reset_default_graph()

    # Batch placeholders (leading dimension is the batch size).
    input_ph = tf.placeholder(tf.float32, shape=(None, in_dim), name='input')
    target_ph = tf.placeholder(tf.float32, shape=(None, out_dim), name='target')
    # Regularisation strengths are fed at run time.
    lam = tf.placeholder(tf.float32, shape=(), name='lambda')
    lam_total = tf.placeholder(tf.float32, shape=(), name='lambda_total')

    # Weight matrices only; the model has no bias terms.
    layer_sizes = [in_dim, hidden_dim, out_dim]
    warm_start = len(weightsList) != 0
    Ws = []
    for layer_idx in range(len(layer_sizes) - 1):
        din = layer_sizes[layer_idx]
        dout = layer_sizes[layer_idx + 1]
        print("Layer %d: input dimension=%d, output dimension=%d" % (layer_idx, din, dout))
        if warm_start:
            weight = tf.Variable(initial_value=weightsList[layer_idx], name="weight%d" % layer_idx)
        else:
            weight = tf.get_variable(name="weight%d" % layer_idx, shape=[din, dout],
                                     initializer=tf.contrib.layers.xavier_initializer())
        Ws.append(weight)

    # Forward pass: dropout on the input, then purely linear layers.
    hidden = tf.nn.dropout(input_ph, rate=dropout_prob)
    for weight in Ws[:-1]:
        hidden = tf.matmul(hidden, weight)
    y_hat = tf.matmul(hidden, Ws[-1])

    # Penalty on each weight matrix separately ...
    per_layer_reg = l2_regularizer(lam)
    reg_var = apply_regularization(per_layer_reg, Ws)
    # ... and on the end-to-end product W0 . W1.
    product_reg = l2_regularizer(lam_total)
    reg_var_total = apply_regularization(product_reg, [tf.matmul(Ws[0], Ws[1])])

    residual = tf.subtract(target_ph, y_hat)
    mse = tf.reduce_mean(tf.square(residual))
    loss = mse + reg_var_total + reg_var

    train_op = tf.train.AdamOptimizer().minimize(loss)

    n_rows = X.shape[0]
    idxlist = np.arange(n_rows)
    eval_epochs = (1, 5, 10, 20, 30, 40, 50)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        loss_list = []
        mse_list = []
        for epoch in range(train_epochs):
            batch_losses = []
            batch_mses = []

            # New random row order every epoch.
            np.random.shuffle(idxlist)
            for start in range(0, n_rows, bsize):
                stop = min(start + bsize, n_rows)

                batch = X[idxlist[start:stop]]
                if sparse.isspmatrix(batch):
                    batch = batch.toarray().astype('float32')

                # Autoencoder: the batch is its own target.
                feed = {input_ph: batch, target_ph: batch, lam: lam_W, lam_total: L2reg_total}
                batch_loss, batch_mse, _ = sess.run([loss, mse, train_op], feed_dict=feed)
                batch_losses.append(batch_loss)
                batch_mses.append(batch_mse)

            loss_list.append(np.mean(batch_losses))
            mse_list.append(np.mean(batch_mses))

            print("Epoch: %d, training_rmse=%.3f, training_loss=%.3f" % (epoch, np.sqrt(mse_list[-1]), loss_list[-1]))
            print(datetime.datetime.now())

            if epoch + 1 in eval_epochs:
                print("--epoch {}".format(epoch))
                UU, VV = sess.run([Ws[0], Ws[1]])
                evaluate(UU.dot(VV))

        UU, VV = sess.run([Ws[0], Ws[1]])
    return [UU, VV, loss_list]



In [None]:
# DAE run: 500 hidden units, no dropout, L2 penalty on the product U.V only.
hidden_dim = 500
dropout_prob = 0.0
L2reg_total = 100000.0
lam_W = 0.0      # L2 per layer
bsize = 4096     # batch size for training
train_epochs = 5

# Total penalty scaled by the batch fraction bsize / userCnt.
L2reg_total_perbatch = float(bsize) / userCnt * L2reg_total
print(datetime.datetime.now())
UUtf, VVtf, loss_list = train_TF_DAE(
    weightsList=[], X=X, hidden_dim=hidden_dim, L2reg_total=L2reg_total_perbatch,
    dropout_prob=dropout_prob, lam_W=lam_W, train_epochs=train_epochs, bsize=bsize)
print(datetime.datetime.now())

In [None]:
# Recompute the per-batch penalty from the current bsize / userCnt / L2reg_total.
L2reg_total_perbatch = float(bsize) / userCnt * L2reg_total

In [None]:
# Item-item matrix implied by the two trained layers.
BBtf = UUtf.dot(VVtf)
print(UUtf.shape)
print(VVtf.shape)
print(BBtf.shape)




In [None]:
# Score the current item-item matrix BBtf with the notebook-level evaluate helper.
evaluate(BBtf)

In [None]:
# Recorded result (notes, not executable code):
# Test NDCG@100=0.37316 (0.00099)
# Test Recall@20=0.34315 (0.00126)
# Test Recall@50=0.42084 (0.00124)

In [None]:
# DAE run (500 hidden units, no dropout) followed by evaluation of U.V.
hidden_dim = 500
dropout_prob = 0.0
L2reg_total = 100000.0
lam_W = 0.0      # L2 per layer
bsize = 4096     # batch size for training
train_epochs = 5

# Total penalty scaled by the batch fraction bsize / userCnt.
L2reg_total_perbatch = float(bsize) / userCnt * L2reg_total
print(datetime.datetime.now())
UUtf, VVtf, loss_list = train_TF_DAE(
    weightsList=[], X=X, hidden_dim=hidden_dim, L2reg_total=L2reg_total_perbatch,
    dropout_prob=dropout_prob, lam_W=lam_W, train_epochs=train_epochs, bsize=bsize)
print(datetime.datetime.now())

BBtf = UUtf.dot(VVtf)
evaluate(BBtf)

In [None]:
# DAE run with the penalty scaled by 1/userCnt (no batch-size factor).
hidden_dim = 500
dropout_prob = 0.0
L2reg_total = 100000.0
lam_W = 0.0      # L2 per layer
bsize = 4096     # batch size for training
train_epochs = 5

L2reg_total_perbatch = 1.0/userCnt * L2reg_total
print(datetime.datetime.now())
UUtf, VVtf, loss_list = train_TF_DAE(
    weightsList=[], X=X, hidden_dim=hidden_dim, L2reg_total=L2reg_total_perbatch,
    dropout_prob=dropout_prob, lam_W=lam_W, train_epochs=train_epochs, bsize=bsize)
print(datetime.datetime.now())

BBtf = UUtf.dot(VVtf)
evaluate(BBtf)

In [None]:
# DAE run with the penalty scaled by 0.001/userCnt.
hidden_dim = 500
dropout_prob = 0.0
L2reg_total = 100000.0
lam_W = 0.0      # L2 per layer
bsize = 4096     # batch size for training
train_epochs = 5

L2reg_total_perbatch = 0.001*1.0/userCnt * L2reg_total
print(datetime.datetime.now())
UUtf, VVtf, loss_list = train_TF_DAE(
    weightsList=[], X=X, hidden_dim=hidden_dim, L2reg_total=L2reg_total_perbatch,
    dropout_prob=dropout_prob, lam_W=lam_W, train_epochs=train_epochs, bsize=bsize)
print(datetime.datetime.now())

BBtf = UUtf.dot(VVtf)
evaluate(BBtf)

In [None]:
# DAE run with the penalty scaled by 0.0001/userCnt, then five warm-started
# continuation rounds.
# Fix: the first assignment used to read `dden_dim=500`, which left
# `hidden_dim` at whatever value an earlier cell had set.
hidden_dim=500
dropout_prob=0.0
L2reg_total = 100000.0
lam_W = 0.0  # L2 per layer
bsize = 4096  # batch size for training
train_epochs = 5

L2reg_total_perbatch = 0.0001*1.0/userCnt * L2reg_total
print(datetime.datetime.now())
UUtf, VVtf, loss_list = train_TF_DAE([],X,  hidden_dim, L2reg_total_perbatch, dropout_prob, lam_W, train_epochs, bsize )
print(datetime.datetime.now())

BBtf= UUtf.dot(VVtf)
evaluate(BBtf)


# Continue training from the current weights; every call builds a fresh graph
# and a fresh Adam optimizer, so only the weights carry over.
for _ in range(5):
    print(datetime.datetime.now())
    UUtf, VVtf, loss_list = train_TF_DAE([UUtf, VVtf],X, hidden_dim, L2reg_total_perbatch, dropout_prob, lam_W, train_epochs, bsize )
    print(datetime.datetime.now())

    BBtf= UUtf.dot(VVtf)
    evaluate(BBtf)

In [None]:
# DAE run with the penalty scaled by 0.0003/userCnt, then five warm-started
# continuation rounds.
hidden_dim = 500
dropout_prob = 0.0
L2reg_total = 100000.0
lam_W = 0.0      # L2 per layer
bsize = 4096     # batch size for training
train_epochs = 5

L2reg_total_perbatch = 0.0003*1.0/userCnt * L2reg_total
print(datetime.datetime.now())
UUtf, VVtf, loss_list = train_TF_DAE(
    weightsList=[], X=X, hidden_dim=hidden_dim, L2reg_total=L2reg_total_perbatch,
    dropout_prob=dropout_prob, lam_W=lam_W, train_epochs=train_epochs, bsize=bsize)
print(datetime.datetime.now())

BBtf = UUtf.dot(VVtf)
evaluate(BBtf)


# Continue from the current weights; each call rebuilds the graph and optimizer.
for _ in range(5):
    print(datetime.datetime.now())
    UUtf, VVtf, loss_list = train_TF_DAE(
        weightsList=[UUtf, VVtf], X=X, hidden_dim=hidden_dim, L2reg_total=L2reg_total_perbatch,
        dropout_prob=dropout_prob, lam_W=lam_W, train_epochs=train_epochs, bsize=bsize)
    print(datetime.datetime.now())

    BBtf = UUtf.dot(VVtf)
    evaluate(BBtf)

In [None]:
# Scratch: display the numeric value of the 0.0001/userCnt-scaled penalty.
0.0001*1.0/userCnt * L2reg_total

In [None]:
# DAE run with an absolute per-batch penalty of 1e-5, then twenty warm-started
# continuation rounds of two epochs each.
# Fix: the first assignment used to read `dden_dim=500`, which left
# `hidden_dim` at whatever value an earlier cell had set.
hidden_dim=500
dropout_prob=0.0
L2reg_total = 100000.0
lam_W = 0.0  # L2 per layer
bsize = 4096  # batch size for training
train_epochs = 2

L2reg_total_perbatch = 1.0e-5
print(datetime.datetime.now())
UUtf, VVtf, loss_list = train_TF_DAE([],X,  hidden_dim, L2reg_total_perbatch, dropout_prob, lam_W, train_epochs, bsize )
print(datetime.datetime.now())

BBtf= UUtf.dot(VVtf)
evaluate(BBtf)


# Continue training from the current weights; every call builds a fresh graph
# and a fresh Adam optimizer, so only the weights carry over.
for _ in range(20):
    print(datetime.datetime.now())
    UUtf, VVtf, loss_list = train_TF_DAE([UUtf, VVtf],X, hidden_dim, L2reg_total_perbatch, dropout_prob, lam_W, train_epochs, bsize )
    print(datetime.datetime.now())

    BBtf= UUtf.dot(VVtf)
    evaluate(BBtf)

In [None]:
# DAE run with an absolute per-batch penalty of 1e-4, then twenty warm-started
# single-epoch continuation rounds.
hidden_dim = 500
dropout_prob = 0.0
L2reg_total = 100000.0
lam_W = 0.0      # L2 per layer
bsize = 4096     # batch size for training
train_epochs = 1

L2reg_total_perbatch = 1.0e-4
print(datetime.datetime.now())
UUtf, VVtf, loss_list = train_TF_DAE(
    weightsList=[], X=X, hidden_dim=hidden_dim, L2reg_total=L2reg_total_perbatch,
    dropout_prob=dropout_prob, lam_W=lam_W, train_epochs=train_epochs, bsize=bsize)
print(datetime.datetime.now())

BBtf = UUtf.dot(VVtf)
evaluate(BBtf)


# Continue from the current weights; each call rebuilds the graph and optimizer.
for _ in range(20):
    print(datetime.datetime.now())
    UUtf, VVtf, loss_list = train_TF_DAE(
        weightsList=[UUtf, VVtf], X=X, hidden_dim=hidden_dim, L2reg_total=L2reg_total_perbatch,
        dropout_prob=dropout_prob, lam_W=lam_W, train_epochs=train_epochs, bsize=bsize)
    print(datetime.datetime.now())

    BBtf = UUtf.dot(VVtf)
    evaluate(BBtf)

In [None]:
# DAE run with an absolute per-batch penalty of 3e-5, then ten warm-started
# single-epoch continuation rounds.
hidden_dim = 500
dropout_prob = 0.0
L2reg_total = 100000.0
lam_W = 0.0      # L2 per layer
bsize = 4096     # batch size for training
train_epochs = 1

L2reg_total_perbatch = 3.0e-5
print(datetime.datetime.now())
UUtf, VVtf, loss_list = train_TF_DAE(
    weightsList=[], X=X, hidden_dim=hidden_dim, L2reg_total=L2reg_total_perbatch,
    dropout_prob=dropout_prob, lam_W=lam_W, train_epochs=train_epochs, bsize=bsize)
print(datetime.datetime.now())

BBtf = UUtf.dot(VVtf)
evaluate(BBtf)


# Continue from the current weights; each call rebuilds the graph and optimizer.
for _ in range(10):
    print(datetime.datetime.now())
    UUtf, VVtf, loss_list = train_TF_DAE(
        weightsList=[UUtf, VVtf], X=X, hidden_dim=hidden_dim, L2reg_total=L2reg_total_perbatch,
        dropout_prob=dropout_prob, lam_W=lam_W, train_epochs=train_epochs, bsize=bsize)
    print(datetime.datetime.now())

    BBtf = UUtf.dot(VVtf)
    evaluate(BBtf)

In [None]:
# ==== 1000 dim

In [None]:
# Dropout sweep for a 1000-unit DAE with no L2 penalty at all.
hidden_dim = 1000
dropout_prob = 0.5   # overwritten by the sweep below
L2reg_total = 100000.0
lam_W = 0.0          # L2 per layer
bsize = 32000        # batch size for training
train_epochs = 50
L2reg_total_perbatch = 0.0


#for  L2reg_total_perbatch in [ 1.0e-6, 2.0e-6, 5.0e-6, 1.0e-5, 2.0e-5, 5.0e-5, 1.0e-4   ]:
for dropout_prob in [0.25, 0.3333, 0.5, 0.66667, 0.75, 0.9, 0.95, 0.99]:

    print("============================================================ dropout_prob = {}".format(dropout_prob))
    print(datetime.datetime.now())
    # Evaluation happens inside train_TF_DAE at its fixed epoch checkpoints;
    # UUtf / VVtf are overwritten on every sweep iteration.
    UUtf, VVtf, loss_list = train_TF_DAE(
        weightsList=[], X=X, hidden_dim=hidden_dim, L2reg_total=L2reg_total_perbatch,
        dropout_prob=dropout_prob, lam_W=lam_W, train_epochs=train_epochs, bsize=bsize)
    print(datetime.datetime.now())


In [None]:
# Sanity check: multiplying a (3, 1) column by a (3, 2) matrix in TensorFlow
# broadcasts the column across the matrix columns (row-wise scaling).
with tf.Session() as sess:
    x = tf.constant(np.array([1, 2, 3])[:, None])
    aa = tf.constant(np.array([[10, 30], [40, 60], [70, 90]]))
    bb = x * aa
    print(sess.run(aa))
    print(sess.run(bb))
    

In [None]:
# DAE run with the penalty scaled by 0.00001/userCnt.
# Fix: the first assignment used to read `dden_dim=500`, which left
# `hidden_dim` at whatever value an earlier cell had set.
hidden_dim=500
dropout_prob=0.0
L2reg_total = 100000.0
lam_W = 0.0  # L2 per layer
bsize = 4096  # batch size for training
train_epochs = 5

L2reg_total_perbatch = 0.00001*1.0/userCnt * L2reg_total
print(datetime.datetime.now())
UUtf, VVtf, loss_list = train_TF_DAE([],X,  hidden_dim, L2reg_total_perbatch, dropout_prob, lam_W, train_epochs, bsize )
print(datetime.datetime.now())

BBtf= UUtf.dot(VVtf)
evaluate(BBtf)

In [None]:
# DAE run with the penalty scaled by 0.003/userCnt.
# Fix: the first assignment used to read `dden_dim=500`, which left
# `hidden_dim` at whatever value an earlier cell had set.
hidden_dim=500
dropout_prob=0.0
L2reg_total = 100000.0
lam_W = 0.0  # L2 per layer
bsize = 4096  # batch size for training
train_epochs = 5

L2reg_total_perbatch = 0.003*1.0/userCnt * L2reg_total
print(datetime.datetime.now())
UUtf, VVtf, loss_list = train_TF_DAE([],X,  hidden_dim, L2reg_total_perbatch, dropout_prob, lam_W, train_epochs, bsize )
print(datetime.datetime.now())

BBtf= UUtf.dot(VVtf)
evaluate(BBtf)

In [None]:
22222222222222222222222222222222222222222222222222222

In [None]:
def train_TF_analytic(weightsList, X,  hidden_dim, L2reg_xtx, L2reg_total, dropout_prob, lam_W, train_epochs, bsize = 256):
    """Train the two-layer linear autoencoder with an extra item-weighted L2 term.

    Model: y_hat = dropout(x) . W0 . W1 (no bias, no non-linearity); each
    mini-batch is its own reconstruction target.

    Per-batch loss:
        mean((x - y_hat)^2)
        + l2_regularizer(L2reg_total) on W0 . W1
        + l2_regularizer(lam_W)       on W0 and W1 individually
        + l2_regularizer(L2reg_xtx)   on (sqrt(XtXdiag)[:, None] * W0) . W1

    ``XtXdiag`` is read from the notebook's global scope (presumably the
    diagonal of X^T X -- TODO confirm against the cell that defines it).

    Parameters
    ----------
    weightsList : list
        Empty list: Xavier initialisation. Otherwise ``weightsList[0]`` /
        ``weightsList[1]`` are the initial values of W0 / W1.
    X : matrix indexable by an array of row indices
        scipy sparse batches are densified and cast to float32.
    hidden_dim : int
        Width of the hidden layer.
    L2reg_xtx, L2reg_total, lam_W : float
        Regularisation scales, fed unchanged on every batch.
    dropout_prob : float
        Passed as ``rate`` to ``tf.nn.dropout`` on the input.
    train_epochs : int
        Number of passes over the rows of ``X``.
    bsize : int
        Mini-batch size.

    Returns
    -------
    list
        ``[UU, VV, loss_list]``: trained W0, W1 (numpy arrays) and the mean
        training loss per epoch.

    Side effects: resets the default TF graph, prints progress, and calls the
    notebook-level ``evaluate`` on ``UU.dot(VV)`` after epochs 1, 5, 10, 20,
    30, 40 and 50 (1-based).
    """
    n_dim = X.shape[1]
    y_dim = n_dim

    tf.reset_default_graph()  # reset graph if run many times
    # Batch placeholders (leading dimension is the batch size).
    input_ph = tf.placeholder(tf.float32, shape=(None, n_dim), name='input')
    target_ph = tf.placeholder(tf.float32, shape=(None, y_dim), name='target')
    # Regularisation strengths are fed at run time.
    lam = tf.placeholder(tf.float32, shape=(), name='lambda')
    lam_total = tf.placeholder(tf.float32, shape=(), name='lambda_total')
    lam_xtx = tf.placeholder(tf.float32, shape=(), name='lambda_xtx')

    # Weight matrices only; the model has no bias terms.
    model_dims = [n_dim, hidden_dim, y_dim]
    Ws = []
    for l, (din, dout) in enumerate(zip(model_dims[:-1], model_dims[1:])):
        print("Layer %d: input dimension=%d, output dimension=%d" % (l, din, dout))
        if len(weightsList) == 0:
            Ws.append(tf.get_variable(name="weight%d" % l, shape=[din, dout],
                                      initializer=tf.contrib.layers.xavier_initializer()))
        else:
            Ws.append(tf.Variable(initial_value=weightsList[l], name="weight%d" % l))

    # Forward pass: dropout on the input, then purely linear layers.
    h = tf.nn.dropout(input_ph, rate=dropout_prob)
    for w in Ws[:-1]:
        h = tf.matmul(h, w)
    y_hat = tf.matmul(h, Ws[-1])

    # Per-layer penalty and penalty on the end-to-end product.
    reg = l2_regularizer(lam)
    reg_var = apply_regularization(reg, Ws)

    reg_total = l2_regularizer(lam_total)
    reg_var_total = apply_regularization(reg_total, [tf.matmul(Ws[0], Ws[1])])

    # Row scaling for the item-weighted penalty. The dtype is pinned to float32:
    # tf.constant would otherwise inherit numpy's dtype, and a float64 constant
    # cannot be multiplied with the float32 weights.
    item_scale = tf.constant(np.sqrt(XtXdiag)[:, None], dtype=tf.float32)
    reg_xtx = l2_regularizer(lam_xtx)
    reg_var_xtx = apply_regularization(reg_xtx, [tf.matmul(tf.multiply(item_scale, Ws[0]), Ws[1])])

    mse = tf.reduce_mean(tf.square(tf.subtract(target_ph, y_hat)))

    loss = mse + reg_var_total + reg_var + reg_var_xtx

    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(loss)

    N = X.shape[0]
    idxlist = np.arange(N)
    with tf.Session() as sess:

        init = tf.global_variables_initializer()
        sess.run(init)

        loss_list = list()
        mse_list = list()

        for epoch in range(train_epochs):
            loss_epoch = list()
            mse_epoch = list()

            # New random row order every epoch.
            np.random.shuffle(idxlist)
            for st_idx in range(0, N, bsize):
                end_idx = min(st_idx + bsize, N)

                inp = X[idxlist[st_idx:end_idx]]
                if sparse.isspmatrix(inp):
                    inp = inp.toarray()
                    inp = inp.astype('float32')

                # Autoencoder: the batch is its own target.
                tar = inp

                feed_dict = {input_ph: inp, target_ph: tar, lam: lam_W,
                             lam_total: L2reg_total, lam_xtx: L2reg_xtx}

                batch_loss, batch_mse, _ = sess.run([loss, mse, train_op], feed_dict=feed_dict)
                loss_epoch.append(batch_loss)
                mse_epoch.append(batch_mse)

            loss_list.append(np.mean(loss_epoch))
            mse_list.append(np.mean(mse_epoch))

            print("Epoch: %d, training_rmse=%.3f, training_loss=%.3f" % (epoch, np.sqrt(mse_list[-1]), loss_list[-1]))
            print(datetime.datetime.now())

            if epoch + 1 in [1, 5, 10, 20, 30, 40, 50]:
                print("--epoch {}".format(epoch))
                UU = sess.run(Ws[0])
                VV = sess.run(Ws[1])
                BBtf = UU.dot(VV)
                evaluate(BBtf)

        UU = sess.run(Ws[0])
        VV = sess.run(Ws[1])
    return [UU, VV, loss_list]



In [None]:
def train_TF_analytic2(weightsList, X,  hidden_dim, L2reg_xtx, L2reg_total, dropout_prob, lam_W, train_epochs, bsize = 256):
    """Train the two-layer linear autoencoder with hand-written L2 penalties.

    Model: y_hat = dropout(x) . W0 . W1 (no bias, no non-linearity); each
    mini-batch is its own reconstruction target.

    Per-batch loss:
        mean((x - y_hat)^2)
        + lam_W * (sum(W0^2) + sum(W1^2))
        + sum(((sqrt(L2reg_total + L2reg_xtx * XtXdiag)[:, None] * W0) . W1)^2)

    The regularisation strengths are baked into the graph as constants (no
    placeholders). ``XtXdiag`` is read from the notebook's global scope
    (presumably the diagonal of X^T X -- TODO confirm).

    Parameters
    ----------
    weightsList : list
        Empty list: Xavier initialisation. Otherwise ``weightsList[0]`` /
        ``weightsList[1]`` are the initial values of W0 / W1.
    X : matrix indexable by an array of row indices
        scipy sparse batches are densified and cast to float32.
    hidden_dim : int
        Width of the hidden layer.
    L2reg_xtx, L2reg_total, lam_W : float
        Regularisation scales as used in the loss above.
    dropout_prob : float
        Passed as ``rate`` to ``tf.nn.dropout`` on the input.
    train_epochs : int
        Number of passes over the rows of ``X``.
    bsize : int
        Mini-batch size.

    Returns
    -------
    list
        ``[UU, VV, loss_list]``: trained W0, W1 (numpy arrays) and the mean
        training loss per epoch.

    Side effects: resets the default TF graph, prints progress, and calls the
    notebook-level ``evaluate`` on ``UU.dot(VV)`` after epochs 1, 5, 10, 20,
    30, 40 and 50 (1-based).
    """
    n_dim = X.shape[1]
    y_dim = n_dim

    tf.reset_default_graph()  # reset graph if run many times
    # Batch placeholders (leading dimension is the batch size).
    input_ph = tf.placeholder(tf.float32, shape=(None, n_dim), name='input')
    target_ph = tf.placeholder(tf.float32, shape=(None, y_dim), name='target')

    # Weight matrices only; the model has no bias terms.
    model_dims = [n_dim, hidden_dim, y_dim]
    Ws = []
    for l, (din, dout) in enumerate(zip(model_dims[:-1], model_dims[1:])):
        print("Layer %d: input dimension=%d, output dimension=%d" % (l, din, dout))
        if len(weightsList) == 0:
            Ws.append(tf.get_variable(name="weight%d" % l, shape=[din, dout],
                                      initializer=tf.contrib.layers.xavier_initializer()))
        else:
            Ws.append(tf.Variable(initial_value=weightsList[l], name="weight%d" % l))

    # Forward pass: dropout on the input, then purely linear layers.
    h = tf.nn.dropout(input_ph, rate=dropout_prob)
    for w in Ws[:-1]:
        h = tf.matmul(h, w)
    y_hat = tf.matmul(h, Ws[-1])

    # Constants are pinned to float32: tf.constant would otherwise inherit the
    # dtype of the Python/numpy value, and a float64 constant cannot be
    # combined with the float32 weights.
    l2_layers = tf.multiply(tf.constant(lam_W, dtype=tf.float32),
                            tf.reduce_sum(tf.square(Ws[0])) + tf.reduce_sum(tf.square(Ws[1])))

    row_scale = tf.constant(np.sqrt(L2reg_total + L2reg_xtx * XtXdiag)[:, None], dtype=tf.float32)
    l2_tot_1 = tf.matmul(tf.multiply(row_scale, Ws[0]), Ws[1])
    l2_tot = tf.reduce_sum(tf.square(l2_tot_1))

    mse = tf.reduce_mean(tf.square(tf.subtract(target_ph, y_hat)))

    loss = mse + l2_layers + l2_tot

    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(loss)

    N = X.shape[0]
    idxlist = np.arange(N)
    with tf.Session() as sess:

        init = tf.global_variables_initializer()
        sess.run(init)

        loss_list = list()
        mse_list = list()

        for epoch in range(train_epochs):
            loss_epoch = list()
            mse_epoch = list()

            # New random row order every epoch.
            np.random.shuffle(idxlist)
            for st_idx in range(0, N, bsize):
                end_idx = min(st_idx + bsize, N)

                inp = X[idxlist[st_idx:end_idx]]
                if sparse.isspmatrix(inp):
                    inp = inp.toarray()
                    inp = inp.astype('float32')

                # Autoencoder: the batch is its own target.
                tar = inp

                feed_dict = {input_ph: inp, target_ph: tar}

                batch_loss, batch_mse, _ = sess.run([loss, mse, train_op], feed_dict=feed_dict)
                loss_epoch.append(batch_loss)
                mse_epoch.append(batch_mse)

            loss_list.append(np.mean(loss_epoch))
            mse_list.append(np.mean(mse_epoch))

            print("Epoch: %d, training_rmse=%.3f, training_loss=%.3f" % (epoch, np.sqrt(mse_list[-1]), loss_list[-1]))
            print(datetime.datetime.now())

            if epoch + 1 in [1, 5, 10, 20, 30, 40, 50]:
                print("--epoch {}".format(epoch))
                UU = sess.run(Ws[0])
                VV = sess.run(Ws[1])
                BBtf = UU.dot(VV)
                evaluate(BBtf)

        UU = sess.run(Ws[0])
        VV = sess.run(Ws[1])
    return [UU, VV, loss_list]



In [None]:
# train_TF_analytic with 1000 hidden units: only the XtXdiag-weighted penalty
# is active (per-layer and total penalties are zero).
hidden_dim = 1000
#dropout_prob=0.0
#L2reg_total = 0.0
bsize = 32000        # batch size for training
train_epochs = 50

lam_W = 0.0          # L2 per layer
L2reg_total_perbatch = 0.0
L2reg_xtx_perbatch = 2e-10 *2.0

#for  L2reg_total_perbatch in [ 1.0e-6, 2.0e-6, 5.0e-6, 1.0e-5, 2.0e-5, 5.0e-5, 1.0e-4   ]:
for dropout_prob in [0.0]:

    print("============================================================ dropout_prob = {}".format(dropout_prob))
    print(datetime.datetime.now())
    UUtf, VVtf, loss_list = train_TF_analytic(
        weightsList=[], X=X, hidden_dim=hidden_dim, L2reg_xtx=L2reg_xtx_perbatch,
        L2reg_total=L2reg_total_perbatch, dropout_prob=dropout_prob, lam_W=lam_W,
        train_epochs=train_epochs, bsize=bsize)
    print(datetime.datetime.now())


In [None]:
# Same configuration as the train_TF_analytic run, using train_TF_analytic2
# (penalties written out by hand instead of via l2_regularizer).
hidden_dim = 1000
#dropout_prob=0.0
#L2reg_total = 0.0
bsize = 32000        # batch size for training
train_epochs = 50

lam_W = 0.0          # L2 per layer
L2reg_total_perbatch = 0.0
L2reg_xtx_perbatch = 2e-10 *2.0

#for  L2reg_total_perbatch in [ 1.0e-6, 2.0e-6, 5.0e-6, 1.0e-5, 2.0e-5, 5.0e-5, 1.0e-4   ]:
for dropout_prob in [0.0]:

    print("============================================================ dropout_prob = {}".format(dropout_prob))
    print(datetime.datetime.now())
    UUtf, VVtf, loss_list = train_TF_analytic2(
        weightsList=[], X=X, hidden_dim=hidden_dim, L2reg_xtx=L2reg_xtx_perbatch,
        L2reg_total=L2reg_total_perbatch, dropout_prob=dropout_prob, lam_W=lam_W,
        train_epochs=train_epochs, bsize=bsize)
    print(datetime.datetime.now())


# analytic b=0 solution in TF

In [None]:
def train_TF_b0(weightsList, X,  hidden_dim, L2reg_xtx, L2reg_total, dropout_prob, lam_W, train_epochs, bsize = 256):
    """Train the two-layer linear autoencoder with the diagonal of W0 . W1 removed.

    Prediction: y_hat = dropout(x) . W0 . W1 - x * diag(W0 . W1), i.e. the
    self-weight of every input column is subtracted from the reconstruction.
    Each mini-batch is its own target.

    Per-batch loss, with s = sqrt(L2reg_total + L2reg_xtx * XtXdiag):
        mean((x - y_hat)^2)
        + lam_W * (sum(W0^2) + sum(W1^2))
        + sum(((s[:, None] * W0) . W1)^2)
        - sum((s * diag(W0 . W1))^2)

    The regularisation strengths are baked into the graph as constants (no
    placeholders). ``XtXdiag`` is read from the notebook's global scope
    (presumably the diagonal of X^T X -- TODO confirm).

    Parameters
    ----------
    weightsList : list
        Empty list: Xavier initialisation. Otherwise ``weightsList[0]`` /
        ``weightsList[1]`` are the initial values of W0 / W1.
    X : matrix indexable by an array of row indices
        scipy sparse batches are densified and cast to float32.
    hidden_dim : int
        Width of the hidden layer.
    L2reg_xtx, L2reg_total, lam_W : float
        Regularisation scales as used in the loss above.
    dropout_prob : float
        Passed as ``rate`` to ``tf.nn.dropout`` on the input.
    train_epochs : int
        Number of passes over the rows of ``X``.
    bsize : int
        Mini-batch size.

    Returns
    -------
    list
        ``[UU, VV, loss_list]``: trained W0, W1 (numpy arrays) and the mean
        training loss per epoch. The diagonal is NOT removed from the returned
        factors.

    Side effects: resets the default TF graph, prints progress, and calls the
    notebook-level ``evaluate`` on ``UU.dot(VV)`` after epochs 1, 5 and every
    multiple of 10 up to 90 (1-based).
    """
    n_dim = X.shape[1]
    y_dim = n_dim

    tf.reset_default_graph()  # reset graph if run many times
    # Batch placeholders (leading dimension is the batch size).
    input_ph = tf.placeholder(tf.float32, shape=(None, n_dim), name='input')
    target_ph = tf.placeholder(tf.float32, shape=(None, y_dim), name='target')

    # Weight matrices only; the model has no bias terms.
    model_dims = [n_dim, hidden_dim, y_dim]
    Ws = []
    for l, (din, dout) in enumerate(zip(model_dims[:-1], model_dims[1:])):
        print("Layer %d: input dimension=%d, output dimension=%d" % (l, din, dout))
        if len(weightsList) == 0:
            Ws.append(tf.get_variable(name="weight%d" % l, shape=[din, dout],
                                      initializer=tf.contrib.layers.xavier_initializer()))
        else:
            Ws.append(tf.Variable(initial_value=weightsList[l], name="weight%d" % l))

    # Forward pass: dropout on the input, then purely linear layers.
    h = tf.nn.dropout(input_ph, rate=dropout_prob)
    for w in Ws[:-1]:
        h = tf.matmul(h, w)

    # Subtract each column's self-weight; note this uses the un-dropped input.
    weightDiag = tf.matrix_diag_part(tf.matmul(Ws[0], Ws[1]))
    y_hat = tf.matmul(h, Ws[-1]) - tf.multiply(input_ph, weightDiag)

    l2_layers = tf.multiply(tf.constant(lam_W, dtype=tf.float32),
                            tf.reduce_sum(tf.square(Ws[0])) + tf.reduce_sum(tf.square(Ws[1])))

    # Constants are pinned to float32: tf.constant would otherwise inherit
    # numpy's dtype, and a float64 constant cannot be multiplied with the
    # float32 weights.
    item_scale = np.sqrt(L2reg_total + L2reg_xtx * XtXdiag)
    l2_tot_1 = tf.matmul(tf.multiply(tf.constant(item_scale[:, None], dtype=tf.float32), Ws[0]), Ws[1])
    l2_tot = tf.reduce_sum(tf.square(l2_tot_1))

    # Remove the diagonal's share from the penalty on the full product.
    l2tot_diag_1 = tf.multiply(tf.constant(item_scale, dtype=tf.float32), weightDiag)
    l2tot_diag = tf.reduce_sum(tf.square(l2tot_diag_1))

    mse = tf.reduce_mean(tf.square(tf.subtract(target_ph, y_hat)))

    loss = mse + l2_layers + l2_tot - l2tot_diag

    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(loss)

    N = X.shape[0]
    idxlist = np.arange(N)
    with tf.Session() as sess:

        init = tf.global_variables_initializer()
        sess.run(init)

        loss_list = list()
        mse_list = list()

        for epoch in range(train_epochs):
            loss_epoch = list()
            mse_epoch = list()

            # New random row order every epoch.
            np.random.shuffle(idxlist)
            for st_idx in range(0, N, bsize):
                end_idx = min(st_idx + bsize, N)

                inp = X[idxlist[st_idx:end_idx]]
                if sparse.isspmatrix(inp):
                    inp = inp.toarray()
                    inp = inp.astype('float32')

                # Autoencoder: the batch is its own target.
                tar = inp

                feed_dict = {input_ph: inp, target_ph: tar}

                batch_loss, batch_mse, _ = sess.run([loss, mse, train_op], feed_dict=feed_dict)
                loss_epoch.append(batch_loss)
                mse_epoch.append(batch_mse)

            loss_list.append(np.mean(loss_epoch))
            mse_list.append(np.mean(mse_epoch))

            print("Epoch: %d, training_rmse=%.3f, training_loss=%.3f" % (epoch, np.sqrt(mse_list[-1]), loss_list[-1]))
            print(datetime.datetime.now())

            if epoch + 1 in [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90]:
                print("--epoch {}".format(epoch))
                UU = sess.run(Ws[0])
                VV = sess.run(Ws[1])
                BBtf = UU.dot(VV)
                evaluate(BBtf)

        UU = sess.run(Ws[0])
        VV = sess.run(Ws[1])
    return [UU, VV, loss_list]




In [None]:
# train_TF_b0 sweep over two values of the total L2 penalty (1000 hidden units).
hidden_dim=1000
dropout_prob=0.0
#L2reg_total = 0.0
bsize = 32000  # batch size for training
train_epochs = 70

lam_W = 0.0  # L2 per layer
L2reg_total_perbatch=0.0   # overwritten by the sweep below
L2reg_xtx_perbatch=2e-10 * 0.5

#for  L2reg_total_perbatch in [ 1.0e-6, 2.0e-6, 5.0e-6, 1.0e-5, 2.0e-5, 5.0e-5, 1.0e-4   ]:
#for  dropout_prob in [ 0.0  ]:
for L2reg_total_perbatch in [ 2e-5 / 100000.0*500.0 *100.0,  2e-5 / 100000.0*500.0 /100.0 ]:
    # Report the swept value too; dropout_prob alone is the same for both runs
    # and would leave the two log sections indistinguishable.
    print("============================================================ dropout_prob = {}, L2reg_total_perbatch = {}".format(dropout_prob, L2reg_total_perbatch))
    print(datetime.datetime.now())
    UUtf, VVtf, loss_list = train_TF_b0([],X,  hidden_dim, L2reg_xtx_perbatch, L2reg_total_perbatch, dropout_prob, lam_W, train_epochs, bsize )
    print(datetime.datetime.now())


In [None]:
# Continue train_TF_b0 from the current UUtf / VVtf (warm start) with a much
# smaller XtXdiag-weighted penalty. Requires UUtf and VVtf from an earlier run.
hidden_dim=1000
dropout_prob=0.0
#L2reg_total = 0.0
bsize = 32000  # batch size for training
train_epochs = 20

lam_W = 0.0  # L2 per layer
L2reg_total_perbatch=0.0          # overwritten by the loop below
L2reg_xtx_perbatch=2e-10 * 0.5    # overwritten by the loop below

#for  L2reg_total_perbatch in [ 1.0e-6, 2.0e-6, 5.0e-6, 1.0e-5, 2.0e-5, 5.0e-5, 1.0e-4   ]:
#for  dropout_prob in [ 0.0  ]:
for L2reg_total_perbatch in [ 0.0  ]:
    for L2reg_xtx_perbatch in [ 1e-12 ]:
        # Report the regularisers actually being swept, not just dropout_prob.
        print("============================================================ dropout_prob = {}, L2reg_total_perbatch = {}, L2reg_xtx_perbatch = {}".format(dropout_prob, L2reg_total_perbatch, L2reg_xtx_perbatch))
        print(datetime.datetime.now())
        UUtf, VVtf, loss_list = train_TF_b0([UUtf, VVtf],X,  hidden_dim, L2reg_xtx_perbatch, L2reg_total_perbatch, dropout_prob, lam_W, train_epochs, bsize )
        print(datetime.datetime.now())


In [None]:
# train_TF_b0 from scratch with only the XtXdiag-weighted penalty (1e-10).
hidden_dim=1000
dropout_prob=0.0
#L2reg_total = 0.0
bsize = 32000  # batch size for training
train_epochs = 70

lam_W = 0.0  # L2 per layer
L2reg_total_perbatch=0.0          # overwritten by the loop below
L2reg_xtx_perbatch=2e-10 * 0.5    # overwritten by the loop below

#for  L2reg_total_perbatch in [ 1.0e-6, 2.0e-6, 5.0e-6, 1.0e-5, 2.0e-5, 5.0e-5, 1.0e-4   ]:
#for  dropout_prob in [ 0.0  ]:
for L2reg_total_perbatch in [ 0.0 ]:
    for L2reg_xtx_perbatch in [ 1e-10]:
        # Report the regularisers actually being swept, not just dropout_prob.
        print("============================================================ dropout_prob = {}, L2reg_total_perbatch = {}, L2reg_xtx_perbatch = {}".format(dropout_prob, L2reg_total_perbatch, L2reg_xtx_perbatch))
        print(datetime.datetime.now())
        UUtf, VVtf, loss_list = train_TF_b0([],X,  hidden_dim, L2reg_xtx_perbatch, L2reg_total_perbatch, dropout_prob, lam_W, train_epochs, bsize )
        print(datetime.datetime.now())


In [None]:
# Effective catalog size via entropy (cf. Theil index):
# normalise XtXdiag to a probability vector and take its Shannon entropy.
qqqq = XtXdiag / np.sum(XtXdiag)
-np.dot(qqqq, np.log(qqqq))   # np.log is the natural logarithm

In [None]:
# Exponentiate an entropy value to get an effective count.
# NOTE(review): 7.967947 is presumably the entropy displayed by the previous cell,
# pasted by hand -- confirm, or compute it from that cell's result instead.
np.exp(7.967947)

# AN EDLA with beta learning

In [28]:
# EDLAe with beta learning
#
# Alternating updates of a rank-`dim` factorisation BB = EE . FFt together with
# two per-item vectors betaVec and etaVec.
# NOTE(review): the rhoVec / etaVec structure looks like an ADMM-style scheme
# (penalty + dual variable) -- confirm against the derivation.
#
# Globals read: XtX, XtXdiag, evaluate. XtX's diagonal is overwritten in place
# several times per iteration and restored to XtXdiag before each use as the
# plain matrix, so the statement order below matters.




#print("precomputing")
#precompute
#ii_diag=np.diag_indices(XtX.shape[0])
#XtX[ii_diag]=L2reg+boost*XtXdiag  +XtXdiag 
#PP=np.linalg.inv(XtX)

for rhoadd in [500 ]:
  for prob_dropout in [  0.05]:
    # Dropout probability p expressed as the ratio p / (1 - p).
    boost= prob_dropout/(1.0-prob_dropout)
    print("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb prob_dropout= {}".format(prob_dropout))
    for L2reg in [  5.0 ]:
      #precompute
      print("precomputing")
      # Per-item penalty vector.
      rhoVec = boost*XtXdiag  + L2reg +rhoadd
      ii_diag=np.diag_indices(XtX.shape[0])
      # Temporarily load the regularised diagonal into XtX and invert once;
      # PP is reused for every `dim` and every iteration below.
      XtX[ii_diag]= XtXdiag +boost*XtXdiag  + L2reg+ rhoVec
      PP=np.linalg.inv(XtX)
      for dim in  [ 10, 20, 100, 200]:
        print("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa dim={}".format(dim))

        
        
        betaVec= np.zeros(XtX.shape[0])
        etaVec= np.zeros(XtX.shape[0])
        
        print(datetime.datetime.now())
        # Small random initialisation of the (n_items x dim) factor.
        EE=np.random.randn(XtX.shape[0], dim) * 0.0001 #* sparsityMask
        print(datetime.datetime.now())


        for itercnt in range(50):
            print("================= iterCnt: {}".format(itercnt))
            #print("  --- train FF")
            #print(datetime.datetime.now())
            #### update FFt
            ii_diag=np.diag_indices(XtX.shape[0])
            # Regularised diagonal in place for the EE^T (.) EE system ...
            XtX[ii_diag]= XtXdiag +boost*XtXdiag  + L2reg+ rhoVec
            HH=EE.T.dot(XtX).dot(EE)
            #print("          10")
            HH= np.linalg.inv(HH)  .dot(EE.T)
            #print("          11")
            # ... then restore the plain diagonal before building the target GG.
            XtX[ii_diag]= XtXdiag
            # Assuming XtX is an ndarray, this broadcast scales column j by (1 + betaVec[j]).
            GG= XtX *(1.0+betaVec)
            GG[ii_diag]+= rhoVec*  (betaVec-etaVec)
            FFt= HH.dot(GG)
            #######update EE
            #print("          1")
            # HH = FFt^T (FFt FFt^T)^-1
            HH= FFt.dot(FFt.T)
            HH=np.linalg.inv(HH)
            HH=FFt.T.dot(HH)
            #print("          3")
            XtX[ii_diag]= XtXdiag
            # Same target matrix as in the FFt update (rebuilt, the earlier copy was overwritten).
            GG= XtX *(1.0+betaVec)
            GG[ii_diag]+= rhoVec*  (betaVec-etaVec)
            EE= GG.dot(HH)
            del GG
            EE=PP.dot(EE)
            ########## update betaVec
            # Diagonal of the current low-rank product EE . FFt.
            EEFFdiag  = np.diag(EE.dot(FFt))
            # betaVec stays at its zero initialisation during the first iteration.
            if itercnt>0:
                XtX[ii_diag]= XtXdiag
                HH=np.diag(XtX.dot(EE).dot(FFt)) -XtXdiag +rhoVec* (etaVec+ EEFFdiag   )
                GG=XtXdiag-boost*XtXdiag  - L2reg +rhoVec
                betaVec=HH/GG
                del GG
                betaVec=np.maximum(betaVec, 0.0) # non-neg values !
            # Diagnostic plots: with range(50) above, iteration 1000 is never
            # reached, so this block is effectively disabled.
            if (itercnt+1) in [1000]: #[1,5,10,20,30,40,50,60,70,80,90,100]:
                #plt.plot(betaTF, betaVec, 'x')
                #plt.show()
                plt.hist(EEFFdiag, bins=100, alpha=0.8,  density=True,histtype='step', color='black', linewidth=2.0,  label='calibrated')
                plt.show()
                print("avg diag value: {}".format(np.mean(EEFFdiag)))
                plt.plot(EEFFdiag, betaVec, 'x')
                plt.show()
                
                    
            # Cosine between row i of EE and column i of FFt; likewise disabled
            # (iteration 10000 is never reached).
            if (itercnt+1) in [10000]: #[1,5,10,20,30,40,50,60,70,80,90,100]:
                EEnn= (1.0 /np.sqrt(np.sum(EE*EE,1)))[:,None]  *EE
                FFtnn= FFt / np.sqrt(np.sum(FFt*FFt,0))
                cosineEDLAE=np.diag(EEnn.dot(FFtnn))
                del EEnn
                del FFtnn
                plt.hist(cosineEDLAE, bins=100, alpha=0.8,  density=True,histtype='step', color='black', linewidth=2.0,  label='calibrated')
                plt.show()

            
            ########## update etaVec
            # Accumulate the gap between the product's diagonal and betaVec.
            etaVec+= EEFFdiag-betaVec
            
            print(datetime.datetime.now())
            ###### eval
            # Only 5, 10, 20, 30, 40 and 50 can trigger with range(50).
            if (itercnt+1) in [5,10,20,30,40,50, 60,80,100,120,150]:
                print("========================= eval:")
                BB= EE.dot(FFt)
                # evaluate is expected to return (NDCG@100, Recall@20, Recall@50).
                n100,r20,r50 = evaluate(BB)
                del BB
                # Result row: [dim, L2reg, prob_dropout, rhoadd, n100, r20, r50]
                print("[ %d,   %d,   %.3f,   %d,   %.3f,   %.3f,  %.3f ]" %(dim, L2reg, prob_dropout, rhoadd, n100, r20, r50))
                #print("%d %d\t%.3f\t%.3f\t%.3f" %(dim, L2reg, np.mean(n100_list), np.mean(r20_list), np.mean(r50_list)))

                print(datetime.datetime.now())

bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb prob_dropout= 0.05
precomputing
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa dim=10
2020-05-06 23:21:57.762949
2020-05-06 23:21:57.771322
2020-05-06 23:22:04.442526
2020-05-06 23:22:13.603850
2020-05-06 23:22:22.788427
2020-05-06 23:22:31.949101
2020-05-06 23:22:41.130425
2020-05-06 23:22:42.288478
0 ... 5000
5000 ... 10000
Test NDCG@100=0.31949 (0.00203)
Test Recall@20=0.28149 (0.00249)
Test Recall@50=0.39849 (0.00284)
2020-05-06 23:23:25.241326
[ 10,   5,   0.050,   500,   0.319,   0.281,  0.398 ]
2020-05-06 23:23:25.255187
2020-05-06 23:23:34.702010
2020-05-06 23:23:44.202635
2020-05-06 23:23:53.681218
2020-05-06 23:24:03.156729
2020-05-06 23:24:12.675880
2020-05-06 23:24:13.859675
0 ... 5000
5000 ... 10000
Test NDCG@100=0.32355 (0.00204)
Test Recall@20=0.28783 (0.00252)
Test Recall@50=0.40664 (0.00287)
2020-05-06 23:24:56.648511
[ 10,   5,   0.050,   500,   0.324,   0.288,  0.407 ]
2020-05-06 

2020-05-06 23:43:03.775228
2020-05-06 23:43:13.039542
2020-05-06 23:43:22.285584
2020-05-06 23:43:23.437820
0 ... 5000
5000 ... 10000
Test NDCG@100=0.35612 (0.00209)
Test Recall@20=0.32270 (0.00263)
Test Recall@50=0.44654 (0.00289)
2020-05-06 23:44:06.518930
[ 20,   5,   0.050,   500,   0.356,   0.323,  0.447 ]
2020-05-06 23:44:06.532644
2020-05-06 23:44:15.760172
2020-05-06 23:44:24.957397
2020-05-06 23:44:34.106686
2020-05-06 23:44:43.274504
2020-05-06 23:44:52.425809
2020-05-06 23:45:01.692205
2020-05-06 23:45:10.859619
2020-05-06 23:45:20.018290
2020-05-06 23:45:29.167495
2020-05-06 23:45:38.354379
2020-05-06 23:45:39.487428
0 ... 5000
5000 ... 10000
Test NDCG@100=0.35632 (0.00208)
Test Recall@20=0.32380 (0.00263)
Test Recall@50=0.44684 (0.00288)
2020-05-06 23:46:28.494414
[ 20,   5,   0.050,   500,   0.356,   0.324,  0.447 ]
2020-05-06 23:46:28.507792
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa dim=100
2020-05-06 23:46:28.508288
2020-05-06 23:46:28.591096


2020-05-07 00:07:33.564845
2020-05-07 00:07:47.407025
2020-05-07 00:08:01.729125
2020-05-07 00:08:15.664971
2020-05-07 00:08:29.386852
2020-05-07 00:08:42.892696
2020-05-07 00:08:56.403838
2020-05-07 00:09:10.093404
2020-05-07 00:09:23.853952
2020-05-07 00:09:25.444730
0 ... 5000
5000 ... 10000
Test NDCG@100=0.41633 (0.00215)
Test Recall@20=0.38655 (0.00269)
Test Recall@50=0.51517 (0.00285)
2020-05-07 00:10:08.725816
[ 200,   5,   0.050,   500,   0.416,   0.387,  0.515 ]
2020-05-07 00:10:08.739752
2020-05-07 00:10:22.250505
2020-05-07 00:10:35.758211
2020-05-07 00:10:49.667541
2020-05-07 00:11:03.837469
2020-05-07 00:11:17.708954
2020-05-07 00:11:31.626078
2020-05-07 00:11:45.526558
2020-05-07 00:11:59.402849
2020-05-07 00:12:13.213339
2020-05-07 00:12:27.073897
2020-05-07 00:12:28.643545
0 ... 5000
5000 ... 10000
Test NDCG@100=0.41528 (0.00214)
Test Recall@20=0.38299 (0.00268)
Test Recall@50=0.51374 (0.00285)
2020-05-07 00:13:12.121577
[ 200,   5,   0.050,   500,   0.415,   0.383,  0.

In [None]:
# Recorded result rows (notes, not executable code). Columns as printed by the
# EDLAE cell: [ dim, L2reg, prob_dropout, rhoadd, NDCG@100, Recall@20, Recall@50 ]
# [ 10,   5,   0.050,   500,   0.325,   0.289,  0.408 ]
# [ 20,   5,   0.050,   500,   0.356,   0.324,  0.447 ]
# [ 50,   10,   0.050,   500,   0.389,   0.358,  0.484 ]
# [ 100,   5,   0.050,   500,   0.405,   0.374,  0.503 ]
# [ 200,   5,   0.050,   500,   0.415,   0.382,  0.513 ]
#
#
# [ 50,   400,   0.250,   2000,   0.384,   0.348,  0.475 ]
# [ 50,   400,   0.200,   500,   0.386,   0.351,  0.479 ]
# [ 50,   400,   0.100,   500,   0.387,   0.354,  0.481 ]
# [ 50,   100,   0.100,   500,   0.388,   0.355,  0.482 ]
# [ 50,   30,   0.100,   500,   0.388,   0.355,  0.482 ]
# [ 50,   10,   0.100,   500,   0.388,   0.355,  0.483 ]
# [ 50,   10,   0.050,   500,   0.389,   0.358,  0.484 ]         ....best
# [ 50,   10,   0.020,   500,   0.389,   0.358,  0.483 ]
# [ 50,   10,   0.010,   500,   0.387,   0.355,  0.479 ]

In [None]:
# Scratch: one tenth of the mean of XtXdiag.
np.mean(XtXdiag)*0.1

In [None]:
# Diagnostic plots for the most recent EDLAE run (reads EE, FFt, betaVec and
# EEFFdiag left behind by the training cell).
# NOTE(review): betaTF is not defined in the visible part of the notebook --
# this cell fails on a fresh kernel unless another cell provides it.
plt.plot(betaTF, betaVec, 'x')
plt.show()
plt.plot(EEFFdiag, betaVec, 'x')
plt.show()
# Normalise the rows of EE and the columns of FFt to unit length, so the
# diagonal of their product is the cosine between row i of EE and column i of FFt.
EEnn= (1.0 /np.sqrt(np.sum(EE*EE,1)))[:,None]  *EE
FFtnn= FFt / np.sqrt(np.sum(FFt*FFt,0))
cosineEDLAE=np.diag(EEnn.dot(FFtnn))
# Free the normalised copies right away.
del EEnn
del FFtnn
plt.hist(cosineEDLAE, bins=100, alpha=0.8,  density=True,histtype='step', color='black', linewidth=2.0,  label='calibrated')
plt.show()

In [None]:
# Persist the current factors. betaVec / etaVec are stored as the placeholder
# value -99 rather than the actual vectors.
mm = {
    "betaVec": -99,  #betaVec
    "etaVec": -99,   #etaVec
    "EE": EE,
    "FFt": FFt,
}
save_pkl(mm, "/root/projects/mySLIM/paper/ml20m_AN_DLAE.pkl" )

In [None]:
# Restore a saved state as the *_orig starting point for warm-started runs.
# NOTE(review): this reads msd_AN_edlae1.pkl, not the ml20m_AN_DLAE.pkl file
# written by the save cell -- confirm the intended file.
mm = load_pkl( "/root/projects/mySLIM/paper/msd_AN_edlae1.pkl" )
betaVec_orig, etaVec_orig = mm["betaVec"], mm["etaVec"]
EE_orig, FFt_orig = mm["EE"], mm["FFt"]


In [None]:
# Drop the dict's name; the arrays stay alive through the *_orig names.
del mm

In [None]:
# Snapshot the current training state so later runs can warm-start from
# independent copies.
betaVec_orig, etaVec_orig = deepcopy(betaVec), deepcopy(etaVec)
EE_orig, FFt_orig = deepcopy(EE), deepcopy(FFt)


In [None]:
# EDLAE with beta learning.
#
# Alternating updates of the two factors (EE, FFt), of betaVec and of etaVec,
# warm-started from EE_orig / betaVec_orig / etaVec_orig.
#
# Reads from the notebook namespace: XtX, XtXdiag, EE_orig, betaVec_orig,
# etaVec_orig, betaTF (used only for a diagnostic scatter plot) and evaluate().
# Leaves EE, FFt, betaVec, etaVec, EEFFdiag, PP and cosineEDLAE defined.
#
# XtX is modified in place (only its diagonal is overwritten).  Every temporary
# overwrite is undone in a `finally`, so an interrupted or failed run cannot
# leave XtX carrying the regularised diagonal.

n_iters = 20
plot_iters = (1, 5, 10, 20, 30, 40)             # 1-based iterations with diagnostic plots
eval_iters = (5, 10, 20, 30, 40, 60, 80, 100)   # 1-based iterations with evaluate()

for rhoadd in [500.0 ]:
    for prob_dropout in [  0.1]:
        boost = prob_dropout / (1.0 - prob_dropout)
        print("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb prob_dropout= {}".format(prob_dropout))
        for L2reg in [  10.0 ]:
            for dim in [   1000 ]:  # [ 10 , 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
                # NOTE: dim is only printed below; nothing else in this cell reads it.
                # The factor shapes are whatever EE_orig provides.
                print("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa dim={}".format(dim))

                rhoVec = boost * XtXdiag + L2reg + rhoadd

                # Work on copies so the *_orig warm-start values survive the run
                # (etaVec is updated in place further down).
                betaVec = deepcopy(betaVec_orig)
                etaVec = deepcopy(etaVec_orig)

                print(datetime.datetime.now())
                EE = deepcopy(EE_orig)
                print(datetime.datetime.now())

                print("precomputing")
                ii_diag = np.diag_indices(XtX.shape[0])
                # Regularised diagonal used for the PP inverse and for the FFt update.
                diag_reg = XtXdiag + boost * XtXdiag + L2reg + rhoVec
                XtX[ii_diag] = diag_reg
                try:
                    PP = np.linalg.inv(XtX)
                finally:
                    XtX[ii_diag] = XtXdiag

                for itercnt in range(n_iters):
                    print("================= iterCnt: {}".format(itercnt))

                    #### update FFt
                    XtX[ii_diag] = diag_reg
                    try:
                        HH = EE.T.dot(XtX).dot(EE)
                    finally:
                        XtX[ii_diag] = XtXdiag
                    HH = np.linalg.inv(HH).dot(EE.T)
                    # From here on XtX carries its plain diagonal (XtXdiag) again.
                    # GG depends only on XtX, betaVec, etaVec and rhoVec, none of
                    # which change before the EE update, so it is built once and
                    # shared by both updates.
                    GG = XtX * (1.0 + betaVec)
                    GG[ii_diag] += rhoVec * (betaVec - etaVec)
                    FFt = HH.dot(GG)

                    #### update EE
                    HH = FFt.dot(FFt.T)
                    HH = np.linalg.inv(HH)
                    HH = FFt.T.dot(HH)
                    EE = GG.dot(HH)
                    del GG
                    EE = PP.dot(EE)

                    #### update betaVec
                    # diag(EE.dot(FFt)) without building the full square product;
                    # np.diag() of that product would be a view that keeps the
                    # whole matrix alive for as long as EEFFdiag exists.
                    EEFFdiag = np.einsum('ij,ji->i', EE, FFt)
                    if itercnt > 0:  # the first pass keeps the warm-start betaVec
                        # diag(XtX.dot(EE).dot(FFt)), again via a row/column contraction.
                        HH = (np.einsum('ij,ji->i', XtX.dot(EE), FFt)
                              - XtXdiag + rhoVec * (etaVec + EEFFdiag))
                        GG = XtXdiag + rhoVec - boost * XtXdiag - L2reg
                        betaVec = HH / GG
                        del GG
                        betaVec = np.maximum(betaVec, 0.0)  # non-neg values !
                        if (itercnt + 1) in plot_iters:
                            # betaTF is loaded by a separate cell and must already exist.
                            plt.plot(betaTF, betaVec, 'x')
                            plt.show()
                            plt.plot(EEFFdiag, betaVec, 'x')
                            plt.show()
                    if (itercnt + 1) in plot_iters:
                        # Histogram of the cosine between row i of EE and column i of FFt.
                        EEnn = (1.0 / np.sqrt(np.sum(EE * EE, 1)))[:, None] * EE
                        FFtnn = FFt / np.sqrt(np.sum(FFt * FFt, 0))
                        cosineEDLAE = np.einsum('ij,ji->i', EEnn, FFtnn)
                        del EEnn
                        del FFtnn
                        plt.hist(cosineEDLAE, bins=100, alpha=0.8,  density=True,histtype='step', color='black', linewidth=2.0,  label='calibrated')
                        plt.show()

                    #### update etaVec
                    etaVec += EEFFdiag - betaVec

                    print(datetime.datetime.now())
                    #### eval
                    if (itercnt + 1) in eval_iters:
                        print("========================= eval:")
                        BB = EE.dot(FFt)
                        evaluate(BB)
                        del BB

                        print(datetime.datetime.now())

In [None]:
# Scatter betaVec against EEFFdiag, using whatever values the training cell left in the namespace.
plt.plot(EEFFdiag, betaVec, 'x')

In [None]:
import sys

# Print every name returned by locals() together with sys.getsizeof() of its value.
# The items are copied into a list before looping, so the names this cell binds
# itself (local_vars, var, obj) cannot change the dict while it is iterated.
# NOTE(review): sys.getsizeof() reports only the memory attributed directly to an
# object, so containers and array views can look much smaller than the data they reference.
local_vars = list(locals().items())
for var, obj in local_vars:
    print(var, sys.getsizeof(obj))

In [None]:
# Drop the name GG to release the array it refers to.
# Raises NameError if GG is not currently defined (the training cell deletes GG itself after each use).
del GG

In [None]:
# Drop the name PP (the np.linalg.inv(XtX) result computed by the training cell) to release its memory.
del PP

In [None]:
# Histogram of the cosine between row i of EE and column i of FFt, for every i.
# Rows of EE and columns of FFt are scaled to unit L2 norm first.
EEnn = (1.0 / np.sqrt(np.sum(EE * EE, 1)))[:, None] * EE
FFtnn = FFt / np.sqrt(np.sum(FFt * FFt, 0))
# Only the diagonal of EEnn.dot(FFtnn) is needed.  Contract row i with column i
# directly instead of building the full square product: np.diag() of that
# product is a view and would keep the whole matrix alive through cosineEDLAE.
cosineEDLAE = np.einsum('ij,ji->i', EEnn, FFtnn)
del EEnn
del FFtnn
plt.hist(cosineEDLAE, bins=100, alpha=0.8,  density=True,histtype='step', color='black', linewidth=2.0,  label='calibrated')
plt.show()

In [None]:
#save_pkl(cosineEDLAE, "/root/projects/mySLIM/paper/nflx_cosineANedlae.pkl" )

In [None]:
# Load betaTF from its pickle.
# The diagnostic scatter plots in earlier cells (plt.plot(betaTF, betaVec, 'x')) read
# this name, so this cell has to be executed before them on a fresh kernel.
# NOTE: load_pkl() unpickles the file; only use it on files you trust.
betaTF=load_pkl("/root/projects/mySLIM/paper/msd_betaTF.pkl")

# create figures

In [None]:
# Load the pickled model whose factors feed the figure cells.
# Alternatives that can be swapped in for the active path:
#   "/root/projects/mySLIM/paper/msd_AN_edlae_best.pkl"
#   "/root/projects/mySLIM/paper/ml20m_AN_EDLAE_learndiag.pkl"
#   "/root/projects/mySLIM/paper/ml20m_AN_DLAE.pkl"
#   "/root/projects/mySLIM/paper/nflx_AN_edlaebest.pkl"
#   "/root/projects/mySLIM/paper/nflx_AN_DLAE.pkl"
mm = load_pkl( "/root/projects/mySLIM/paper/msd_AN_DLAE.pkl" )

# Only the two factor matrices are taken; mm["betaVec"] / mm["etaVec"] are left unread.
EE, FFt = mm["EE"], mm["FFt"]


In [None]:
# Cosine between row i of EE and column i of FFt for the model loaded above.
# Stored as cosine_DLAE so it can be drawn next to cosineEDLAE.
EEnn = (1.0 / np.sqrt(np.sum(EE * EE, 1)))[:, None] * EE
FFtnn = FFt / np.sqrt(np.sum(FFt * FFt, 0))
# Only the diagonal of EEnn.dot(FFtnn) is needed.  Contract row i with column i
# directly instead of building the full square product: np.diag() of that
# product is a view and would keep the whole matrix alive through cosine_DLAE.
#cosineEDLAE = np.einsum('ij,ji->i', EEnn, FFtnn)
cosine_DLAE = np.einsum('ij,ji->i', EEnn, FFtnn)



In [None]:
matplotlib.rcParams.update({'font.size': 22})

# Overlay the two cosine histograms: cosineEDLAE (solid black) and
# cosine_DLAE (dashed blue).  Both arrays come from earlier cells.
# Each series gets its own label; previously both were labelled 'calibrated',
# which made the legend entries indistinguishable once plt.legend() is enabled.
fig=plt.figure()
plt.hist(cosineEDLAE, bins=20, alpha=0.8,  density=True,histtype='step', color='black', linewidth=2.0,  label='EDLAE')
plt.hist(cosine_DLAE, bins=20, alpha=0.8,  density=True,histtype='step', color='blue', linewidth=2.0, linestyle='--', label='DLAE')
plt.xlabel('cosine')
plt.ylabel('density')
#plt.xscale('log')
#plt.legend()
#fig.savefig('/root/projects/mySLIM/paper/msd_cosine.pdf',  bbox_inches='tight')


In [None]:
# ml20============================================== diag enforced
# Recorded rank-sweep results, one row per value of `dim`.
# Columns of ml20_diag and ml20_droppeddiag (5 entries per row):
# dim   l2reg     ndcg    r20     r50

ml20_diag=np.array([   [10, 2000 ,           0.284 ,   0.247  ,  0.354 ],
[20, 2000 ,           0.327 ,   0.294  ,  0.409  ],
[50, 500  ,          0.366 ,   0.333 ,   0.451 ],
[100, 500 ,        0.388,   0.356 ,  0.478 ],
[200, 500 ,     0.404,    0.373 ,   0.496 ],
[500, 500 ,           0.414  ,  0.384  ,  0.512 ],
[1000, 500,    0.417,    0.388 ,   0.516 ],
[5000, 500,    0.420,    0.391 ,   0.521 ],
[10000, 500,    0.420,    0.391  ,  0.521 ],
[20108,   500,      0.420 ,  0.391 ,  0.521 ]     ])



# Same 5-column layout as ml20_diag.
ml20_droppeddiag= np.array([   [10, 3000 ,    0.323 ,   0.290  ,  0.407],
[20 ,10000 ,   0.350 ,   0.319  ,  0.435],
[50 ,30000 ,   0.368 ,   0.337 ,   0.455],
[100, 30000 ,   0.379 ,   0.351 ,   0.472],
[200, 30000 ,   0.390 ,   0.362 ,   0.485],
[500, 30000 ,   0.397 ,   0.365 ,   0.492],                          
[1000 ,10000 ,   0.407,    0.377 ,   0.511],
[5000 ,10000 ,   0.407 ,   0.376 ,   0.511],
[10000, 10000 ,   0.407 ,   0.376,    0.511],
[20108,  10000 ,    0.407,   0.376 ,  0.511]     ])

# 7-column layout: the metrics sit in columns 4..6 here, not 2..4 as in the tables above.
# dim   l2reg  pdrop_in_L2xtx   rho_add_admm  ndcg     r20    r50
# NOTE(review): the dim=200 row records r20 = 0.382, while the run output printed
# earlier in this notebook for the same settings shows 0.383 — possibly a different
# run; confirm which value is intended.
# NOTE(review): the metrics of the last two rows (dim 10000 and 20108) are identical
# to the corresponding ml20_diag rows — confirm they were measured and not copied.
ml20_learndiag= np.array([   [ 10,   5,   0.050,   500,   0.325,   0.289,  0.408 ],
[ 20,   5,   0.050,   500,   0.356,   0.324,  0.447 ],
[ 50,   10,   0.050,   500,   0.389,   0.358,  0.484 ], 
[ 100,   5,   0.050,   500,   0.405,   0.374,  0.503 ],
[ 200,   5,   0.050,   500,   0.415,   0.382,  0.513 ],
[ 500,   400,   0.333,   500,   0.416,   0.382,  0.515 ],
[ 1000,   400,   0.333,   500,   0.418,   0.384,  0.516 ],
[ 2000,   400,   0.333,   500,   0.419,   0.385,  0.518 ],
[ 5000,   400,   0.333,   500,   0.419,   0.385,  0.518 ],
[10000, 400,   0.333,   500,     0.420,    0.391  ,  0.521 ],
[20108,   400,   0.333,   500,   0.420 ,  0.391 ,  0.521 ]  
])


In [None]:
# nflx +++++++++++++++++++++++++++++++++++++++++++ diag enforced
# Recorded rank-sweep results, one row per value of `dim`.
# Columns of nflx_diag and nflx_droppeddiag (5 entries per row):
# dim   l2reg     ndcg     r20    r50

nflx_diag=np.array([  [10 ,1000 ,    0.260 ,   0.225  ,  0.293],
[20 ,1000 ,    0.291 ,   0.256  ,  0.331],
[50 ,1000 ,    0.328 ,   0.296  ,  0.371],
[100, 1000 ,   0.351 ,   0.319  ,  0.396],
[200, 1000 ,   0.368 ,   0.338  ,  0.416],
[500, 1000 ,   0.383 ,   0.353  ,  0.433],
[1000, 1000 ,   0.389 ,   0.358 ,   0.439],
[2000 ,1000 ,   0.392 ,   0.360  ,  0.443],
[5000 ,1000 ,   0.393  ,  0.361  ,  0.445],
[10000, 1000 ,   0.393 ,   0.362 ,   0.445],
[17769, 1000 ,   0.393 ,   0.362  ,  0.445]  ])


# Same 5-column layout as nflx_diag.
nflx_droppeddiag=np.array([ [10, 30000 ,   0.291  ,  0.259   , 0.336],
[20, 30000 ,   0.319  ,  0.286   , 0.366],
[50, 30000 ,   0.344  ,  0.311   , 0.389],
[100, 100000,    0.350 ,   0.319 ,   0.397],
[200, 100000,    0.355 ,   0.325 ,   0.405],  
[500, 100000,    0.367 ,   0.336  ,  0.416],
[1000, 100000,    0.369,    0.337  ,  0.418],
[2000, 30000 ,   0.379 ,   0.346 ,   0.431],
[5000, 30000 ,   0.380 ,   0.347 ,   0.431],
[10000, 30000,    0.380 ,   0.346 ,   0.431],
[17769, 30000,      0.380 ,   0.346  , 0.431]   ])

# 7-column layout: the metrics sit in columns 4..6 here, not 2..4 as in the tables above.
# dim   l2reg  pdrop_in_L2xtx   rho_add_admm  ndcg     r20    r50
nflx_learndiag=np.array([ [ 10,   30,   0.100,   500,   0.293,   0.260,  0.338 ],
[ 20,   30,   0.100,   500,   0.320,   0.287,  0.368 ],
[ 50,   30,   0.100,   500,   0.350,   0.317,  0.399 ] ,
[ 100,   30,   0.100,   500,   0.367,   0.334,  0.417 ],
[ 200,   30,   0.100,   500,   0.380,   0.347,  0.430 ],
[ 500,   500,   0.333,   500,   0.388,   0.355,  0.438 ],
[1000,   500,   0.333,   500,   0.392,   0.359,  0.443 ],
[ 2000,   500,   0.333,   500,   0.393,   0.361,  0.445 ],
[ 5000,   500,   0.333,   500,   0.393,   0.362,  0.445 ],
[10000, 500,   0.333,   500,   0.393 ,   0.362 ,   0.445],
[17769, 500,   0.333,   500,   0.393 ,   0.362  ,  0.445] 
])



In [None]:

# Recorded rank-sweep results for the msd tables, one row per value of `dim`.
# Columns of msd_diag (5 entries per row):
# dim   l2reg     ndcg     r20    r50
msd_diag=np.array([   [ 10, 100 ,   0.088  ,  0.066 ,   0.100],
[20, 100 ,   0.106  ,  0.080 ,   0.118],
[50 ,100 ,   0.142  ,  0.110 ,   0.157],
[100, 100,    0.174 ,   0.138,    0.191],
[200, 100 ,   0.211 ,   0.169 ,   0.231  ],
[500 ,200 ,   0.263 ,   0.216 ,   0.290],
[1000, 200 ,   0.302 ,   0.249,    0.333],
[2000, 200 ,   0.336  ,  0.281 ,   0.370],
[5000, 200 ,   0.368  ,  0.312  ,  0.406],
[10000, 200 ,   0.381 ,   0.325 ,   0.420],
[20000, 200,    0.388 ,   0.332 ,   0.427],
[41140,   200 ,    0.389 ,   0.333 ,   0.428]    ])

# dim   l2reg     ndcg     r20    r50
msd_droppeddiag = np.array([   [10 ,1000,    0.097 ,   0.073  ,  0.109],
[20 ,1000 ,   0.117 ,   0.089 ,   0.130 ],
[50 ,30000 ,   0.142  ,  0.111 ,   0.158],
[100, 30000 ,   0.167 ,   0.132 ,   0.184],
[200, 30000 ,   0.195 ,   0.157  ,  0.216  ],
[500, 30000 ,   0.239 ,   0.194  ,  0.264],
[1000, 30000,    0.266 ,   0.217  ,  0.296],
[2000, 5000 ,   0.292 ,   0.240  ,  0.328],
[5000 ,5000 ,   0.328 ,   0.271  ,  0.367],
[10000, 5000 ,   0.344 ,   0.285 ,   0.384],
[20000, 5000 ,   0.347 ,   0.287 ,   0.388],
[41140, 5000 ,      0.347 ,  0.284 ,  0.384 ]     ])

# 7-column layout: the metrics sit in columns 4..6 here, not 2..4 as in the tables above.
# Rows are recorded with mixed precision (3 or 5 decimals).
# dim   l2reg  pdrop_in_L2xtx   rho_add_admm  ndcg     r20    r50
msd_learndiag = np.array([ [ 10,   10,   0.150,   500,   0.099,   0.075,  0.111 ],
[ 20,   10,   0.150,   500,   0.123,   0.093,  0.137 ],
[ 50,   10,   0.150,   500.000,   0.160,   0.125,  0.179 ],
[ 100,   10,   0.150,   500,   0.192,   0.152,  0.213 ],
[200,  10, 0.1, 500,  0.22908, 0.18261,0.25379],
[500,  10, 0.1, 500,  0.28053,0.22724,0.31061 ],
# NOTE(review): in the dim=1000 row below, r50 (0.26288) is identical to r20 and is
# lower than the r50 of the dim=500 row — looks like a copy/paste slip; the correct
# r50 has to be re-measured or recovered from the run log.
[1000,  10, 0.1, 500,  0.31973,0.26288,0.26288 ],  
[2000,  10, 0.1, 500,  0.35024, 0.29261 , 0.38471],
[5000,  10, 0.1, 500, 0.37385, 0.31587 , 0.41165],
[ 10000,   10,   0.100,   500.000,   0.381,   0.324,  0.421 ],
[ 20000,   10,   0.100,   500.000,   0.388 ,  0.332,  0.427],
[41140,   10,   0.100,   500.000,    0.389 ,   0.333 ,   0.428]    
])



In [None]:
matplotlib.rcParams.update({'font.size': 22})

# nDCG@100 against matrix rank for the three msd result tables, on a log-scaled
# rank axis.  The nDCG column is index 2 in the 5-column tables and index 4 in
# msd_learndiag.  The figure is written to a PDF.
fig, ax = plt.subplots()
ax.plot(msd_diag[:, 0], msd_diag[:, 2], 'x-g', linewidth=2.0)
ax.plot(msd_droppeddiag[:, 0], msd_droppeddiag[:, 2], '+--b', linewidth=2.0)
ax.plot(msd_learndiag[:, 0], msd_learndiag[:, 4], 'o-k', linewidth=2.0)
ax.set_xlabel('matrix-rank')
ax.set_ylabel('nDCG@100')
ax.set_xscale('log')
#ax.legend()
fig.savefig('/root/projects/mySLIM/paper/msd_ranksweep.pdf',  bbox_inches='tight')
