In [47]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import sys, os
sys.path.append('../smc')

import numpy as np   
import pandas as pd
from tqdm import tqdm
from time import time
import sys
from libreco.algorithms import NCF


from utils import *     # contains some useful helper functions 
from models import *    # toy models
from solvers import *   # matrix completion solvers
from methods import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [49]:
# ---------------------------------------------------------------
# Experiment configuration (every tunable value lives in this cell)
# ---------------------------------------------------------------

# Ground-truth matrix: generator name, dimensions and seed.
# "ROM" selects RandomOrthogonalModel; any other value uses RandomFactorizationModel.
model = "RFM"
n1, n2 = 200, 200
matrix_generation_seed = 2024    # the data matrix is the same in every repetition

# Noise settings, forwarded to NoiseModel.get_noisy_matrix
noise_model = "step"
gamma_n = 0.5
gamma_m = 0.9
mu = 15

# Sampling settings
prop_obs = 0.2                   # passed as `sub_size` when sampling the observation mask
max_test_queries = 100           # upper bound on the number of test queries
max_calib_queries = 1000         # upper bound on the number of calibration queries

# Matrix-completion solvers to run.
# Names handled by run_single_experiment: "pmf", "nnm", "svt", "ncf".
solvers = ["ncf"]

# Inference settings
alpha = 0.1                      # forwarded to the noise model and to SimulCI.get_CI
allow_inf = False
r = 5                            # rank / embedding size handed to the solver ("r_guess" in results)
k = 2                            # query-size argument forwarded to samplers and SimulCI
repetition = 1                   # number of repetitions in the experiment loop

# Logging
verbose = True

In [50]:
# ---------------------------------------------------------------
# Generate the ground-truth matrix
# ---------------------------------------------------------------
# Only "ROM" selects the orthogonal model; every other value of `model`
# (including the default "RFM") uses the factorization model.
# The third constructor argument is fixed to 5 -- presumably the true rank,
# matching the `r_true` value written by add_header (confirm in models.py).
mm = (RandomOrthogonalModel if model == "ROM" else RandomFactorizationModel)(n1, n2, 5)

if verbose:
    print('Fixing the ground truth matrix generated from the {} model.\n'.format(model))
    sys.stdout.flush()

# A fixed seed keeps the data matrix identical across experiments.
U, V, M_true = mm.sample_noiseless(matrix_generation_seed)

Fixing the ground truth matrix generated from the RFM model.



In [51]:
# Header for results file
def add_header(df):
    """Stamp the experiment settings onto ``df`` as constant-valued columns.

    The values are read from the notebook-level globals ``n1``, ``n2``,
    ``alpha``, ``r``, ``gamma_n``, ``gamma_m`` and ``mu``; ``r_true`` is
    always written as 5.

    Note that ``df`` is modified in place; the same object is returned so
    the call can be used as the last expression of a cell.
    """
    # Insertion order of this mapping fixes the order of the new columns.
    header_values = {
        "n1": n1,
        "n2": n2,
        "alpha": alpha,
        "r_true": 5,
        "r_guess": r,
        "gamma_n": gamma_n,
        "gamma_m": gamma_m,
        "mu": mu,
    }
    for column_name, column_value in header_values.items():
        df[column_name] = column_value
    return df
    

def run_single_experiment(M_true, k, alpha, prop_obs, max_test_queries, max_calib_queries,
                          r, gamma_n=0, gamma_m=0, mu=1, random_state=0):
    """Run one repetition of the experiment for every solver in ``solvers``.

    Steps: sample observation/test masks, sample test queries, add noise to
    ``M_true``, then for each solver split the observed entries into
    training and calibration sets, complete the matrix, build the
    intervals with ``SimulCI`` and evaluate them with ``evaluate_SCI``.

    Besides its arguments, the function reads the notebook-level globals
    ``solvers``, ``noise_model``, ``verbose`` and ``allow_inf``.

    Parameters
    ----------
    M_true : 2-D array
        Noiseless matrix; its shape is unpacked as ``(n1, n2)``.
    k : int
        Forwarded to the samplers, ``SimulCI`` and ``evaluate_SCI``. Row-wise
        counts of masked entries are floor-divided by ``k`` to bound the
        number of queries (presumably the number of entries per query --
        confirm against ``methods``).
    alpha : float
        Forwarded to ``NoiseModel.get_noisy_matrix`` and ``SimulCI.get_CI``.
    prop_obs : float
        Passed as ``sub_size`` to ``QuerySampling.sample_submask``.
    max_test_queries, max_calib_queries : int
        Upper bounds on the number of test / calibration queries.
    r : int
        Rank handed to ``pmf_solve`` (as ``k``) and embedding size handed to
        ``ncf_solve``; not used by the "svt" and "nnm" branches.
    gamma_n, gamma_m, mu :
        Forwarded unchanged to ``NoiseModel.get_noisy_matrix``.
    random_state : int
        Seed passed to the samplers, the noise model and the solvers.

    Returns
    -------
    pandas.DataFrame
        The concatenated ``evaluate_SCI`` outputs (one per solver) with the
        extra columns ``solver``, ``MAE``, ``RMSE``, ``Frobenius_error``,
        ``solver_runtime``, ``k``, ``Calib_queries``, ``Train_entries``,
        ``Test_queries`` and ``random_state``.

    Raises
    ------
    ValueError
        If ``solvers`` contains a name other than "pmf", "svt", "nnm", "ncf".
    """
    # Fail fast on an unknown solver name. Without this check the dispatch
    # below would leave `Mhat` unbound (NameError) or, worse, silently reuse
    # the previous solver's estimate under the wrong label.
    known_solvers = ("pmf", "svt", "nnm", "ncf")
    unknown_solvers = [name for name in solvers if name not in known_solvers]
    if unknown_solvers:
        raise ValueError(
            "Unknown solver(s) {}; expected one of {}.".format(unknown_solvers, known_solvers)
        )

    #-------Generate masks----------#
    #-------------------------------#
    n1, n2 = M_true.shape
    sampler = QuerySampling(n1,n2)
    mask_obs, mask_test = sampler.sample_submask(sub_size=prop_obs, random_state=random_state)
    # Use at most half of the available observed queries for calibration.
    n_calib_queries = min(int(0.5 * np.sum(np.sum(mask_obs, axis=1) // k)), max_calib_queries)


    #------Sample test queries------#
    #-------------------------------#
    n_test_queries = min(int(0.99 * np.sum(np.sum(mask_test, axis=1) // k)), max_test_queries)
    # The train/calib sampler is reused on the test mask; only the sampled
    # query indices are kept.
    _, idxs_test, _ = sampler.sample_train_calib(mask_test, k, calib_size=n_test_queries, random_state=random_state)  
    if verbose:
        print("Training size:{}, calib size: {}, test size: {}\n".format(np.sum(mask_obs)-n_calib_queries*k, n_calib_queries, n_test_queries))
        sys.stdout.flush()

    
    #--------Generate noise---------#
    #-------------------------------#
    nm = NoiseModel(random_state)
    M = nm.get_noisy_matrix(M_true, gamma_n=gamma_n, gamma_m=gamma_m, model=noise_model, 
                            mu=mu, alpha=alpha, normalize=False)


    # Per-solver frames are collected and concatenated once after the loop.
    per_solver_results = []
    # Stays None only when `solvers` is empty.
    mask_train = None

    for solver in solvers:
        #------Split train calib--------#
        #-------------------------------#
        mask_train, idxs_calib, _ = sampler.sample_train_calib(mask_obs, k, 
                                    calib_size=n_calib_queries, random_state=random_state)

        #--------Model Training---------#
        #-------------------------------#
        print("Running matrix completion algorithm on the training set...")
        sys.stdout.flush()
        tik = time()
        if solver == "pmf":
            Mhat, _, _ = pmf_solve(M, mask_train, k=r, verbose=verbose, random_state=random_state)
        elif solver == "svt":
            Mhat = svt_solve(M, mask_train, tau=5 * np.sum(M.shape) / 2, delta=2,verbose = verbose, random_state = random_state)
        elif solver == "nnm":
            Mhat = nnm_solve(M, mask_train, verbose=verbose, random_state=random_state)
        else:
            # Only "ncf" can reach this branch thanks to the validation above.
            Mhat = ncf_solve(M, mask_train, embed_size=r, n_epochs=20, verbose=verbose, random_state=random_state)
        tok = time()
        solver_runtime = tok - tik

        print(f"run time for {solver} is {solver_runtime}.")
        # Errors are evaluated on the complement of the training mask.
        mae, rmse, relative_error = compute_error(M, Mhat, np.ones_like(M)-mask_train)
        print(f"Done training with {solver}! Frobenius_error: {relative_error}\n")
        sys.stdout.flush()
    
    
        #------Compute intervals--------# 
        #-------------------------------#
        ci_method = SimulCI(M, Mhat, mask_obs, idxs_calib, k)
        df = ci_method.get_CI(idxs_test, alpha, allow_inf=allow_inf)
        # Only the entry labelled 0 of the returned frame is used.
        ci_row = df.loc[0]
        lower, upper, is_inf = ci_row.lower, ci_row.upper, ci_row.is_inf
        tmp_res = evaluate_SCI(lower, upper, k, M, idxs_test, is_inf=is_inf, method="conformal")
        tmp_res['solver'] = solver
        tmp_res['MAE'] = mae
        tmp_res['RMSE'] = rmse
        tmp_res['Frobenius_error'] = relative_error
        tmp_res['solver_runtime'] = solver_runtime
        per_solver_results.append(tmp_res)

    # pd.concat rejects an empty list, so fall back to an empty frame.
    res = pd.concat(per_solver_results) if per_solver_results else pd.DataFrame({})

    res['k'] = k     
    res['Calib_queries'] = n_calib_queries
    # Taken from the split of the last solver processed; with no solvers the
    # frame has no rows, so the placeholder value is never materialised.
    res['Train_entries'] = np.sum(mask_train) if mask_train is not None else 0
    res['Test_queries'] = n_test_queries
    res['random_state'] = random_state
    return res

In [52]:
seed = 1 

#####################
#  Run Experiments  #
#####################
# Each repetition's frame is collected here and concatenated once after the
# loop, instead of re-copying a growing DataFrame on every iteration.
all_runs = []

for i in tqdm(range(1, repetition+1), desc="Repetitions", leave=True, position=0):
    # Maps (seed, i) to a unique integer: seed s covers the range
    # (s-1)*repetition + 1 ... s*repetition, so different seeds never overlap.
    random_state = repetition * (seed-1) + i
    
    res = run_single_experiment(M_true, k, alpha, prop_obs, max_test_queries, max_calib_queries,
                        r, gamma_n=gamma_n, gamma_m=gamma_m, mu=mu, random_state=random_state)
    
    all_runs.append(res)

# pd.concat rejects an empty list, so keep the empty-frame result when
# `repetition` is 0.
results = pd.concat(all_runs) if all_runs else pd.DataFrame({})

# add_header modifies `results` in place and returns it, so the cell also
# displays the final table.
add_header(results)

Repetitions:   0%|                                                       | 0/1 [00:00<?, ?it/s]

Training size:6000, calib size: 1000, test size: 100

Running matrix completion algorithm on the training set...


  net = tf.layers.batch_normalization(net, training=is_training)
  net = tf.layers.batch_normalization(net, training=is_training)


Training NCF...
Training start time: [35m2026-01-04 10:14:25[0m



train:   0%|                                                            | 0/47 [00:00<?, ?it/s][A
train:   2%|█                                                   | 1/47 [00:00<00:10,  4.25it/s][A
train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 138.88it/s][A


Epoch 1 elapsed: 0.343s
	 [32mtrain_loss: 2.9154[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 477.41it/s][A


Epoch 2 elapsed: 0.102s
	 [32mtrain_loss: 1.7823[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 533.17it/s][A


Epoch 3 elapsed: 0.092s
	 [32mtrain_loss: 1.6056[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 533.97it/s][A


Epoch 4 elapsed: 0.091s
	 [32mtrain_loss: 1.5474[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 556.46it/s][A


Epoch 5 elapsed: 0.089s
	 [32mtrain_loss: 1.4852[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 516.88it/s][A


Epoch 6 elapsed: 0.094s
	 [32mtrain_loss: 1.3684[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 601.66it/s][A


Epoch 7 elapsed: 0.082s
	 [32mtrain_loss: 1.3386[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 591.98it/s][A


Epoch 8 elapsed: 0.082s
	 [32mtrain_loss: 1.2598[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 599.14it/s][A


Epoch 9 elapsed: 0.082s
	 [32mtrain_loss: 1.2141[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 630.26it/s][A


Epoch 10 elapsed: 0.078s
	 [32mtrain_loss: 1.1488[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 630.99it/s][A


Epoch 11 elapsed: 0.077s
	 [32mtrain_loss: 1.0464[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 615.08it/s][A


Epoch 12 elapsed: 0.082s
	 [32mtrain_loss: 0.9811[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 586.87it/s][A


Epoch 13 elapsed: 0.083s
	 [32mtrain_loss: 0.9573[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 666.55it/s][A


Epoch 14 elapsed: 0.075s
	 [32mtrain_loss: 0.9072[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 630.63it/s][A


Epoch 15 elapsed: 0.080s
	 [32mtrain_loss: 0.8477[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 661.24it/s][A


Epoch 16 elapsed: 0.074s
	 [32mtrain_loss: 0.8212[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 649.08it/s][A


Epoch 17 elapsed: 0.075s
	 [32mtrain_loss: 0.8311[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 618.61it/s][A


Epoch 18 elapsed: 0.079s
	 [32mtrain_loss: 0.7633[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 655.32it/s][A


Epoch 19 elapsed: 0.076s
	 [32mtrain_loss: 0.7421[0m



train: 100%|██████████████████████████████████████████████████| 47/47 [00:00<00:00, 661.90it/s][A


Epoch 20 elapsed: 0.074s
	 [32mtrain_loss: 0.7156[0m
run time for ncf is 2.6038308143615723.
Done training with ncf! Frobenius_error: 0.5560759690683079

Computing conformal prediction intervals for 100 test queries...


CI: 100%|████████████████████████████████████████████████████| 100/100 [00:01<00:00, 64.75it/s]

Done!



Repetitions: 100%|███████████████████████████████████████████████| 1/1 [00:05<00:00,  5.16s/it]


Unnamed: 0,Query_coverage,Coverage,Size,metric,Inf_prop,Method,solver,MAE,RMSE,Frobenius_error,...,Test_queries,random_state,n1,n2,alpha,r_true,r_guess,gamma_n,gamma_m,mu
0,0.88,0.94,4.487586,mean,0.0,conformal,ncf,0.810175,1.083594,0.556076,...,100,1,200,200,0.1,5,5,0.5,0.9,15
