# BIVAECF Cornac experiments
### 1. Init
#### 1.1. Import libraries

In [3]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

import sys
sys.path.append("../../")


import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

from IMPACT import utils
utils.set_seed(0)
from IMPACT import dataset
from IMPACT import model
from experiments.datasets.external_packages import cornac_util
from experiments.datasets.external_packages import BiVAECFCAT

import optuna
import gc
import json
import torch
import pandas as pd
from importlib import reload


from cornac.models import PMF
from cornac.metrics import MAE, RMSE, RatingMetric
import logging
import cornac

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


#### 1.2. Set up the loggers

In [10]:
# Configure the experiment logger under the "BIVAECF_cornac" log name, with verbose output enabled.
utils.setuplogger(log_name="BIVAECF_cornac", verbose=True)

#### 1.3. Parametrize the datasets

In [11]:
# choose dataset here
dataset_name = 'postcovid'
version= ""#"_small"
# modify config here

# Pick the compute device once; it is stored in the config under 'device'.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("CUDA is available. Using GPU.")
else:
    device = torch.device("cpu")
    print("CUDA is not available. Using CPU.")

config = {

    # General params
    'seed' : 0,

    # Saving params
    'load_params': False,
    'save_params': False,
    'embs_path' : '../embs/'+str(dataset_name),
    'params_path' :'../ckpt/'+str(dataset_name),

    # training mode
    'early_stopping' : True,
    'fast_training' : True, # (Only taken into account if early_stopping == True) If True, doesn't compute valid rmse PC-ER

    # Learning params
    'learning_rate': 0.001,
    'batch_size': 2048,
    'num_epochs': 200,
    'num_dim': 10, # for IRT or MIRT todo : is it necessary as we use concepts knowledge number as embedding dimension ?
    'eval_freq' : 1,
    'patience' : 30,
    'device': device,
    'lambda' : 7.7e-6,
    'tensorboard': False,
    'flush_freq' : True,

    # for NeuralCD
    'prednet_len1': 128,
    'prednet_len2': 64,
    'best_params_path':'',

    #For GCCD
    'num_layers': 0,
    'version': 'pair',
    'p_dropout': 0,
    'low_mem_mode' : True,
    'user_nbrs_n' : 10,
    'item_nbrs_n' : 5
}

# Read the dataset description files with context managers so the file handles
# are closed deterministically instead of being left to the garbage collector.
with open(f'../datasets/{dataset_name}/concept_map.json', 'r') as concept_map_file:
    concept_map = json.load(concept_map_file)
# JSON object keys are always strings: cast keys and listed values back to int.
concept_map = {int(k):[int(x) for x in v] for k,v in concept_map.items()}
with open(f'../datasets/{dataset_name}/metadata.json', 'r') as metadata_file:
    metadata = json.load(metadata_file)

utils.set_seed(config['seed'])
# The version suffix is appended only after the paths and files above were
# resolved with the bare dataset name.
dataset_name += version
logging.info(f'#### {dataset_name} ####')
logging.info(f'#### config : {config} ####')

[INFO 34:49] #### postcovid ####
[INFO 34:49] #### config : {'seed': 0, 'load_params': False, 'save_params': False, 'embs_path': '../embs/postcovid', 'params_path': '../ckpt/postcovid', 'early_stopping': True, 'fast_training': True, 'learning_rate': 0.001, 'batch_size': 2048, 'num_epochs': 200, 'num_dim': 10, 'eval_freq': 1, 'patience': 30, 'device': 'cuda:0', 'lambda': 7.7e-06, 'tensorboard': False, 'flush_freq': True, 'prednet_len1': 128, 'prednet_len2': 64, 'best_params_path': '', 'num_layers': 0, 'version': 'pair', 'p_dropout': 0, 'low_mem_mode': True, 'user_nbrs_n': 10, 'item_nbrs_n': 5} ####


In [12]:
def generate_BIVAECF(config,metadata = None) :
    """Build a ``BiVAECFCAT`` model from the experiment configuration.

    Parameters
    ----------
    config : dict
        Must provide the keys 'enc_str', 'activation_func', 'num_epochs',
        'batch_size', 'learning_rate', 'beta_kl' and 'seed'.
    metadata : dict
        Must provide the key 'num_dimension_id', which is passed as the
        model's ``k`` argument. The ``None`` default is kept only for
        signature compatibility; it is rejected at call time.

    Returns
    -------
    The object returned by ``BiVAECFCAT(...)``.

    Raises
    ------
    ValueError
        If ``metadata`` is None.
    """
    # Fail early with an explicit message: without this guard the default value
    # crashes on ``metadata["num_dimension_id"]`` with an opaque TypeError.
    if metadata is None:
        raise ValueError("generate_BIVAECF requires `metadata` (with key 'num_dimension_id'); got None")

    return BiVAECFCAT(
        name='BiVAECF',
        k=metadata["num_dimension_id"],
        encoder_structure=config['enc_str'],
        act_fn=config['activation_func'],
        likelihood='gaus',
        n_epochs=config['num_epochs'],
        batch_size=config['batch_size'],
        learning_rate=config['learning_rate'],
        beta_kl=config['beta_kl'],
        cap_priors={'item': False, 'user': False},
        trainable=True,
        verbose=True,
        seed=config['seed'],
        use_gpu=True,
    )

### 2. CDM Training


In [13]:
# Re-import the project modules so that edits made on disk are picked up.
for _module in (utils, model, dataset):
    reload(_module)

seed = 0
utils.set_seed(seed)

# Override the settings shared by the runs below in a single update.
config.update({
    'seed': seed,
    'early_stopping': True,
    'esc': 'objectives',  # other values noted by the author: 'loss', 'delta_objectives'
    'num_epochs': 200,
    'eval_freq': 1,
    'patience': 30,
    'verbose_early_stopping': False,
    'tensorboard': False,
    'flush_freq': False,
    'save_params': False,
    'disable_tqdm': True,
})

In [None]:
reload(cornac_util)

# Wall-clock budget (in seconds) of the Optuna search for each dataset, in run order.
search_timeouts = {
    "postcovid": 3600,
    "promis": 7200,
    "movielens": 7200,
    "portrait": 7200,
}

# Every finished study is kept here, keyed by dataset name, so the results of
# earlier datasets are not lost when `study` is rebound on the next iteration.
studies = {}

# NOTE(review): generate_BIVAECF reads config['enc_str'], config['activation_func']
# and config['beta_kl'], none of which are set in this notebook before the search;
# presumably cornac_util.objective_BIVAECF fills them from the trial - confirm.
for dataset_name, search_timeout in search_timeouts.items():
    # NOTE(review): the meaning of the third argument (0) is defined in cornac_util - confirm.
    eval_method,concept_map,metadata = cornac_util.load_dataset(dataset_name,config, 0)

    study = optuna.create_study(
        directions=["minimize"],  # Specify directions for each objective
    )
    # Release leftover Python objects and cached CUDA memory before searching.
    gc.collect()
    torch.cuda.empty_cache()
    # optimize() blocks until the search ends, so the lambda always sees the
    # `metadata` / `eval_method` loaded for the current dataset.
    study.optimize(
        lambda trial: cornac_util.objective_BIVAECF(trial, config, metadata, eval_method, generate_BIVAECF),
        n_trials=200,
        timeout=search_timeout,
        n_jobs=4,
        gc_after_trial=True,
    )
    studies[dataset_name] = study

    # Analyze the results
    logging.info(f"Best trial for {dataset_name} : {study.best_trials}")

    logging.info("Number of trials :"+str(len(study.trials)))
    for trial in study.trials:
        logging.info(f"Trial #{trial.number}")
        logging.info(f"  RMSE: {trial.values}")
        logging.info(f"  Params: {trial.params}")

# Kept for backward compatibility: this name previously held the best trials
# of the first (postcovid) study.
pareto_trials = studies["postcovid"].best_trials


[I 2024-12-25 21:34:57,397] A new study created in memory with name: no-name-29731ad5-ac74-471a-a857-2c882361d19c


  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

  0%|          | 0/200 [00:00<?, ?it/s]

### 3. CDM Prediction
#### 3.1. Training and testing

In [None]:
def find_emb(algo):
    """Return ``algo.bivae.theta`` converted to a NumPy array.

    The tensor is detached from the autograd graph and moved to the CPU
    before the conversion.
    """
    theta = algo.bivae.theta
    theta_on_cpu = theta.detach().cpu()
    return theta_on_cpu.numpy()

In [None]:
reload(cornac_util)

# Hyperparameters written into `config` before evaluating each dataset, in run order.
test_hyperparams = {
    "postcovid": {'learning_rate': 2e-5, 'lambda': 6.9e-6, 'd1': 15, 'd2': 11, 'beta_kl': 0.68845},
    "promis": {'learning_rate': 1e-5, 'lambda': 2.1e-6, 'd1': 10, 'd2': 40, 'beta_kl': 0.07},
    "movielens": {'learning_rate': 1e-5, 'lambda': 1.1e-6, 'd1': 29, 'd2': 19, 'beta_kl': 0.25967},
    "portrait": {'learning_rate': 1e-5, 'lambda': 1.12e-6, 'd1': 22, 'd2': 17, 'beta_kl': 0.26244},
}

# Results of every dataset are kept here; previously `metrics` was overwritten
# on each run, so only the last dataset's metrics survived the cell.
metrics_by_dataset = {}

# NOTE(review): generate_BIVAECF reads config['enc_str'] and config['activation_func'],
# which are not set in this cell; presumably cornac_util.test derives them
# (e.g. from 'd1' / 'd2') - confirm.
for dataset_name, hyperparams in test_hyperparams.items():
    logging.info(dataset_name)
    config.update(hyperparams)
    metrics = cornac_util.test(dataset_name,config,generate_BIVAECF,find_emb)
    metrics_by_dataset[dataset_name] = metrics