# DeepMTP experiments
### 1. Init
#### 1.1. Import libraries

In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

import sys
# Extend the import search path so the `IMPACT` and `experiments` packages
# imported in this cell can be resolved (assumes the repository root sits two
# directories above the notebook's working directory — TODO confirm).
sys.path.append("../../")


import os
# Set the CUDA allocator option here, ahead of the `import torch` further down in this cell.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

from IMPACT import utils
# Seed right away, before the remaining project modules are imported.
utils.set_seed(0)
from IMPACT import dataset
from IMPACT import model
from experiments.datasets.data_utils import DeepMTP_util

import optuna
import gc
import json
import torch

from importlib import reload

import logging


ModuleNotFoundError: No module named 'DeepMTP'

#### 1.2. Set up the loggers

In [14]:
# Configure logging through the project helper: verbose mode, log name "DeepMTP".
utils.setuplogger(log_name="DeepMTP", verbose=True)

#### 1.3. Parametrize the datasets

In [3]:
# choose dataset here
dataset_name = 'postcovid'
version = ""  # "_small"

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("CUDA is available. Using GPU.")
else:
    device = torch.device("cpu")
    print("CUDA is not available. Using CPU.")

# modify config here
config = {

    # General params
    'seed' : 0,

    # Saving params
    'load_params': False,
    'save_params': False,
    'embs_path' : '../embs/'+str(dataset_name),
    'params_path' :'../ckpt/'+str(dataset_name),

    # training mode
    'early_stopping' : True,
    'fast_training' : True, # (Only taken into account if early_stopping == True) If True, doesn't compute valid rmse PC-ER

    # Learning params
    'learning_rate': 0.001,
    'batch_size': 2048,
    'num_epochs': 200,
    'num_dim': 10, # for IRT or MIRT. TODO: is it necessary, as the number of knowledge concepts is used as embedding dimension?
    'eval_freq' : 1,
    'patience' : 30,
    'device': device,
    'lambda' : 7.7e-6,
    'tensorboard': False,
    'flush_freq' : True,

    # for NeuralCD
    'prednet_len1': 128,
    'prednet_len2': 64,
    'best_params_path':'',

    #For GCCD
    'num_layers': 0,
    'version': 'pair',
    'p_dropout': 0,
    'low_mem_mode' : True,
    'user_nbrs_n' : 10,
    'item_nbrs_n' : 5
}

# Read the dataset side files with context managers so the handles are closed
# as soon as parsing is done (the bare `json.load(open(...))` form leaked them).
with open(f'../datasets/{dataset_name}/concept_map.json', 'r') as concept_map_file:
    concept_map = json.load(concept_map_file)
# JSON object keys are always strings: convert keys and listed values to int.
concept_map = {int(k):[int(x) for x in v] for k,v in concept_map.items()}
with open(f'../datasets/{dataset_name}/metadata.json', 'r') as metadata_file:
    metadata = json.load(metadata_file)

utils.set_seed(config['seed'])
# The version suffix is appended only after the paths above were built and the
# files were read, so those always use the un-suffixed dataset name.
dataset_name += version
logging.info(f'#### {dataset_name} ####')
logging.info(f'#### config : {config} ####')

[INFO 13:40] #### postcovid ####
[INFO 13:40] #### config : {'seed': 0, 'load_params': False, 'save_params': False, 'embs_path': '../embs/postcovid', 'params_path': '../ckpt/postcovid', 'early_stopping': True, 'fast_training': True, 'learning_rate': 0.001, 'batch_size': 2048, 'num_epochs': 200, 'num_dim': 10, 'eval_freq': 1, 'patience': 30, 'device': 'cuda:0', 'lambda': 7.7e-06, 'tensorboard': False, 'flush_freq': True, 'prednet_len1': 128, 'prednet_len2': 64, 'best_params_path': '', 'num_layers': 0, 'version': 'pair', 'p_dropout': 0, 'low_mem_mode': True, 'user_nbrs_n': 10, 'item_nbrs_n': 5} ####


### 2. CDM Training (Optuna hyperparameter search)


In [4]:
# Re-import the project modules so edits made since the first import are picked up.
reload(utils)
reload(model)
reload(dataset)

seed = 0
utils.set_seed(seed)

# Overrides applied on top of the base config for the runs below.
config.update({
    'seed': seed,
    'early_stopping': True,
    'esc': 'objectives',  # 'loss' 'delta_objectives'
    'num_epochs': 10,
    'eval_freq': 1,
    'patience': 30,
    'verbose_early_stopping': False,
    'tensorboard': False,
    'flush_freq': False,
    'save_params': False,
    'disable_tqdm': True,
})

In [None]:
reload(DeepMTP_util)


def run_study(dataset_name, n_trials, timeout, n_jobs):
    """Run one Optuna search on `dataset_name` and log every trial.

    Replaces five copy-pasted blocks that differed only in these arguments.
    Reads the notebook-level `config` dict.

    Args:
        dataset_name: Name passed to `DeepMTP_util.load_dataset`.
        n_trials: Maximum number of trials, forwarded to `study.optimize`.
        timeout: Time budget forwarded to `study.optimize` (seconds, per the Optuna API).
        n_jobs: Number of parallel jobs, forwarded to `study.optimize`.

    Returns:
        The `optuna` study object after optimisation.
    """
    data, metadata = DeepMTP_util.load_dataset(dataset_name)
    train, val, test, _ = data  # fourth element (data_info) is not used here

    study = optuna.create_study(
        directions=["minimize"],  # Specify directions for each objective
    )
    gc.collect()
    torch.cuda.empty_cache()

    # The lambda closes over this call's locals, so each study is guaranteed to
    # see its own dataset rather than whatever the notebook globals hold.
    # NOTE(review): `config` is one dict shared by all trials; if
    # `objective_MTP` mutates it, runs with n_jobs > 1 may interfere — confirm.
    study.optimize(
        lambda trial: DeepMTP_util.objective_MTP(
            trial, config, metadata, DeepMTP_util.generate_DeepMTP, train, val, test
        ),
        n_trials=n_trials,
        timeout=timeout,
        n_jobs=n_jobs,
        gc_after_trial=True,
    )

    # Analyze the results
    ## requirements : plotly, nbformat
    logging.info(f"Best trial for {dataset_name} : {study.best_trials}")

    logging.info("Number of trials :"+str(len(study.trials)))
    for trial in study.trials:
        logging.info(f"Trial #{trial.number}")
        logging.info(f"  RMSE: {trial.values}")
        #logging.info(f"  DOA: {trial.values[1]}")
        logging.info(f"  Params: {trial.params}")

    return study


# One entry per search, in execution order. "postcovid" is listed twice on
# purpose: the original notebook ran it again at the end with a single trial.
STUDY_RUNS = [
    {"dataset_name": "postcovid", "n_trials": 100, "timeout": 1800, "n_jobs": 1},
    {"dataset_name": "promis", "n_trials": 100, "timeout": 14400, "n_jobs": 4},
    {"dataset_name": "movielens", "n_trials": 100, "timeout": 14400, "n_jobs": 4},
    {"dataset_name": "portrait", "n_trials": 100, "timeout": 14400, "n_jobs": 4},
    {"dataset_name": "postcovid", "n_trials": 1, "timeout": 1800, "n_jobs": 4},
]

# Keep every finished study (as (dataset_name, study) pairs) instead of
# overwriting a single `study` variable; `dataset_name` and `study` still end
# up bound to the last run, as before.
studies = []
for run in STUDY_RUNS:
    dataset_name = run["dataset_name"]
    study = run_study(**run)
    studies.append((dataset_name, study))

### 3. CDM Prediction
#### 3.1. Training and testing

In [18]:
reload(DeepMTP_util)


# Per-dataset learning rate and 'lambda' values. These were previously kept as
# commented-out code blocks, one per dataset.
DATASET_HYPERPARAMS = {
    "postcovid": {"learning_rate": 0.00833, "lambda": 1e-7},
    "promis": {"learning_rate": 0.00027, "lambda": 3e-7},
    "movielens": {"learning_rate": 0.00153, "lambda": 5e-7},
    "portrait": {"learning_rate": 0.00735, "lambda": 1e-7},
}

# Datasets to evaluate in this run; add further keys of DATASET_HYPERPARAMS
# ("promis", "movielens", "portrait") to evaluate them as well.
DATASETS_TO_TEST = ["postcovid"]

# Metrics of every evaluated dataset, keyed by name; `metrics` stays bound to
# the result of the last evaluated dataset.
all_metrics = {}
for dataset_name in DATASETS_TO_TEST:
    logging.info(dataset_name)
    config.update(DATASET_HYPERPARAMS[dataset_name])
    metrics = DeepMTP_util.test(dataset_name,config)
    all_metrics[dataset_name] = metrics

[INFO 59:51] postcovid
Interaction file: triplet format detected
Interaction file: triplet format detected
Interaction file: triplet format detected
Interaction file: checking format consistency... Passed
Interaction file: checking instance id format consistency... Passed
Interaction file: checking target id type consistency... Passed

Interaction file: checking target variable type consistency... Passed
Automatically detected type of target variable type: real-valued

Interaction file: Checking for novel instances... Done
-- no Novel instances detected in the test set
Interaction file: Checking for novel targets... Done
-- no Novel targets detected in the test set
Estimating validation setting... Done-- Detected as setting :A

Instance features file: processing features... Done
Instance features file: processing features... Done
Instance features file: processing features... Done
Instance features file: processing features... Done
Instance features file: processing features... Done
In

In [16]:
# Display the metrics returned by the `DeepMTP_util.test` call in the previous code cell.
metrics

{'mae': [0.2224221],
 'rmse': [0.27964154],
 'pc-er': [0.03776286723207629],
 'doa': [0.500698497767473],
 'rm': [0.021603574001723948]}