# IMPACT paper experiments
### 1. Init
#### 1.1. Import libraries (necessary)

In [1]:
%load_ext autoreload
%autoreload 2

# Process-level settings are written to os.environ BEFORE liriscat / torch are
# imported below.
# NOTE(review): presumably so that the CUDA allocator and cuBLAS read them when
# they initialise — confirm against the PyTorch memory-management and
# reproducibility notes.
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here likely only affects child processes — confirm.
os.environ["PYTHONHASHSEED"] = "0"
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ":4096:8"

# Project modules; the seed is fixed to 0 right after `utils` is available and
# before the remaining liriscat sub-modules are imported.
from liriscat import utils
utils.set_seed(0)
from liriscat import dataset
from liriscat import selectionStrategy
from liriscat import CDM

import logging
import gc
import json
import torch
# Second call with the same seed, issued after `import torch`.
# NOTE(review): looks redundant with the call above unless an import consumes
# RNG state — confirm before removing.
utils.set_seed(0)
import pandas as pd
from importlib import reload
import IMPACT

#### 1.2. Set up the loggers (recommended)

In [2]:
# Configure logging through the project helper, using the logger name
# "liriscat" and verbose=True.
# NOTE(review): the exact effect of `verbose` is defined in liriscat.utils —
# presumably it lowers the log level; confirm there.
utils.setuplogger(verbose = True, log_name="liriscat")

### 2. CDM prediction
#### 2.1. Training and testing, sequential version

In [3]:
import warnings
import numpy as np

# Release unreferenced Python objects first, then ask PyTorch to drop its
# cached CUDA blocks.
gc.collect()
torch.cuda.empty_cache()

# Manually re-import the project modules so that edits on disk are picked up.
# NOTE(review): `%autoreload 2` is enabled in the first cell, so these calls
# are probably redundant — confirm before removing.
reload(utils)
reload(selectionStrategy)
reload(CDM)
# The trailing semicolon stops the notebook from echoing the module repr as the
# cell output; that repr embeds the absolute install path of the package.
reload(dataset);

<module 'liriscat.dataset' from '/lustre/fswork/projects/rech/enh/unv34ei/liriscat/liriscat/dataset/__init__.py'>

In [4]:
# Build the evaluation configuration from the project defaults, overriding the
# metrics, query budget and training-loop sizes used for this experiment.
config = utils.generate_eval_config(
    load_params=True,
    esc='error',
    valid_metric='mi_acc',
    pred_metrics=["mi_acc"],
    profile_metrics=['doa'],
    save_params=False,
    n_query=16,
    num_epochs=4,
    batch_size=512,
)
# Re-seed with the seed carried by the freshly generated config.
utils.set_seed(config["seed"])

# Select the dataset and log its name.
config["dataset_name"] = "math2"
logging.info(config["dataset_name"])

# Per-dataset overrides, applied in the same key order as before.
config.update({
    'learning_rate': 0.02026,
    'lambda': 1.2e-5,
    'd_in': 4,
    'num_responses': 12,
})

CUDA is available. Using GPU.
[INFO 53:43] math2


In [5]:
# Log the run header. Note that the config is logged BEFORE the two path
# entries below are made dataset-specific, so the logged 'embs_path' /
# 'params_path' values are the generic defaults.
logging.info(f'#### {config["dataset_name"]} ####')
logging.info(f'#### config : {config} ####')
config['embs_path']='../embs/'+str(config["dataset_name"])
config['params_path']='../ckpt/'+str(config["dataset_name"])

# One empty result list per configured metric name.
pred_metrics = {m:[] for m in config['pred_metrics']}
profile_metrics = {m:[] for m in config['profile_metrics']}

gc.collect()
torch.cuda.empty_cache()

# Silence the numpy "invalid value encountered in divide" message and, more
# broadly, every RuntimeWarning for the rest of the session.
warnings.filterwarnings("ignore", message="invalid value encountered in divide")
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Common prefix of every preprocessed artifact of the selected dataset.
data_prefix = f'../datasets/2-preprocessed_data/{config["dataset_name"]}'

## Concept map format : {question_id : [category_id1, category_id2, ...]}
# JSON object keys are always strings, hence the explicit int() conversion.
# The file is opened in a `with` block so the handle is closed deterministically
# instead of being left to the garbage collector.
with open(f'{data_prefix}_concept_map.json', 'r') as concept_map_file:
    raw_concept_map = json.load(concept_map_file)
concept_map = {int(k): [int(x) for x in v] for k, v in raw_concept_map.items()}

## Metadata map format : {"num_user_id": ..., "num_item_id": ..., "num_dimension_id": ...}
with open(f'{data_prefix}_metadata.json', 'r') as metadata_file:
    metadata = json.load(metadata_file)

## Tensor containing the nb of modalities per question
# weights_only=True restricts unpickling to tensors / plain containers.
nb_modalities = torch.load(f'{data_prefix}_nb_modalities.pkl',weights_only=True)


[INFO 53:43] #### math2 ####
[INFO 53:43] #### config : {'seed': 0, 'dataset_name': 'math2', 'load_params': True, 'save_params': False, 'embs_path': '../embs/', 'params_path': '../ckpt/', 'early_stopping': True, 'esc': 'error', 'verbose_early_stopping': False, 'disable_tqdm': False, 'valid_metric': 'mi_acc', 'learning_rate': 0.02026, 'batch_size': 512, 'valid_batch_size': 10000, 'num_epochs': 4, 'eval_freq': 1, 'patience': 30, 'device': device(type='cuda'), 'lambda': 1.2e-05, 'tensorboard': False, 'flush_freq': True, 'pred_metrics': ['mi_acc'], 'profile_metrics': ['doa'], 'num_responses': 12, 'low_mem': False, 'n_query': 16, 'CDM': 'impact', 'i_fold': 0, 'num_inner_users_epochs': 10, 'num_inner_epochs': 10, 'inner_lr': 0.0001, 'inner_user_lr': 0.0001, 'd_in': 4} ####


In [6]:
# Manually re-import the project modules before the run.
# NOTE(review): `%autoreload 2` is enabled in the first cell, so these calls
# are probably redundant — confirm before removing.
reload(utils)
reload(selectionStrategy)
reload(dataset)
reload(CDM)

# Override two config entries (the config logged earlier shows the previous
# values inner_user_lr=0.0001 and num_inner_users_epochs=10).
config['inner_user_lr'] = 0.01
config['num_inner_users_epochs'] = 8

# Number of pre-split folds read from disk (files are suffixed _0 ... _4).
N_FOLDS = 5

# Column dtypes requested from pandas for every split file.
# NOTE(review): an earlier comment described the columns as
# (user_id, question_id, response, category_id), which does not match these
# keys — confirm the actual CSV header.
SPLIT_DTYPES = {'student_id': int, 'item_id': int, "correct": float,
                "dimension_id": int}


def load_split(split, i_fold):
    """Read one preprocessed split of the current dataset as a DataFrame.

    Args:
        split: Split name used in the file name ("train", "valid" or "test").
        i_fold: Fold index used in the file name.

    Returns:
        The DataFrame parsed from
        ``../datasets/2-preprocessed_data/<dataset_name>_<split>_<i_fold>.csv``.
    """
    return pd.read_csv(
        f'../datasets/2-preprocessed_data/{config["dataset_name"]}_{split}_{i_fold}.csv',
        encoding='utf-8', dtype=SPLIT_DTYPES)


# One entry per fold holding whatever S.evaluate_test returns, so the per-fold
# test metrics are no longer thrown away at the end of each iteration.
fold_results = []

# NOTE(review): config['i_fold'] is not updated inside the loop (it stays at
# its configured value for every fold) — confirm whether the selection
# strategy relies on it, e.g. to pick which saved parameters to load.
for i_fold in range(N_FOLDS):
    train_df = load_split("train", i_fold)
    valid_df = load_split("valid", i_fold)
    test_df = load_split("test", i_fold)

    train_data = dataset.CATDataset(train_df, concept_map, metadata, config,nb_modalities)
    valid_data = dataset.EvalDataset(valid_df, concept_map, metadata, config,nb_modalities)
    test_data = dataset.EvalDataset(test_df, concept_map, metadata, config,nb_modalities)

    # Random question-selection strategy, initialised on train/valid and then
    # evaluated on the held-out test split of this fold.
    S = selectionStrategy.Random(metadata,**config)
    S.init_models(train_data, valid_data)
    fold_results.append(S.evaluate_test(test_data))

In [7]:
# Manually re-import liriscat.dataset so edits made on disk are picked up.
# NOTE(review): `%autoreload 2` is enabled in the first cell, so this is
# probably redundant; as the last expression of the cell it also echoes the
# module repr (including its file path) — confirm whether the cell is needed.
reload(dataset)


In [9]:


# Important for reproducibility: call reset_rng() on every split, in
# train / valid / test order, before running anything that draws from them.
for split_data in (train_data, valid_data, test_data):
    split_data.reset_rng()



[INFO 53:51] Random_cont_model
compiling CDM model
compiling selection model


100%|██████████| 16/16 [01:18<00:00,  4.93s/it]


({0: {'mi_acc': 0.6940707564353943},
  1: {'mi_acc': 0.7073243260383606},
  2: {'mi_acc': 0.7182859778404236},
  3: {'mi_acc': 0.7188838720321655},
  4: {'mi_acc': 0.7166915535926819},
  5: {'mi_acc': 0.7226706147193909},
  6: {'mi_acc': 0.7244643568992615},
  7: {'mi_acc': 0.7248629331588745},
  8: {'mi_acc': 0.724962592124939},
  9: {'mi_acc': 0.7225709557533264},
  10: {'mi_acc': 0.727453887462616},
  11: {'mi_acc': 0.7257598042488098},
  12: {'mi_acc': 0.7266566753387451},
  13: {'mi_acc': 0.7259591221809387},
  14: {'mi_acc': 0.724364697933197},
  15: {'mi_acc': 0.7279521226882935}},
 {0: {'doa': np.float64(0.4178885070032461)},
  1: {'doa': np.float64(0.419477220898568)},
  2: {'doa': np.float64(0.4561491014760655)},
  3: {'doa': np.float64(0.45494280946819776)},
  4: {'doa': np.float64(0.4354390672272548)},
  5: {'doa': np.float64(0.4802023484360847)},
  6: {'doa': np.float64(0.47577463974850365)},
  7: {'doa': np.float64(0.4392771510712827)},
  8: {'doa': np.float64(0.476557244

In [None]:

# Train the selection strategy on the train split, passing the validation
# split alongside it.
# NOTE(review): S, train_data and valid_data are whatever the last iteration of
# the fold loop left in the kernel — confirm that this is the intended fold.
S.train(train_data, valid_data)