# CDM pretraining
### Import

## Train IMPACT model

In [1]:
from IMPACT.utils import generate_eval_config
from IMPACT.dataset import LoaderDataset as IMPACT_dataset
from IMPACT import model
from liriscat.dataset import preprocessing_utilities as pu

In [None]:
# Experiment scope for the IMPACT run: which dataset and how many folds to iterate over.
dataset_name = "math2"
folds_nb = 1

# Evaluation configuration for IMPACT, written one keyword per line so that
# individual hyper-parameters are easy to locate and diff.
IMPACT_config = generate_eval_config(
    save_params=True,
    dataset_name=dataset_name,
    embs_path="../embs/" + dataset_name,
    params_path="../ckpt/" + dataset_name,
    learning_rate=0.016848380924625605,
    lambda_=9.972254466547545e-06,
    batch_size=2048,
    num_epochs=200,
    valid_metric="mi_acc",
    pred_metrics=["mi_acc"],
    profile_metrics=["doa"],
)

# Resources returned by the preprocessing utilities for this configuration.
concept_map, metadata, nb_modalities = pu.load_dataset_resources(IMPACT_config)

CUDA is available. Using GPU.


In [None]:
# Train one IMPACT model per (seed, fold) pair. `algo` and `impact_valid_data`
# keep the values from the last iteration once the loop finishes.
for seed in range(3):
    IMPACT_config['seed'] = seed
    for i_fold in range(folds_nb):
        IMPACT_config['i_fold'] = i_fold

        # Split for the current fold, then wrap each part in an IMPACT dataset
        # (train first, validation second).
        vertical_train, vertical_valid = pu.vertical_data(IMPACT_config, i_fold)
        impact_train_data, impact_valid_data = (
            IMPACT_dataset(split, concept_map, metadata, nb_modalities)
            for split in (vertical_train, vertical_valid)
        )

        algo = model.IMPACT(**IMPACT_config)
        algo.init_model(impact_train_data, impact_valid_data)
        algo.train(impact_train_data, impact_valid_data)

        # Report prediction metrics: validation first, then train.
        for split_data in (impact_valid_data, impact_train_data):
            print(algo.evaluate_predictions(split_data))

 88%|████████▊ | 177/200 [05:37<00:43,  1.91s/it]


{'mi_acc': 0.7119959592819214, 'preds': tensor([1., 1., 2.,  ..., 1., 1., 1.], device='cuda:0', dtype=torch.float64), 'labels': tensor([1., 1., 2.,  ..., 1., 1., 1.], device='cuda:0', dtype=torch.float64), 'nb_modalities': tensor([2, 2, 2,  ..., 2, 2, 2], device='cuda:0')}
{'mi_acc': 0.9909756779670715, 'preds': tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', dtype=torch.float64), 'labels': tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', dtype=torch.float64), 'nb_modalities': tensor([2, 2, 2,  ..., 2, 2, 2], device='cuda:0')}


 72%|███████▏  | 144/200 [04:36<01:47,  1.92s/it]


{'mi_acc': 0.7148008942604065, 'preds': tensor([1., 1., 2.,  ..., 1., 1., 1.], device='cuda:0', dtype=torch.float64), 'labels': tensor([1., 1., 2.,  ..., 1., 1., 1.], device='cuda:0', dtype=torch.float64), 'nb_modalities': tensor([2, 2, 2,  ..., 2, 2, 2], device='cuda:0')}
{'mi_acc': 0.9905430674552917, 'preds': tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', dtype=torch.float64), 'labels': tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', dtype=torch.float64), 'nb_modalities': tensor([2, 2, 2,  ..., 2, 2, 2], device='cuda:0')}


 40%|███▉      | 79/200 [02:25<03:42,  1.84s/it]


{'mi_acc': 0.7108439803123474, 'preds': tensor([1., 1., 2.,  ..., 1., 1., 1.], device='cuda:0', dtype=torch.float64), 'labels': tensor([1., 1., 2.,  ..., 1., 1., 1.], device='cuda:0', dtype=torch.float64), 'nb_modalities': tensor([2, 2, 2,  ..., 2, 2, 2], device='cuda:0')}
{'mi_acc': 0.9911642670631409, 'preds': tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', dtype=torch.float64), 'labels': tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', dtype=torch.float64), 'nb_modalities': tensor([2, 2, 2,  ..., 2, 2, 2], device='cuda:0')}


: 

In [10]:
# Profile-level metrics for whatever model `algo` currently refers to,
# computed on the validation dataset.
profile_scores = algo.evaluate_profiles(impact_valid_data)
print(profile_scores)

{'doa': 0.536687900170763}


## Train NCDM Model

In [1]:
%load_ext autoreload
%autoreload 2

# Process-level setup for the NCDM section. The environment variables are
# written before `torch` is imported further down in this cell.
import os
# Allocator option passed to PyTorch's CUDA caching allocator.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so setting
# it here presumably only affects child processes, not this kernel — confirm.
os.environ["PYTHONHASHSEED"] = "0"
# Presumably set for deterministic cuBLAS behaviour (see PyTorch reproducibility notes) — confirm.
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ":4096:8"
# NOTE(review): looks like a debugging setting (synchronous CUDA launches) that
# may slow training — confirm it is still wanted.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

import liriscat
# Seed with 0 before the remaining imports.
liriscat.utils.set_seed(0)

import logging
import gc
import json
import torch
# NOTE(review): same seed applied a second time, now after `torch` is imported;
# looks redundant unless set_seed depends on torch already being loaded — confirm.
liriscat.utils.set_seed(0)
import pandas as pd

In [2]:
from IMPACT.utils import generate_eval_config
from IMPACT.dataset import LoaderDataset as IMPACT_dataset
from liriscat.CDM.NCDM import NCDM
from liriscat.dataset import preprocessing_utilities as pu
from liriscat.utils import convert_config_to_EduCAT

In [5]:
# Experiment scope for the NCDM run: which dataset and how many folds to iterate over.
dataset_name = "math2"
folds_nb = 1

# Shared evaluation configuration (short schedule: 20 epochs, patience of 5),
# written one keyword per line so individual settings are easy to locate.
IMPACT_config = generate_eval_config(
    save_params=True,
    patience=5,
    num_epochs=20,
    dataset_name=dataset_name,
    embs_path="../embs/" + dataset_name,
    params_path="../ckpt/" + dataset_name,
    learning_rate=0.016848380924625605,
    lambda_=9.972254466547545e-06,
    batch_size=2048,
    valid_metric="mi_acc",
    pred_metrics=["mi_acc"],
    profile_metrics=["doa"],
)

# Resources returned by the preprocessing utilities for this configuration.
concept_map, metadata, nb_modalities = pu.load_dataset_resources(IMPACT_config)

# EduCAT-style view of the same configuration.
# NOTE(review): no cell visible in this notebook reads NCDM_config — confirm it is still needed.
NCDM_config = convert_config_to_EduCAT(IMPACT_config, metadata)

CUDA is available. Using GPU.


In [18]:
# Train and evaluate NCDM for seed 1 only, over every configured fold.
for seed in range(1, 2):
    IMPACT_config['seed'] = seed
    for i_fold in range(folds_nb):
        IMPACT_config['i_fold'] = i_fold

        vertical_train, vertical_valid = pu.vertical_data(IMPACT_config, i_fold)
        impact_train_data = IMPACT_dataset(vertical_train, concept_map, metadata, nb_modalities)
        impact_valid_data = IMPACT_dataset(vertical_valid, concept_map, metadata, nb_modalities)

        # Convert both IMPACT datasets with pu.transform. Columns 0 and 1 of the
        # raw array are cast to long, column 2 is passed as-is; the category
        # count always comes from the training dataset.
        converted = []
        for split in (impact_train_data, impact_valid_data):
            raw = split.raw_data_array
            converted.append(
                pu.transform(
                    raw[:, 0].long(),
                    raw[:, 1].long(),
                    concept_map,
                    raw[:, 2],
                    IMPACT_config['batch_size'],
                    impact_train_data.n_categories,
                )
            )
        train_set, valid_set = converted

        cdm = NCDM(
            metadata['num_dimension_id'],
            metadata['num_item_id'],
            metadata['num_user_id'],
            IMPACT_config,
        )
        cdm.train(train_set, valid_set, epoch=IMPACT_config['num_epochs'], device="cuda")

        # Final score on the validation set.
        print(cdm.eval(valid_set))

  torch.tensor(user, dtype=torch.int64),  # (1, user_n) to (0, user_n-1)
  torch.tensor(item, dtype=torch.int64),  # (1, item_n) to (0, item_n-1)
  torch.tensor(score, dtype=torch.float32)-1
Epoch 0: 100%|██████████| 89/89 [00:05<00:00, 15.26it/s]


[Epoch 0] average loss: 5.796953


Evaluating: 100%|██████████| 10/10 [00:00<00:00, 15.01it/s]


[Epoch 0] rmse: 0.527316, accuracy: 0.452542


Epoch 1: 100%|██████████| 89/89 [00:05<00:00, 15.59it/s]


[Epoch 1] average loss: 0.710027


Evaluating: 100%|██████████| 10/10 [00:00<00:00, 22.47it/s]


[Epoch 1] rmse: 0.500834, accuracy: 0.452542


Epoch 2: 100%|██████████| 89/89 [00:05<00:00, 15.39it/s]


[Epoch 2] average loss: 0.691606


Evaluating: 100%|██████████| 10/10 [00:00<00:00, 15.34it/s]


[Epoch 2] rmse: 0.498378, accuracy: 0.547458


Epoch 3: 100%|██████████| 89/89 [00:05<00:00, 15.68it/s]


[Epoch 3] average loss: 0.688657


Evaluating: 100%|██████████| 10/10 [00:00<00:00, 22.58it/s]


[Epoch 3] rmse: 0.497852, accuracy: 0.547458


Epoch 4: 100%|██████████| 89/89 [00:05<00:00, 15.63it/s]


[Epoch 4] average loss: 0.688128


Evaluating: 100%|██████████| 10/10 [00:00<00:00, 15.23it/s]


[Epoch 4] rmse: 0.497751, accuracy: 0.547458


Epoch 5: 100%|██████████| 89/89 [00:05<00:00, 15.63it/s]


[Epoch 5] average loss: 0.687936


Evaluating: 100%|██████████| 10/10 [00:00<00:00, 22.62it/s]


[Epoch 5] rmse: 0.497743, accuracy: 0.547458


Epoch 6: 100%|██████████| 89/89 [00:05<00:00, 15.71it/s]


[Epoch 6] average loss: 0.687884


Evaluating: 100%|██████████| 10/10 [00:00<00:00, 15.29it/s]


[Epoch 6] rmse: 0.497743, accuracy: 0.547458


Epoch 7: 100%|██████████| 89/89 [00:05<00:00, 15.72it/s]


[Epoch 7] average loss: 0.687869


Evaluating: 100%|██████████| 10/10 [00:00<00:00, 15.29it/s]


[Epoch 7] rmse: 0.497746, accuracy: 0.547458


Epoch 8: 100%|██████████| 89/89 [00:05<00:00, 15.61it/s]


[Epoch 8] average loss: 0.687988


Evaluating: 100%|██████████| 10/10 [00:00<00:00, 22.57it/s]


[Epoch 8] rmse: 0.497744, accuracy: 0.547458


Epoch 9: 100%|██████████| 89/89 [00:05<00:00, 15.69it/s]


[Epoch 9] average loss: 0.687974


Evaluating: 100%|██████████| 10/10 [00:00<00:00, 15.28it/s]


[Epoch 9] rmse: 0.497757, accuracy: 0.547458


Epoch 10: 100%|██████████| 89/89 [00:05<00:00, 15.67it/s]


[Epoch 10] average loss: 0.687882


Evaluating: 100%|██████████| 10/10 [00:00<00:00, 22.52it/s]


[Epoch 10] rmse: 0.497744, accuracy: 0.547458


Epoch 11: 100%|██████████| 89/89 [00:05<00:00, 15.69it/s]


[Epoch 11] average loss: 0.687675


Evaluating: 100%|██████████| 10/10 [00:00<00:00, 15.21it/s]


[Epoch 11] rmse: 0.497776, accuracy: 0.547458


Evaluating: 100%|██████████| 10/10 [00:00<00:00, 19.14it/s]

(0.4977761420973717, 0.547458051590283)





In [19]:
# Display the epoch that the trained NCDM instance recorded as its best one
# (presumably selected on the validation metric — confirm in NCDM.train).
cdm.best_epoch

5

In [8]:
# Rebuild the NCDM train/validation sets for seed 1 without training,
# so that `train_set` / `valid_set` are available to later cells.
for seed in range(1, 2):
    IMPACT_config['seed'] = seed
    for i_fold in range(folds_nb):
        IMPACT_config['i_fold'] = i_fold

        vertical_train, vertical_valid = pu.vertical_data(IMPACT_config, i_fold)
        impact_train_data = IMPACT_dataset(vertical_train, concept_map, metadata, nb_modalities)
        impact_valid_data = IMPACT_dataset(vertical_valid, concept_map, metadata, nb_modalities)

        # Convert both IMPACT datasets with pu.transform. Columns 0 and 1 of the
        # raw array are cast to long, column 2 is passed as-is; the category
        # count always comes from the training dataset.
        converted = []
        for split in (impact_train_data, impact_valid_data):
            raw = split.raw_data_array
            converted.append(
                pu.transform(
                    raw[:, 0].long(),
                    raw[:, 1].long(),
                    concept_map,
                    raw[:, 2],
                    IMPACT_config['batch_size'],
                    impact_train_data.n_categories,
                )
            )
        train_set, valid_set = converted

  torch.tensor(user, dtype=torch.int64),  # (1, user_n) to (0, user_n-1)
  torch.tensor(item, dtype=torch.int64),  # (1, item_n) to (0, item_n-1)
  torch.tensor(score, dtype=torch.float32)-1


In [6]:
# Build a fresh NCDM with `load_params` switched on (presumably this makes the
# model load previously saved parameters — confirm in NCDM) and score it on the
# validation set.
#
# The flag lives in the shared IMPACT_config dict, so it is reset in `finally`:
# if construction or evaluation raises, later cells must not silently inherit
# `load_params=True`.
IMPACT_config['load_params'] = True
try:
    cdm = NCDM(metadata['num_dimension_id'], metadata['num_item_id'], metadata['num_user_id'], IMPACT_config)
    print(cdm.eval(valid_set))
finally:
    IMPACT_config['load_params'] = False

Evaluating: 100%|██████████| 10/10 [00:00<00:00, 14.68it/s]

(0.5511952602742916, 0.547458051590283)



