In [1]:
import sys
sys.path.insert(0, '../../src')
from models.IDEAL import IDEAL
from utils import preprocession as prep
from utils.metrics import compute_metric, add_to_metrics, columns

import os
import pandas as pd

from tqdm.notebook import tqdm

# Register tqdm's `progress_apply` on pandas objects (used when encoding the test users).
# Every such bar is labelled 'Test Fit' and, with leave=False, is cleared once it finishes.
tqdm.pandas(leave=False, desc='Test Fit')

## Load Data and Evaluate IDEAL Embeddings

In [4]:
def evaluateIDEAL(folder_name):
    """Score the IDEAL embeddings stored for one dataset folder.

    For every key ``p`` of the loaded ``betabar`` files an ``IDEAL`` model is
    built from ``betabar[p]`` / ``xbar[p]``. Its train embedding and train
    predictions are written to disk and scored against both the 'Original'
    and the 'Binary' reactions. Test users are then embedded row by row with
    ``ideal.encode`` on the binary test reactions, predicted, and scored the
    same way. When ``p == 0`` every key of the test reactions is evaluated;
    otherwise only the matching key ``q == p`` is.

    The keys are presumably sparsity levels (they fill the 'Train Sparsity' /
    'Evaluation Sparsity' slots of each metrics row) -- confirm against
    ``prep.load_data``.

    Parameters
    ----------
    folder_name : str
        Dataset name passed to ``prep.load_data`` and used as the
        sub-directory of ``../../embeddings/IDEAL/``.

    Returns
    -------
    pandas.DataFrame
        One row per entry appended by ``add_to_metrics``, with the columns
        given by ``utils.metrics.columns``. The same frame is also written to
        ``<output_path>/metrics.csv``.

    Side effects
    ------------
    Creates ``../../embeddings/IDEAL/<folder_name>/IRT`` if needed and writes
    ``train_embedding_{p}.csv`` / ``test_embedding_{p}_{q}.csv`` into the
    method directory and ``train_predictions_{p}.csv`` /
    ``test_predictions_{p}_{q}.csv`` / ``metrics.csv`` into the IRT directory.
    """
    # load_data returns ((train_users, train_reactions), (test_users, test_reactions));
    # the user tables are not needed for this evaluation, so they are discarded.
    (_, train_reactions), (_, test_reactions) = prep.load_data(folder_name, 'Original')
    (_, train_reactions_bin), (_, test_reactions_bin) = prep.load_data(folder_name, 'Binary')

    method_path = f"../../embeddings/IDEAL/{folder_name}"
    output_path = f"../../embeddings/IDEAL/{folder_name}/IRT"
    os.makedirs(output_path, exist_ok=True)

    betabar = prep.load_all('betabar', directory=method_path)
    xbar = prep.load_all('xbar', directory=method_path)

    # Ground truth per datatype, listed in the order rows are appended to `metrics`.
    train_truth = (('Original', train_reactions), ('Binary', train_reactions_bin))
    test_truth = (('Original', test_reactions), ('Binary', test_reactions_bin))

    metrics = []

    for p in tqdm(betabar.keys(), desc='Sparsity', leave=False):

        train_p = train_reactions[p]
        ideal = IDEAL(betabar[p], xbar[p], index=train_p.index, columns=train_p.columns)
        ideal.train_embedding.to_csv(f'{method_path}/train_embedding_{p}.csv')

        train_predictions = pd.DataFrame(ideal.predict(ideal.train_embedding.values),
                                         index=train_p.index, columns=ideal.items.index)
        train_predictions.to_csv(f'{output_path}/train_predictions_{p}.csv')

        # Predictions are compared with the key-0 reactions; the NaN mask of the
        # key-p reactions is handed to compute_metric as its third argument.
        for datatype, reactions in train_truth:
            train_result = compute_metric(train_predictions, reactions[0], reactions[p].isna(), silent=True)
            add_to_metrics(metrics, train_result, [folder_name, datatype, 'Train', p, 'Train', p, 'IDEAL', 'IRT'])

        # The model for key 0 is evaluated on every test key; any other model
        # only on the test set with the same key.
        test_list = test_reactions.keys() if p == 0 else [p]
        for q in test_list:
            # Test users are always encoded from the binary reactions.
            test_fit = test_reactions_bin[q].progress_apply(ideal.encode, axis=1, result_type='expand')

            # Only the first two columns of the encode output are kept as the
            # (x, y) position; any further columns are dropped.
            test_embedding = pd.DataFrame(test_fit.iloc[:, :2].values, index=test_reactions[q].index, columns=['x', 'y'])
            test_embedding.to_csv(f'{method_path}/test_embedding_{p}_{q}.csv')

            test_predictions = pd.DataFrame(ideal.predict(test_embedding.values),
                                            index=test_reactions[q].index, columns=ideal.items.index)
            test_predictions.to_csv(f'{output_path}/test_predictions_{p}_{q}.csv')

            for datatype, reactions in test_truth:
                test_result = compute_metric(test_predictions, reactions[0], reactions[q].isna(), silent=True)
                add_to_metrics(metrics, test_result, [folder_name, datatype, 'Train', p, 'Test', q, 'IDEAL', 'IRT'])

    metrics = pd.DataFrame(metrics, columns=columns)
    metrics.to_csv(f'{output_path}/metrics.csv')
    return metrics

### Synthetic Data

In [5]:
# Run the full evaluation for the 'Synthetic_60_50' dataset folder (also writes CSVs to disk)
# and preview the first rows of the resulting metrics table.
metrics = evaluateIDEAL('Synthetic_60_50')
metrics.head()

Sparsity:   0%|          | 0/10 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Unnamed: 0,Dataset,Datatype,Train Set,Train Sparsity,Evaluation Set,Evaluation Sparsity,Embedding Method,Prediction Method,Task,Accuracy,RMSE
0,Synthetic_60_50,Original,Train,10,Train,10,IDEAL,IRT,Fit,0.996204,0.20641
1,Synthetic_60_50,Original,Train,10,Train,10,IDEAL,IRT,Impute,0.973333,0.20605
2,Synthetic_60_50,Original,Train,10,Train,10,IDEAL,IRT,Overall,0.993917,0.206374
3,Synthetic_60_50,Binary,Train,10,Train,10,IDEAL,IRT,Fit,0.996204,0.068754
4,Synthetic_60_50,Binary,Train,10,Train,10,IDEAL,IRT,Impute,0.973333,0.13429


### Smartvote

In [6]:
# Run the same evaluation for the 'Smartvote' dataset folder.
# NOTE: this rebinds `metrics`, so the synthetic results are only available from their metrics.csv.
metrics = evaluateIDEAL('Smartvote')
metrics.head()

Sparsity:   0%|          | 0/10 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Unnamed: 0,Dataset,Datatype,Train Set,Train Sparsity,Evaluation Set,Evaluation Sparsity,Embedding Method,Prediction Method,Task,Accuracy,RMSE
0,Smartvote,Original,Train,10,Train,10,IDEAL,IRT,Fit,0.829269,0.267925
1,Smartvote,Original,Train,10,Train,10,IDEAL,IRT,Impute,0.814273,0.277842
2,Smartvote,Original,Train,10,Train,10,IDEAL,IRT,Overall,0.82767,0.268983
3,Smartvote,Binary,Train,10,Train,10,IDEAL,IRT,Fit,0.829269,0.342721
4,Smartvote,Binary,Train,10,Train,10,IDEAL,IRT,Impute,0.814273,0.355735


In [7]:
# Inspect only the rows whose 'Train Sparsity' is 90 (uses the `metrics` frame from the latest evaluateIDEAL call).
metrics.loc[ metrics['Train Sparsity']==90]

Unnamed: 0,Dataset,Datatype,Train Set,Train Sparsity,Evaluation Set,Evaluation Sparsity,Embedding Method,Prediction Method,Task,Accuracy,RMSE
48,Smartvote,Original,Train,90,Train,90,IDEAL,IRT,Fit,0.877576,0.250567
49,Smartvote,Original,Train,90,Train,90,IDEAL,IRT,Impute,0.763174,0.343993
50,Smartvote,Original,Train,90,Train,90,IDEAL,IRT,Overall,0.773851,0.335273
51,Smartvote,Binary,Train,90,Train,90,IDEAL,IRT,Fit,0.877576,0.291916
52,Smartvote,Binary,Train,90,Train,90,IDEAL,IRT,Impute,0.763174,0.413295
53,Smartvote,Binary,Train,90,Train,90,IDEAL,IRT,Overall,0.773851,0.401966
54,Smartvote,Original,Train,90,Test,90,IDEAL,IRT,Fit,0.862562,0.274722
55,Smartvote,Original,Train,90,Test,90,IDEAL,IRT,Impute,0.753448,0.345251
56,Smartvote,Original,Train,90,Test,90,IDEAL,IRT,Overall,0.763632,0.338668
57,Smartvote,Binary,Train,90,Test,90,IDEAL,IRT,Fit,0.862562,0.325736
