In [1]:
import sys
sys.path.insert(0, '../../src')
from models.NOMINATE import NOMINATE
from utils import preprocession as prep
from utils.metrics import compute_metric, add_to_metrics, columns

import os
import pandas as pd

from tqdm.notebook import tqdm

# Register tqdm's `progress_apply` on pandas objects (used in evaluateNOMINATE).
# The keyword arguments configure those bars: labelled 'Test Fit', and per the
# tqdm docs `leave=False` clears each bar once it has finished.
tqdm.pandas(leave=False, desc='Test Fit')

## Load Data and Evaluate NOMINATE

In [2]:
def _record_metrics(metrics, predictions, reactions_by_datatype, folder_name,
                    train_sparsity, evaluation_set, evaluation_sparsity):
    """Score `predictions` once per datatype and append the results to `metrics`.

    For every `(datatype, reactions)` pair the predictions are passed to
    `compute_metric` together with `reactions[0]` and the missing-value mask of
    `reactions[evaluation_sparsity]`; the result is appended via `add_to_metrics`.
    """
    for datatype, reactions in reactions_by_datatype:
        # NOTE(review): key 0 is presumably the fully observed data that serves
        # as ground truth for every sparsity level -- confirm against prep.load_data.
        result = compute_metric(predictions, reactions[0],
                                reactions[evaluation_sparsity].isna(), silent=True)
        add_to_metrics(metrics, result,
                       [folder_name, datatype, 'Train', train_sparsity,
                        evaluation_set, evaluation_sparsity, 'NOMINATE', 'IRT'])


def evaluateNOMINATE(folder_name):
    """Evaluate the stored NOMINATE fits of one dataset folder.

    For every key `p` of the loaded legislators the function

    1. builds a `NOMINATE` model from `legislators[p]` and `rollcalls[p]`,
    2. writes its train embedding and train predictions to CSV,
    3. encodes the binary test reactions row by row, writes the resulting
       test embedding and test predictions to CSV,
    4. records metrics for the train and test predictions against both the
       'Original' and the 'Binary' reactions.

    Parameters
    ----------
    folder_name : str
        Dataset folder handed to `prep.load_data`; also the sub-folder of
        `../../embeddings/NOMINATE` that is read from and written to.

    Returns
    -------
    pandas.DataFrame
        All recorded metric rows, with the column names imported from
        `utils.metrics`. The same frame is saved as `metrics.csv` in the
        `IRT` sub-folder.
    """
    # Only the reaction tables are needed; the user tables that load_data
    # returns alongside them are discarded.
    (_, train_reactions), (_, test_reactions) = prep.load_data(folder_name, 'Original')
    (_, train_reactions_bin), (_, test_reactions_bin) = prep.load_data(folder_name, 'Binary')

    # Evaluation order matters for the row order of the result: Original first, then Binary.
    train_sets = (('Original', train_reactions), ('Binary', train_reactions_bin))
    test_sets = (('Original', test_reactions), ('Binary', test_reactions_bin))

    method_path = f"../../embeddings/NOMINATE/{folder_name}"
    output_path = f"../../embeddings/NOMINATE/{folder_name}/IRT"

    os.makedirs(output_path, exist_ok=True)

    rollcalls   = prep.load_all('rollcalls', directory=method_path)
    legislators = prep.load_all('legislators', directory=method_path)

    metrics = []

    for p in tqdm(legislators.keys(), desc='Sparsity', leave=False):

        nominate = NOMINATE(legislators[p], rollcalls[p], index=train_reactions[p].index)
        nominate.train_embedding.to_csv(f'{method_path}/train_embedding_{p}.csv')

        train_predictions = pd.DataFrame(nominate.predict(nominate.train_embedding.values),
                                         index=train_reactions[p].index, columns=nominate.items.index)
        train_predictions.to_csv(f'{output_path}/train_predictions_{p}.csv')

        _record_metrics(metrics, train_predictions, train_sets, folder_name, p, 'Train', p)

        # The model for key 0 is evaluated on every test key;
        # every other model only on the test set with its own key.
        test_list = test_reactions.keys() if p == 0 else [p]
        for q in test_list:
            # Encode each test user from the binary reactions; the first two
            # columns of the expanded result are kept as the (x, y) embedding.
            test_fit = test_reactions_bin[q].progress_apply(nominate.encode, axis=1, result_type='expand')

            test_embedding = pd.DataFrame(test_fit.iloc[:, :2].values, index=test_reactions[q].index, columns=['x', 'y'])
            test_embedding.to_csv(f'{method_path}/test_embedding_{p}_{q}.csv')

            test_predictions = pd.DataFrame(nominate.predict(test_embedding.values),
                                            index=test_reactions[q].index, columns=nominate.items.index)
            test_predictions.to_csv(f'{output_path}/test_predictions_{p}_{q}.csv')

            _record_metrics(metrics, test_predictions, test_sets, folder_name, p, 'Test', q)

    metrics = pd.DataFrame(metrics, columns=columns)
    metrics.to_csv(f'{output_path}/metrics.csv')
    return metrics

### Synthetic Data

In [3]:
# Run the NOMINATE evaluation on the 'Synthetic_60_50' dataset folder and
# preview the first rows of the resulting metrics table.
metrics = evaluateNOMINATE('Synthetic_60_50')
metrics.head()

Sparsity:   0%|          | 0/8 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Predictions contain np.NaN
Predictions contain np.NaN


Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Predictions contain np.NaN
Predictions contain np.NaN


Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Predictions contain np.NaN
Predictions contain np.NaN


Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Predictions contain np.NaN
Predictions contain np.NaN


Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/60 [00:00<?, ?it/s]

Unnamed: 0,Dataset,Datatype,Train Set,Train Sparsity,Evaluation Set,Evaluation Sparsity,Embedding Method,Prediction Method,Task,Accuracy,RMSE
0,Synthetic_60_50,Original,Train,10,Train,10,NOMINATE,IRT,Fit,0.970278,0.153791
1,Synthetic_60_50,Original,Train,10,Train,10,NOMINATE,IRT,Impute,0.9525,0.164852
2,Synthetic_60_50,Original,Train,10,Train,10,NOMINATE,IRT,Overall,0.9685,0.154897
3,Synthetic_60_50,Binary,Train,10,Train,10,NOMINATE,IRT,Fit,0.970278,0.191706
4,Synthetic_60_50,Binary,Train,10,Train,10,NOMINATE,IRT,Impute,0.9525,0.21394


### Smartvote

In [4]:
# Run the same evaluation on the 'Smartvote' dataset folder. This rebinds
# `metrics`, so the synthetic results are no longer held in this variable.
metrics = evaluateNOMINATE('Smartvote')
metrics

Sparsity:   0%|          | 0/9 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Test Fit:   0%|          | 0/290 [00:00<?, ?it/s]

Unnamed: 0,Dataset,Datatype,Train Set,Train Sparsity,Evaluation Set,Evaluation Sparsity,Embedding Method,Prediction Method,Task,Accuracy,RMSE
0,Smartvote,Original,Train,10,Train,10,NOMINATE,IRT,Fit,0.831468,0.268485
1,Smartvote,Original,Train,10,Train,10,NOMINATE,IRT,Impute,0.813810,0.278854
2,Smartvote,Original,Train,10,Train,10,NOMINATE,IRT,Overall,0.829585,0.269591
3,Smartvote,Binary,Train,10,Train,10,NOMINATE,IRT,Fit,0.831468,0.352462
4,Smartvote,Binary,Train,10,Train,10,NOMINATE,IRT,Impute,0.813810,0.365422
...,...,...,...,...,...,...,...,...,...,...,...
157,Smartvote,Original,Train,30,Test,30,NOMINATE,IRT,Impute,0.810815,0.282026
158,Smartvote,Original,Train,30,Test,30,NOMINATE,IRT,Overall,0.822299,0.273701
159,Smartvote,Binary,Train,30,Test,30,NOMINATE,IRT,Fit,0.827066,0.353138
160,Smartvote,Binary,Train,30,Test,30,NOMINATE,IRT,Impute,0.810815,0.368344


In [5]:
# Show only the metric rows whose 'Train Sparsity' equals 80.
metrics[metrics['Train Sparsity'].eq(80)]

Unnamed: 0,Dataset,Datatype,Train Set,Train Sparsity,Evaluation Set,Evaluation Sparsity,Embedding Method,Prediction Method,Task,Accuracy,RMSE
48,Smartvote,Original,Train,80,Train,80,NOMINATE,IRT,Fit,0.857583,0.259026
49,Smartvote,Original,Train,80,Train,80,NOMINATE,IRT,Impute,0.789447,0.297707
50,Smartvote,Original,Train,80,Train,80,NOMINATE,IRT,Overall,0.803074,0.289971
51,Smartvote,Binary,Train,80,Train,80,NOMINATE,IRT,Fit,0.857583,0.341158
52,Smartvote,Binary,Train,80,Train,80,NOMINATE,IRT,Impute,0.789447,0.386086
53,Smartvote,Binary,Train,80,Train,80,NOMINATE,IRT,Overall,0.803074,0.3771
54,Smartvote,Original,Train,80,Test,80,NOMINATE,IRT,Fit,0.835862,0.262743
55,Smartvote,Original,Train,80,Test,80,NOMINATE,IRT,Impute,0.781264,0.307753
56,Smartvote,Original,Train,80,Test,80,NOMINATE,IRT,Overall,0.792184,0.298751
57,Smartvote,Binary,Train,80,Test,80,NOMINATE,IRT,Fit,0.835862,0.341882
