# War of Words - Model Evaluation

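Use this notebook to train the War of Words models on the training split of each legislature (EP7 and EP8), then report the training log-likelihood and the log-loss on the corresponding test split. Two model types are evaluated:
- With MEPs only (`WoW`, model type `meponly`)
- With rapporteur advantage (`WoW(R)`, model type `rapadv`)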

In [1]:
import json
import numpy as np

from warofwords import WarOfWords, TrainedWarOfWords

# One entry per experiment. Each tuple is
# (legislature, regularizer value, model type); it is expanded below into a
# dict with the keys 'leg', 'reg' and 'mtype' that the evaluation loop reads.
experiments = [
    {'leg': leg, 'reg': reg, 'mtype': mtype}
    for leg, reg, mtype in (
        (7, 0.32, 'meponly'),
        (7, 0.39, 'rapadv'),
        (8, 0.35, 'meponly'),
        (8, 0.39, 'rapadv'),
    )
]


def evaluate(modeltype, leg, regularizer):
    """Train one model on a legislature and print its train/test scores.

    The training and test sets are read from
    ``../data/processed/{modeltype}-ep{leg}-train.pkl`` and
    ``../data/processed/{modeltype}-ep{leg}-test.pkl``.

    Args:
        modeltype: Model type used in the data file names (the experiments
            in this notebook use 'meponly' and 'rapadv').
        leg: Legislature number used in the data file names.
        regularizer: Value forwarded to ``WarOfWords.Hyperparameters``.

    Returns:
        None. Results are printed: the value of ``model.log_likelihood`` at
        the fitted parameters (2 decimals) and the log-loss of the trained
        model on the test set (3 decimals).
    """
    print(f'Evaluating "{modeltype}" on EP{leg}')

    # Training split: feature index, feature matrices and labels.
    train_path = f'../data/processed/{modeltype}-ep{leg}-train.pkl'
    features, train_mats, train_labels = WarOfWords.load_data(train_path)
    train_set = [*zip(train_mats, train_labels)]

    # Untrained model over the training pairs.
    hyper = WarOfWords.Hyperparameters(regularizer=regularizer)
    learner = WarOfWords(train_set, features, hyper, bias_key='bias')

    # Fit; the second value returned by fit() is not used here.
    print('  Training...')
    fitted, _cost = learner.fit()
    train_llh = learner.log_likelihood(fitted['params'].as_array())
    print(f'  Log-likelihood: {train_llh:.2f}')

    # Wrap the fitted parameters. The feature index from the *training*
    # split is used; `fitted` is expanded into keyword arguments.
    predictor = TrainedWarOfWords(features, hyper, **fitted)

    # Test split: only the matrices and labels are needed from it.
    test_path = f'../data/processed/{modeltype}-ep{leg}-test.pkl'
    _test_features, test_mats, test_labels = TrainedWarOfWords.load_data(
        test_path)
    test_set = [*zip(test_mats, test_labels)]

    # Report the log-loss on the test pairs.
    test_loss = predictor.log_loss(test_set)
    print(f'  Log-loss on test set: {test_loss:.3f}')

## Evaluate

In [2]:
%%time

# Train and score every configured experiment in order; the short keys of
# each experiment dict are mapped onto evaluate()'s parameter names.
for exp in experiments:
    evaluate(
        modeltype=exp['mtype'],
        leg=exp['leg'],
        regularizer=exp['reg'],
    )

Evaluating "meponly" on EP7
  Training...
  Log-likelihood: -61034.69
  Log-loss on test set: 0.714
Evaluating "rapadv" on EP7
  Training...
  Log-likelihood: -58885.19
  Log-loss on test set: 0.690
Evaluating "meponly" on EP8
  Training...
  Log-likelihood: -71970.44
  Log-loss on test set: 0.748
Evaluating "rapadv" on EP8
  Training...
  Log-likelihood: -69568.96
  Log-loss on test set: 0.726
CPU times: user 13min 14s, sys: 12.7 s, total: 13min 27s
Wall time: 14min 5s
