# War of Words - Model Evaluation

Use this notebook to evaluate the trained War of Words models on their test sets:
- With MEPs only (`WoW`)
- With rapporteur advantage (`WoW(R)`)

In [1]:
import json
import numpy as np
import os

from warofwords import WarOfWords, TrainedWarOfWords

# Define all experiments: each entry pairs a test set ('data') with the
# trained model file ('model') that is evaluated on it.
# (Author's note: these values were not reported in the paper.)
experiments = [
    {'data': test_set, 'model': trained_model}
    for test_set, trained_model in (
        ('../data/processed/meponly-ep7-test.pkl',
         '../models/meponly-ep7.predict'),
        ('../data/processed/rapadv-ep7-test.pkl',
         '../models/rapadv-ep7.predict'),
        ('../data/processed/meponly-ep8-test.pkl',
         '../models/meponly-ep8.predict'),
        ('../data/processed/rapadv-ep8-test.pkl',
         '../models/rapadv-ep8.predict'),
    )
]

In [2]:
def evaluate(model, data):
    """Evaluate a trained model on a given test set.

    Prints the resolved test-set path, the resolved model path and the
    log-loss (three decimals), then returns the log-loss so callers can
    collect or compare results programmatically.

    Args:
        model: Path to a trained model file; it is made absolute and passed
            to `TrainedWarOfWords.load`.
        data: Path to a test-set file; it is made absolute and passed to
            `TrainedWarOfWords.load_data`.

    Returns:
        The value returned by the trained model's `log_loss` on the test set.
    """
    # Load data. Keep the caller's arguments intact by using separate names
    # for the resolved paths.
    data_path = os.path.abspath(data)
    print(f'  Test set: {data_path}')
    # load_data yields three values; the first one is not needed here.
    _, featmats, labels = TrainedWarOfWords.load_data(data_path)
    test = list(zip(featmats, labels))

    # Load trained model.
    model_path = os.path.abspath(model)
    print(f'  Model: {model_path}')
    trained = TrainedWarOfWords.load(model_path)

    # Evaluate log loss: compute once, report it, and hand it back.
    loss = trained.log_loss(test)
    print(f'  Log-loss: {loss:.3f}')
    return loss

## Evaluate

In [3]:
%%time

# Run every configured experiment in order, numbering them from 1.
for number, spec in enumerate(experiments, start=1):
    print(f'Experiment {number}')
    evaluate(model=spec['model'], data=spec['data'])

Experiment 1
  Test set: data/processed/meponly-ep7-test.pkl
  Model: models/meponly-ep7.predict
  Log-loss: 0.714
Experiment 2
  Test set: data/processed/rapadv-ep7-test.pkl
  Model: models/rapadv-ep7.predict
  Log-loss: 0.690
Experiment 3
  Test set: data/processed/meponly-ep8-test.pkl
  Model: models/meponly-ep8.predict
  Log-loss: 0.748
Experiment 4
  Test set: data/processed/rapadv-ep8-test.pkl
  Model: models/rapadv-ep8.predict
  Log-loss: 0.726
CPU times: user 6.78 s, sys: 2.33 s, total: 9.11 s
Wall time: 9.97 s
