# War of Words: Parameter Analysis

Display the table of the top-3 and bottom-3 dossiers according to the inertia parameter. Obtain the value of the rapporteur advantage feature.

In [1]:
import json
import numpy as np

from warofwords import TrainedWarOfWords

LEG = 8  # Legislature (EP term) used to build the data/model file names; set to 7 for EP7.

## Load data and model

In [2]:
# Conflicts: the raw file holds one JSON document per line, each of which is
# iterated below as a sequence of edits. The file is decoded as UTF-8
# explicitly so that the result does not depend on the platform's default
# encoding, and it is iterated lazily rather than through readlines().
with open(f'../data/raw/war-of-words-ep{LEG}.txt', encoding='utf-8') as f:
    conflicts = [json.loads(line) for line in f]

# Extract edits: flatten the per-conflict sequences into one list.
edits = [edit for datum in conflicts for edit in datum]

# Features and model data (the third value returned by load_data is not
# used in this notebook).
path = f'../data/processed/rapadv-ep{LEG}.pkl'
features, featmats, _ = TrainedWarOfWords.load_data(path)

# Model.
path = f'../models/rapadv-ep{LEG}.fit'
model = TrainedWarOfWords.load(path)

# Parameters.
# NOTE(review): this reads the private `_params` attribute; it is indexed
# with the dossier feature group further down, so it is presumably an array
# of parameter values -- confirm against TrainedWarOfWords.
params = model._params

## Display top-3 and bottom-3 dossiers

In [3]:
def get_dossiers(features, params, which, n=5):
    """Return the `n` dossiers with the largest or smallest parameter values.

    The dossier features are looked up with `features.get_group('dossier')`
    and their values are read from `params` at those positions.

    `which='top'` orders the dossiers by decreasing value; any other value
    (e.g. `which='bottom'`) orders them by increasing value.

    Returns a list of `(name, value)` pairs, where each name is obtained from
    `features.get_name`.
    """
    indices = features.get_group('dossier')
    pairs = list(zip(indices, params[indices]))
    # list.sort is stable, so dossiers with equal values keep their order.
    pairs.sort(key=lambda pair: pair[1], reverse=(which == 'top'))
    return [(features.get_name(idx), value) for idx, value in pairs[:n]]


def get_num_nodes(dossier, edits):
    """Count the edits whose 'dossier_ref' equals `dossier`."""
    return sum(1 for edit in edits if edit['dossier_ref'] == dossier)


def get_num_cliques(dossier, conflicts):
    """Count the conflicts of `dossier` that contain more than one edit.

    A conflict is attributed to the dossier named by the 'dossier_ref' of its
    first edit.
    """
    return sum(
        1
        for conflict in conflicts
        if conflict[0]['dossier_ref'] == dossier and len(conflict) > 1
    )


def get_average_clique_size(dossier, conflicts):
    """Return the mean size of the multi-edit conflicts of `dossier`.

    Only conflicts with more than one edit are taken into account; a conflict
    is attributed to the dossier named by the 'dossier_ref' of its first edit.
    Returns 0 when the dossier has no such conflict.
    """
    sizes = [
        len(conflict)
        for conflict in conflicts
        if conflict[0]['dossier_ref'] == dossier and len(conflict) > 1
    ]
    if not sizes:
        return 0
    return np.mean(sizes)

def get_proportion_accepted(dossier, edits):
    """Return the mean of the 'accepted' field over the edits of `dossier`.

    An edit belongs to the dossier when its 'dossier_ref' equals `dossier`.
    The 'accepted' values are presumably booleans or 0/1 flags, in which case
    the result is the fraction of accepted edits -- confirm against the data.

    Returns NaN when the dossier has no edit at all.
    """
    accepted = [
        edit['accepted'] for edit in edits if edit['dossier_ref'] == dossier
    ]
    if not accepted:
        # np.mean([]) also evaluates to NaN, but only after emitting a
        # RuntimeWarning; return the same value without the warning.
        return float('nan')
    return np.mean(accepted)


def display_table(ranking, edits, conflicts):
    """Print one fixed-width row of statistics per dossier in `ranking`.

    `ranking` is an iterable of `(dossier name, parameter value)` pairs. Each
    printed row holds, in order: the value, the dossier type, the shortened
    dossier reference, the number of edits, the number of multi-edit
    conflicts, the average size of those conflicts and the percentage of
    accepted edits. The first column is right-aligned on 6 characters and
    all the others on 13.
    """
    for doss, val in ranking:
        # A dossier whose name contains 'AD' is labelled as an opinion,
        # any other one as a report.
        typ = 'opinion' if 'AD' in doss else 'report'
        # Shorten the name to its reference. The offsets presumably skip a
        # fixed-length prefix of the name -- the naming scheme is not
        # visible from here.
        if typ == 'opinion':
            ref = doss[8:].replace(')', '-')
        else:
            ref = doss[4:]
        cells = [
            f'{val:.3f}',
            f'{typ}',
            ref,
            f'{get_num_nodes(doss, edits)}',
            f'{get_num_cliques(doss, conflicts)}',
            f'{get_average_clique_size(doss, conflicts):.1f}',
            f'{get_proportion_accepted(doss, edits) * 100:.1f}',
        ]
        first, *rest = cells
        print(f'{first:>6}' + ''.join(f'{cell:>13}' for cell in rest))

**Note:** This reproduces the results of Table 3 in the paper. The values do not match exactly (I must have made a mistake when reporting them in my write-up), but the dossiers and their order match, and the difference is at most 0.004 (for the bottom dossiers).


You can browse the Parliament documents to find details about the dossiers for [EP7](https://www.europarl.europa.eu/committees/en/archives/7/document-search) and [EP8](https://www.europarl.europa.eu/committees/en/archives/8/document-search) using the reference numbers in the table below.

In [4]:
# Rank the dossiers by parameter value: the three largest, then the three
# smallest (reversed so that the whole table reads in decreasing order).
top = get_dossiers(features, params, which='top', n=3)
btm = get_dossiers(features, params, which='bottom', n=3)[::-1]

# Print the header, then each group preceded by a horizontal rule.
fields = ['d_i', 'type', 'dossier', '#nodes', '#cliques', 'cliq sz', '% acc']
header = f'{fields[0]:>6}' + ''.join(f'{name:>13}' for name in fields[1:])
rule = '-' * len(header)
print(header)
for group in (top, btm):
    print(rule)
    display_table(group, edits, conflicts)

   d_i         type      dossier       #nodes     #cliques      cliq sz        % acc
------------------------------------------------------------------------------------
 3.306       report     198-2018         1040          272          3.1          2.6
 3.205       report     245-2018         2657          577          4.3          2.6
 3.107       report     213-2016         1292          319          3.4          6.0
------------------------------------------------------------------------------------
-2.615      opinion  2018-627870           60            1          2.0         90.0
-2.646      opinion  2018-622198           69            0          0.0         98.6
-2.852      opinion  2018-626670           63            6          2.0         84.1


## Rapporteur advantage

In [5]:
# The first entry of the "rapporteur" parameter group is reported as the
# rapporteur advantage.
rapporteur_advantage = model.parameters.get_group("rapporteur")[0]
print(f'Rapporteur advantage: {rapporteur_advantage:.2f}')

Rapporteur advantage: 1.31


Obtain the value of the rapporteur advantage for EP7 by setting `LEG = 7` in the first cell and running the notebook again.