In [2]:
# Extract top k important sentences from the input document based on attribution scores

import inseq
import torch
from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
from datasets import load_dataset
import evaluate

import json
import argparse
from pathlib import Path
from tqdm import tqdm


input_key = {
    "xsum": "document",
    "cnn_dm": "article"
}

output_key = {
    "xsum": "summary",
    "cnn_dm": "highlights"
}

# Check if the current token is the end of a sentence
# Note that: this algo cannot handle the corner case with abbreviation, e.g. "P.E."
def is_sentence_ending(text):
    if text.endswith(("!", ".", "?")):
        return True
    if text.endswith((".\"", "?\"", "!\"")):
        return True
    
def get_token_length(text, tokenizer):
    encoded_text = tokenizer(text, 
                             return_tensors="pt", 
                             add_special_tokens=False).input_ids
    
    return encoded_text.shape[-1]

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
login("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")

model_name = "mistralai/Mistral-7B-Instruct-v0.2"
# Load model and tokenizer
# model_name = "meta-llama/Llama-2-7b-hf"
config = AutoConfig.from_pretrained(model_name)
context_window_length = getattr(config, 'max_position_embeddings', 
                                getattr(config, 'n_positions', None))

model = AutoModelForCausalLM.from_pretrained(model_name, 
                                            torch_dtype=torch.bfloat16, 
                                            device_map="auto",
                                            use_auth_token=True,
                                            cache_dir="/mnt/ssd/llms")
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.model_max_length = context_window_length

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.01s/it]


In [4]:
# test_data = load_dataset("xsum", split="test")
test_data = load_dataset('cnn_dailymail', '3.0.0', split='test')
test_data[0]

{'article': '(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday

In [6]:
## Alternative way to implement inseq attribution

instruction = "Summarise the document below:"
prompt_message = f"{instruction}\n{test_data[1]['document']}"
messages = [{
    "role": "user", 
    "content": prompt_message
}]

prompt_text = tokenizer.apply_chat_template(messages,
                                            tokenize=False,
                                            add_generation_prompt=True)
print(prompt_text)
inseq_model = inseq.load_model(model, "attention", tokenizer=model_name)
out = inseq_model.attribute(prompt_text, 
                            generation_args={"do_sample": False, 
                                             "max_new_tokens": 64,
                                             "temperature": 0.0,
                                             "skip_special_tokens": False}, 
                            skip_special_tokens=False)
out.show()

KeyError: 'document'

In [5]:
prompt = tokenizer.apply_chat_template(messages, 
                                        return_tensors="pt", 
                                        add_generation_prompt=True).to(model.device)
print("Token length: ", prompt.shape)
prompt_text = tokenizer.apply_chat_template(messages,
                                            tokenize=False,
                                            add_generation_prompt=True)

inseq_model = inseq.load_model(model, "attention", tokenizer=model_name)
output_ids = model.generate(prompt,
                            do_sample=False,
                            max_new_tokens=64,
                            temperature=0.0)

output_text = tokenizer.decode(output_ids[0, prompt.shape[1]:], skip_special_tokens=False)
# output_text = output_text.split('.')[0] + "."  # Note: only keep the first sentence for debugging; for summarisaiton task: keep until \n\n or the last complete sentence [TODO]
# output_text = tokenizer.decode(output_ids[0, prompt.shape[1]:], skip_special_tokens=True)

print(output_text)

NameError: name 'messages' is not defined

In [9]:
instruction = "Summarise the document below:"
# prompt_message = f"{instruction} If you're famous and performing the American national anthem, be prepared to become a national hero or a national disgrace."
# prompt_message = f"{instruction}\n\n{test_data[0]['document']}" # Test on XSum
prompt_message = f"{instruction}\n\n{test_data[2]['article']}"  # Test on CNN/DM
messages = [{
    "role": "user", 
    "content": prompt_message
}]

prompt = tokenizer.apply_chat_template(messages, 
                                        return_tensors="pt", 
                                        add_generation_prompt=True).to(model.device)
print("Token length: ", prompt.shape)
prompt_text = tokenizer.apply_chat_template(messages,
                                            tokenize=False,
                                            add_generation_prompt=True)

inseq_model = inseq.load_model(model, "attention", tokenizer=model_name)
output_ids = model.generate(prompt,
                            do_sample=False,
                            max_new_tokens=64,
                            temperature=0.0)

output_text = tokenizer.decode(output_ids[0, prompt.shape[1]:], skip_special_tokens=False)
# output_text = output_text.split('.')[0] + "."  # Note: only keep the first sentence for debugging; for summarisaiton task: keep until \n\n or the last complete sentence [TODO]
# output_text = tokenizer.decode(output_ids[0, prompt.shape[1]:], skip_special_tokens=True)

print(output_text)
out = inseq_model.attribute(
    input_texts=prompt_text,
    generated_texts=prompt_text + output_text,
)

# out.show()

The model is loaded with a device map. The device cannot be changed after loading.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Token length:  torch.Size([1, 1043])


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Mohammad Javad Zarif is the Iranian foreign minister who has been instrumental in nuclear discussions with the US, aiming to end sanctions against Iran. He received a warm welcome upon returning to Iran and is known for his polished and jovial demeanor. Less known facts about Z


Attributing with attention...: 1045it [00:05,  5.61s/it]


In [9]:
print(output_text)
out = inseq_model.attribute(
    input_texts=prompt_text,
    generated_texts=prompt_text + output_text,
)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


A multi-millionaire property developer, Peter Morgan, was infatuated with a former burlesque dancer and escort, Georgina Symonds, whom he met three years prior.


Attributing with attention...: 2873it [00:00, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 16.16 GiB. GPU 

In [6]:
# Aggregate the attribution scores for each input sentence
# Process intrucitons and special tokens in chat template separately
start_marker = "<s><s>[INST]"
end_marker = "[/INST]"

# Calculate the token length for each part of the prompt
len_start_marker = get_token_length(start_marker, tokenizer)
len_end_marker = get_token_length(end_marker, tokenizer)
len_instruction = get_token_length(instruction, tokenizer)
len_prompt = get_token_length(prompt_message, tokenizer)
total_prompt_len = len_start_marker + len_prompt

doc_start_pos = len_start_marker + len_instruction
start_span = (0, len_start_marker)
instr_span = (len_start_marker, len_start_marker + len_instruction)
end_span = (total_prompt_len, total_prompt_len + len_end_marker)

ends = [i + 1 for i, t in enumerate(out[0].target) if is_sentence_ending(t.token) and i < total_prompt_len] + [total_prompt_len]
starts = [doc_start_pos] + [i + 1 for i, t in enumerate(out[0].target) if is_sentence_ending(t.token) and i < total_prompt_len]
spans = [start_span, instr_span] + list(zip(starts, ends)) + [end_span]

# Remove empty spans 
processed_spans = []
for span in spans:
    if span[0] + 1 < span[1]:
        processed_spans.append(span)

print(processed_spans)
res = out.aggregate("spans", target_spans=processed_spans)
res.show()

[(0, 5), (5, 12), (12, 55), (55, 79), (79, 130), (130, 164), (164, 184), (184, 211), (211, 240), (240, 287), (287, 304), (304, 337), (337, 378), (378, 394), (394, 403), (403, 459), (459, 500), (500, 536), (536, 555), (555, 596), (596, 610), (610, 637), (637, 666), (666, 695), (695, 720), (720, 746), (746, 758), (758, 787), (787, 811), (811, 815)]


Unnamed: 0_level_0,The,▁Palestinian,▁Authority,▁officially,▁joined,▁the,▁International,▁Crim,inal,▁Court,▁(,IC,C,),▁as,▁its,▁,1,2,3,rd,▁member,▁on,▁Wednesday,",",▁grant,ing,▁the,▁court,▁juris,diction,▁over,▁alleged,▁crimes,▁in,▁Palestinian,▁territor,ies,.
<s><s>▁[INST],0.564,0.574,0.575,0.548,0.552,0.56,0.512,0.547,0.505,0.546,0.516,0.548,0.565,0.549,0.44,0.492,0.509,0.529,0.549,0.528,0.528,0.484,0.451,0.486,0.47,0.433,0.52,0.447,0.439,0.423,0.5,0.415,0.41,0.422,0.406,0.42,0.441,0.454,0.363
▁Summarise▁the▁document▁below:,0.059,0.029,0.005,0.006,0.008,0.004,0.004,0.003,0.001,0.002,0.004,0.005,0.002,0.002,0.005,0.003,0.002,0.002,0.011,0.002,0.002,0.002,0.004,0.005,0.005,0.004,0.002,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001
"<0x0A><0x0A>(CNN)The▁Palestinian▁Authority▁officially▁became▁the▁123rd▁member▁of▁the▁International▁Criminal▁Court▁on▁Wednesday,▁a▁step▁that▁gives▁the▁court▁jurisdiction▁over▁alleged▁crimes▁in▁Palestinian▁territories.",0.104,0.115,0.132,0.123,0.112,0.106,0.118,0.13,0.146,0.139,0.116,0.092,0.101,0.109,0.104,0.099,0.112,0.124,0.097,0.126,0.113,0.108,0.102,0.116,0.106,0.106,0.124,0.142,0.127,0.143,0.182,0.153,0.182,0.195,0.192,0.165,0.166,0.166,0.127
"▁The▁formal▁accession▁was▁marked▁with▁a▁ceremony▁at▁The▁Hague,▁in▁the▁Netherlands,▁where▁the▁court▁is▁based.",0.01,0.018,0.011,0.018,0.02,0.012,0.008,0.006,0.004,0.005,0.011,0.007,0.004,0.005,0.014,0.01,0.006,0.005,0.006,0.007,0.004,0.005,0.011,0.013,0.012,0.013,0.007,0.006,0.011,0.006,0.003,0.004,0.003,0.002,0.002,0.003,0.002,0.002,0.004
"▁The▁Palestinians▁signed▁the▁ICC's▁founding▁Rome▁Statute▁in▁January,▁when▁they▁also▁accepted▁its▁jurisdiction▁over▁alleged▁crimes▁committed▁""in▁the▁occupied▁Palestinian▁territory,▁including▁East▁Jerusalem,▁since▁June▁13,▁2014.""",0.01,0.012,0.014,0.013,0.01,0.01,0.014,0.015,0.011,0.009,0.009,0.014,0.009,0.009,0.01,0.008,0.007,0.006,0.006,0.006,0.004,0.005,0.007,0.007,0.006,0.012,0.008,0.012,0.009,0.011,0.013,0.017,0.024,0.022,0.024,0.024,0.019,0.029,0.024
"▁Later▁that▁month,▁the▁ICC▁opened▁a▁preliminary▁examination▁into▁the▁situation▁in▁Palestinian▁territories,▁paving▁the▁way▁for▁possible▁war▁crimes▁investigations▁against▁Israelis.",0.003,0.003,0.005,0.004,0.004,0.004,0.004,0.006,0.02,0.005,0.004,0.004,0.005,0.004,0.005,0.003,0.002,0.002,0.003,0.003,0.002,0.002,0.004,0.004,0.004,0.007,0.004,0.009,0.005,0.006,0.005,0.008,0.012,0.013,0.012,0.015,0.015,0.012,0.014
"▁As▁members▁of▁the▁court,▁Palestinians▁may▁be▁subject▁to▁counter-charges▁as▁well.",0.009,0.006,0.01,0.007,0.007,0.008,0.007,0.005,0.006,0.003,0.004,0.003,0.003,0.003,0.007,0.009,0.006,0.004,0.004,0.004,0.003,0.006,0.006,0.003,0.003,0.007,0.006,0.008,0.005,0.007,0.003,0.005,0.005,0.004,0.003,0.003,0.003,0.002,0.006
"▁Israel▁and▁the▁United▁States,▁neither▁of▁which▁is▁an▁ICC▁member,▁opposed▁the▁Palestinians'▁efforts▁to▁join▁the▁body.",0.006,0.006,0.008,0.009,0.009,0.011,0.011,0.007,0.005,0.004,0.005,0.006,0.006,0.004,0.007,0.006,0.005,0.004,0.004,0.004,0.003,0.004,0.007,0.004,0.004,0.007,0.004,0.004,0.005,0.003,0.002,0.003,0.002,0.002,0.002,0.002,0.002,0.002,0.004
"▁But▁Palestinian▁Foreign▁Minister▁Riad▁al-Malki,▁speaking▁at▁Wednesday's▁ceremony,▁said▁it▁was▁a▁move▁toward▁greater▁justice.",0.006,0.006,0.011,0.005,0.004,0.003,0.002,0.002,0.003,0.002,0.003,0.002,0.001,0.001,0.004,0.003,0.002,0.002,0.005,0.003,0.002,0.001,0.004,0.008,0.01,0.006,0.003,0.003,0.002,0.002,0.002,0.003,0.002,0.002,0.002,0.001,0.001,0.001,0.002
"▁""As▁Palestine▁formally▁becomes▁a▁State▁Party▁to▁the▁Rome▁Statute▁today,▁the▁world▁is▁also▁a▁step▁closer▁to▁ending▁a▁long▁era▁of▁impunity▁and▁injustice,""▁he▁said,▁according▁to▁an▁ICC▁news▁release.",0.005,0.004,0.006,0.008,0.007,0.006,0.006,0.005,0.004,0.004,0.004,0.005,0.005,0.003,0.005,0.01,0.005,0.005,0.004,0.003,0.003,0.005,0.005,0.007,0.004,0.006,0.003,0.004,0.003,0.003,0.002,0.002,0.003,0.003,0.002,0.002,0.002,0.002,0.002
"▁""Indeed,▁today▁brings▁us▁closer▁to▁our▁shared▁goals▁of▁justice▁and▁peace.""",0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.0,0.0,0.0,0.001
"▁Judge▁Kuniko▁Ozaki,▁a▁vice▁president▁of▁the▁ICC,▁said▁acceding▁to▁the▁treaty▁was▁just▁the▁first▁step▁for▁the▁Palestinians.",0.008,0.007,0.006,0.006,0.006,0.005,0.005,0.004,0.003,0.003,0.003,0.005,0.004,0.003,0.005,0.004,0.003,0.003,0.005,0.003,0.002,0.003,0.003,0.003,0.003,0.005,0.004,0.003,0.004,0.003,0.002,0.002,0.001,0.001,0.001,0.002,0.002,0.001,0.002
"▁""As▁the▁Rome▁Statute▁today▁enters▁into▁force▁for▁the▁State▁of▁Palestine,▁Palestine▁acquires▁all▁the▁rights▁as▁well▁as▁responsibilities▁that▁come▁with▁being▁a▁State▁Party▁to▁the▁Statute.",0.002,0.002,0.005,0.005,0.003,0.004,0.003,0.002,0.002,0.002,0.002,0.002,0.002,0.002,0.004,0.006,0.006,0.004,0.001,0.003,0.003,0.004,0.003,0.002,0.003,0.004,0.004,0.005,0.003,0.004,0.003,0.003,0.002,0.001,0.001,0.002,0.002,0.001,0.002
"▁These▁are▁substantive▁commitments,▁which▁cannot▁be▁taken▁lightly,""▁she▁said.",0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.0,0.001,0.001,0.001,0.0,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.0,0.001,0.001,0.001,0.002,0.002,0.001,0.001,0.001,0.001,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.001
▁Rights▁group▁Human▁Rights▁Watch▁welcomed▁the▁development.,0.003,0.002,0.002,0.002,0.002,0.002,0.001,0.002,0.002,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.002,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.0,0.001,0.0,0.001
"▁""Governments▁seeking▁to▁penalize▁Palestine▁for▁joining▁the▁ICC▁should▁immediately▁end▁their▁pressure,▁and▁countries▁that▁support▁universal▁acceptance▁of▁the▁court's▁treaty▁should▁speak▁out▁to▁welcome▁its▁membership,""▁said▁Balkees▁Jarrah,▁international▁justice▁counsel▁for▁the▁group.",0.002,0.002,0.004,0.003,0.003,0.005,0.005,0.004,0.005,0.004,0.003,0.002,0.004,0.003,0.003,0.002,0.002,0.002,0.002,0.002,0.002,0.002,0.002,0.001,0.002,0.002,0.002,0.002,0.003,0.003,0.002,0.002,0.002,0.002,0.002,0.001,0.002,0.001,0.001
"▁""What's▁objectionable▁is▁the▁attempts▁to▁undermine▁international▁justice,▁not▁Palestine's▁decision▁to▁join▁a▁treaty▁to▁which▁over▁100▁countries▁around▁the▁world▁are▁members.""",0.006,0.004,0.004,0.004,0.004,0.005,0.003,0.003,0.005,0.002,0.002,0.002,0.002,0.002,0.003,0.002,0.002,0.005,0.004,0.003,0.003,0.003,0.003,0.003,0.002,0.003,0.002,0.002,0.002,0.002,0.001,0.002,0.002,0.001,0.001,0.001,0.001,0.001,0.002
"▁In▁January,▁when▁the▁preliminary▁ICC▁examination▁was▁opened,▁Israeli▁Prime▁Minister▁Benjamin▁Netanyahu▁described▁it▁as▁an▁outrage,▁saying▁the▁court▁was▁overstepping▁its▁boundaries.",0.004,0.003,0.003,0.003,0.002,0.002,0.002,0.003,0.005,0.003,0.002,0.003,0.003,0.002,0.002,0.001,0.001,0.001,0.003,0.002,0.001,0.001,0.002,0.003,0.003,0.003,0.002,0.002,0.003,0.003,0.002,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001
"▁The▁United▁States▁also▁said▁it▁""strongly""▁disagreed▁with▁the▁court's▁decision.",0.003,0.002,0.001,0.001,0.002,0.001,0.001,0.001,0.002,0.001,0.001,0.002,0.002,0.001,0.001,0.001,0.001,0.001,0.003,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.002,0.001,0.001,0.001,0.001,0.0,0.0,0.001,0.001,0.001
"▁""As▁we▁have▁said▁repeatedly,▁we▁do▁not▁believe▁that▁Palestine▁is▁a▁state▁and▁therefore▁we▁do▁not▁believe▁that▁it▁is▁eligible▁to▁join▁the▁ICC,""▁the▁State▁Department▁said▁in▁a▁statement.",0.002,0.001,0.003,0.002,0.002,0.003,0.003,0.003,0.007,0.003,0.002,0.002,0.003,0.002,0.002,0.002,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.002,0.002,0.002,0.002,0.002,0.002,0.002,0.002,0.002,0.001,0.002,0.002,0.001
▁It▁urged▁the▁warring▁sides▁to▁resolve▁their▁differences▁through▁direct▁negotiations.,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.0,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001
"▁""We▁will▁continue▁to▁oppose▁actions▁against▁Israel▁at▁the▁ICC▁as▁counterproductive▁to▁the▁cause▁of▁peace,""▁it▁said.",0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.002,0.003,0.002,0.001,0.002,0.002,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001
"▁But▁the▁ICC▁begs▁to▁differ▁with▁the▁definition▁of▁a▁state▁for▁its▁purposes▁and▁refers▁to▁the▁territories▁as▁""Palestine.""",0.002,0.002,0.004,0.003,0.002,0.002,0.003,0.003,0.004,0.003,0.002,0.003,0.005,0.002,0.002,0.002,0.002,0.002,0.002,0.002,0.001,0.002,0.002,0.001,0.003,0.003,0.003,0.003,0.003,0.003,0.003,0.003,0.003,0.002,0.002,0.004,0.006,0.009,0.005
"▁While▁a▁preliminary▁examination▁is▁not▁a▁formal▁investigation,▁it▁allows▁the▁court▁to▁review▁evidence▁and▁determine▁whether▁to▁investigate▁suspects▁on▁both▁sides.",0.002,0.001,0.002,0.002,0.002,0.002,0.002,0.002,0.008,0.003,0.001,0.001,0.002,0.002,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.002,0.002,0.004,0.004,0.004,0.003,0.003,0.004,0.004,0.003,0.002,0.002,0.002,0.003
"▁Prosecutor▁Fatou▁Bensouda▁said▁her▁office▁would▁""conduct▁its▁analysis▁in▁full▁independence▁and▁impartiality.""",0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.003,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001
"▁The▁war▁between▁Israel▁and▁Hamas▁militants▁in▁Gaza▁last▁summer▁left▁more▁than▁2,000▁people▁dead.",0.007,0.006,0.004,0.003,0.003,0.002,0.002,0.002,0.011,0.002,0.002,0.003,0.002,0.002,0.002,0.001,0.001,0.002,0.005,0.002,0.001,0.001,0.002,0.003,0.003,0.002,0.001,0.001,0.001,0.001,0.001,0.002,0.002,0.003,0.003,0.003,0.004,0.006,0.004
▁The▁inquiry▁will▁include▁alleged▁war▁crimes▁committed▁since▁June.,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.01,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.003,0.003,0.006,0.011,0.007,0.002,0.002,0.003,0.004
"▁The▁International▁Criminal▁Court▁was▁set▁up▁in▁2002▁to▁prosecute▁genocide,▁crimes▁against▁humanity▁and▁war▁crimes.",0.004,0.004,0.004,0.003,0.003,0.005,0.012,0.017,0.015,0.014,0.006,0.006,0.008,0.006,0.003,0.002,0.002,0.003,0.003,0.002,0.002,0.002,0.003,0.002,0.002,0.003,0.002,0.003,0.006,0.006,0.004,0.003,0.004,0.006,0.006,0.002,0.002,0.002,0.003
"▁CNN's▁Vasco▁Cotovio,▁Kareem▁Khadder▁and▁Faith▁Karimi▁contributed▁to▁this▁report.",0.016,0.009,0.005,0.004,0.005,0.003,0.003,0.004,0.003,0.003,0.003,0.007,0.003,0.003,0.003,0.003,0.002,0.002,0.008,0.003,0.002,0.002,0.002,0.004,0.005,0.004,0.002,0.002,0.002,0.002,0.001,0.001,0.001,0.001,0.001,0.001,0.002,0.002,0.001
▁[/INST],0.156,0.097,0.038,0.043,0.042,0.028,0.024,0.026,0.018,0.021,0.03,0.032,0.017,0.02,0.03,0.019,0.016,0.022,0.045,0.021,0.022,0.014,0.028,0.032,0.032,0.028,0.017,0.014,0.014,0.014,0.011,0.017,0.013,0.013,0.011,0.013,0.013,0.012,0.017
The,Unnamed: 1_level_31,0.077,0.057,0.04,0.032,0.021,0.018,0.014,0.008,0.011,0.018,0.016,0.009,0.009,0.02,0.012,0.008,0.009,0.02,0.009,0.01,0.006,0.016,0.016,0.016,0.015,0.008,0.006,0.006,0.005,0.003,0.007,0.005,0.005,0.004,0.006,0.004,0.004,0.007
▁Palestinian,Unnamed: 1_level_32,Unnamed: 2_level_32,0.073,0.057,0.021,0.019,0.02,0.016,0.019,0.011,0.017,0.01,0.007,0.009,0.013,0.012,0.011,0.008,0.006,0.007,0.007,0.008,0.011,0.006,0.007,0.008,0.006,0.008,0.007,0.007,0.004,0.005,0.009,0.006,0.008,0.016,0.012,0.008,0.006
▁Authority,Unnamed: 1_level_33,Unnamed: 2_level_33,Unnamed: 3_level_33,0.071,0.058,0.037,0.027,0.017,0.013,0.015,0.027,0.012,0.008,0.013,0.023,0.022,0.018,0.01,0.007,0.008,0.01,0.014,0.018,0.01,0.011,0.011,0.008,0.009,0.009,0.008,0.005,0.006,0.007,0.005,0.006,0.012,0.015,0.008,0.006
▁officially,Unnamed: 1_level_34,Unnamed: 2_level_34,Unnamed: 3_level_34,Unnamed: 4_level_34,0.067,0.061,0.032,0.015,0.007,0.011,0.027,0.015,0.007,0.012,0.03,0.028,0.017,0.01,0.012,0.01,0.008,0.011,0.018,0.019,0.018,0.018,0.008,0.008,0.006,0.006,0.003,0.006,0.004,0.003,0.003,0.004,0.003,0.003,0.005
▁joined,Unnamed: 1_level_35,Unnamed: 2_level_35,Unnamed: 3_level_35,Unnamed: 4_level_35,Unnamed: 5_level_35,0.065,0.085,0.032,0.017,0.026,0.047,0.018,0.009,0.025,0.049,0.047,0.031,0.017,0.01,0.012,0.013,0.024,0.027,0.02,0.02,0.024,0.016,0.017,0.011,0.01,0.005,0.012,0.008,0.004,0.005,0.005,0.004,0.003,0.007
▁the,Unnamed: 1_level_36,Unnamed: 2_level_36,Unnamed: 3_level_36,Unnamed: 4_level_36,Unnamed: 5_level_36,Unnamed: 6_level_36,0.056,0.045,0.015,0.027,0.019,0.013,0.009,0.014,0.016,0.013,0.01,0.006,0.003,0.004,0.005,0.01,0.006,0.004,0.004,0.005,0.005,0.006,0.007,0.004,0.002,0.003,0.003,0.002,0.002,0.003,0.002,0.002,0.002
▁International,Unnamed: 1_level_37,Unnamed: 2_level_37,Unnamed: 3_level_37,Unnamed: 4_level_37,Unnamed: 5_level_37,Unnamed: 6_level_37,Unnamed: 7_level_37,0.055,0.061,0.047,0.017,0.023,0.02,0.01,0.01,0.009,0.007,0.006,0.003,0.004,0.005,0.007,0.006,0.003,0.004,0.004,0.004,0.007,0.012,0.006,0.003,0.003,0.003,0.002,0.002,0.003,0.003,0.002,0.002
▁Crim,Unnamed: 1_level_38,Unnamed: 2_level_38,Unnamed: 3_level_38,Unnamed: 4_level_38,Unnamed: 5_level_38,Unnamed: 6_level_38,Unnamed: 7_level_38,Unnamed: 8_level_38,0.045,0.02,0.01,0.005,0.006,0.005,0.004,0.003,0.002,0.002,0.002,0.002,0.002,0.002,0.003,0.002,0.003,0.003,0.003,0.004,0.004,0.003,0.003,0.003,0.004,0.006,0.006,0.003,0.003,0.002,0.003
inal,Unnamed: 1_level_39,Unnamed: 2_level_39,Unnamed: 3_level_39,Unnamed: 4_level_39,Unnamed: 5_level_39,Unnamed: 6_level_39,Unnamed: 7_level_39,Unnamed: 8_level_39,Unnamed: 9_level_39,0.044,0.027,0.024,0.023,0.013,0.009,0.007,0.005,0.004,0.002,0.002,0.003,0.007,0.005,0.002,0.003,0.003,0.004,0.008,0.014,0.007,0.005,0.003,0.005,0.004,0.004,0.003,0.003,0.002,0.002
▁Court,Unnamed: 1_level_40,Unnamed: 2_level_40,Unnamed: 3_level_40,Unnamed: 4_level_40,Unnamed: 5_level_40,Unnamed: 6_level_40,Unnamed: 7_level_40,Unnamed: 8_level_40,Unnamed: 9_level_40,Unnamed: 10_level_40,0.05,0.054,0.037,0.052,0.052,0.03,0.021,0.012,0.005,0.008,0.01,0.018,0.027,0.013,0.016,0.016,0.011,0.015,0.018,0.016,0.01,0.01,0.012,0.008,0.008,0.008,0.009,0.006,0.007
▁(,Unnamed: 1_level_41,Unnamed: 2_level_41,Unnamed: 3_level_41,Unnamed: 4_level_41,Unnamed: 5_level_41,Unnamed: 6_level_41,Unnamed: 7_level_41,Unnamed: 8_level_41,Unnamed: 9_level_41,Unnamed: 10_level_41,Unnamed: 11_level_41,0.049,0.053,0.034,0.018,0.012,0.01,0.009,0.009,0.008,0.008,0.009,0.008,0.007,0.007,0.006,0.005,0.005,0.007,0.006,0.002,0.004,0.003,0.003,0.002,0.003,0.003,0.003,0.003
IC,Unnamed: 1_level_42,Unnamed: 2_level_42,Unnamed: 3_level_42,Unnamed: 4_level_42,Unnamed: 5_level_42,Unnamed: 6_level_42,Unnamed: 7_level_42,Unnamed: 8_level_42,Unnamed: 9_level_42,Unnamed: 10_level_42,Unnamed: 11_level_42,Unnamed: 12_level_42,0.049,0.031,0.01,0.006,0.004,0.005,0.003,0.004,0.004,0.005,0.004,0.003,0.003,0.003,0.004,0.007,0.012,0.004,0.002,0.002,0.002,0.002,0.001,0.002,0.002,0.002,0.002
C,Unnamed: 1_level_43,Unnamed: 2_level_43,Unnamed: 3_level_43,Unnamed: 4_level_43,Unnamed: 5_level_43,Unnamed: 6_level_43,Unnamed: 7_level_43,Unnamed: 8_level_43,Unnamed: 9_level_43,Unnamed: 10_level_43,Unnamed: 11_level_43,Unnamed: 12_level_43,Unnamed: 13_level_43,0.028,0.029,0.012,0.008,0.006,0.003,0.005,0.006,0.008,0.008,0.004,0.004,0.005,0.005,0.009,0.011,0.009,0.005,0.003,0.005,0.004,0.004,0.003,0.004,0.003,0.003
),Unnamed: 1_level_44,Unnamed: 2_level_44,Unnamed: 3_level_44,Unnamed: 4_level_44,Unnamed: 5_level_44,Unnamed: 6_level_44,Unnamed: 7_level_44,Unnamed: 8_level_44,Unnamed: 9_level_44,Unnamed: 10_level_44,Unnamed: 11_level_44,Unnamed: 12_level_44,Unnamed: 13_level_44,Unnamed: 14_level_44,0.045,0.043,0.023,0.012,0.006,0.009,0.011,0.016,0.024,0.014,0.016,0.015,0.009,0.009,0.007,0.007,0.003,0.007,0.005,0.003,0.004,0.004,0.003,0.003,0.006
▁as,Unnamed: 1_level_45,Unnamed: 2_level_45,Unnamed: 3_level_45,Unnamed: 4_level_45,Unnamed: 5_level_45,Unnamed: 6_level_45,Unnamed: 7_level_45,Unnamed: 8_level_45,Unnamed: 9_level_45,Unnamed: 10_level_45,Unnamed: 11_level_45,Unnamed: 12_level_45,Unnamed: 13_level_45,Unnamed: 14_level_45,Unnamed: 15_level_45,0.048,0.063,0.029,0.01,0.018,0.022,0.031,0.027,0.015,0.015,0.013,0.008,0.008,0.005,0.005,0.002,0.006,0.004,0.002,0.003,0.003,0.002,0.002,0.004
▁its,Unnamed: 1_level_46,Unnamed: 2_level_46,Unnamed: 3_level_46,Unnamed: 4_level_46,Unnamed: 5_level_46,Unnamed: 6_level_46,Unnamed: 7_level_46,Unnamed: 8_level_46,Unnamed: 9_level_46,Unnamed: 10_level_46,Unnamed: 11_level_46,Unnamed: 12_level_46,Unnamed: 13_level_46,Unnamed: 14_level_46,Unnamed: 15_level_46,Unnamed: 16_level_46,0.056,0.072,0.025,0.036,0.045,0.042,0.018,0.01,0.008,0.008,0.006,0.007,0.006,0.005,0.002,0.004,0.003,0.002,0.002,0.004,0.003,0.002,0.002
▁,Unnamed: 1_level_47,Unnamed: 2_level_47,Unnamed: 3_level_47,Unnamed: 4_level_47,Unnamed: 5_level_47,Unnamed: 6_level_47,Unnamed: 7_level_47,Unnamed: 8_level_47,Unnamed: 9_level_47,Unnamed: 10_level_47,Unnamed: 11_level_47,Unnamed: 12_level_47,Unnamed: 13_level_47,Unnamed: 14_level_47,Unnamed: 15_level_47,Unnamed: 16_level_47,Unnamed: 17_level_47,0.049,0.053,0.042,0.028,0.017,0.007,0.005,0.004,0.004,0.003,0.003,0.003,0.002,0.001,0.002,0.002,0.001,0.001,0.002,0.001,0.001,0.001
1,Unnamed: 1_level_48,Unnamed: 2_level_48,Unnamed: 3_level_48,Unnamed: 4_level_48,Unnamed: 5_level_48,Unnamed: 6_level_48,Unnamed: 7_level_48,Unnamed: 8_level_48,Unnamed: 9_level_48,Unnamed: 10_level_48,Unnamed: 11_level_48,Unnamed: 12_level_48,Unnamed: 13_level_48,Unnamed: 14_level_48,Unnamed: 15_level_48,Unnamed: 16_level_48,Unnamed: 17_level_48,Unnamed: 18_level_48,0.038,0.029,0.02,0.011,0.01,0.01,0.011,0.011,0.008,0.007,0.007,0.006,0.005,0.008,0.005,0.005,0.005,0.006,0.006,0.005,0.008
2,Unnamed: 1_level_49,Unnamed: 2_level_49,Unnamed: 3_level_49,Unnamed: 4_level_49,Unnamed: 5_level_49,Unnamed: 6_level_49,Unnamed: 7_level_49,Unnamed: 8_level_49,Unnamed: 9_level_49,Unnamed: 10_level_49,Unnamed: 11_level_49,Unnamed: 12_level_49,Unnamed: 13_level_49,Unnamed: 14_level_49,Unnamed: 15_level_49,Unnamed: 16_level_49,Unnamed: 17_level_49,Unnamed: 18_level_49,Unnamed: 19_level_49,0.041,0.033,0.013,0.006,0.004,0.003,0.003,0.002,0.002,0.002,0.002,0.001,0.002,0.002,0.001,0.001,0.002,0.001,0.001,0.001
3,Unnamed: 1_level_50,Unnamed: 2_level_50,Unnamed: 3_level_50,Unnamed: 4_level_50,Unnamed: 5_level_50,Unnamed: 6_level_50,Unnamed: 7_level_50,Unnamed: 8_level_50,Unnamed: 9_level_50,Unnamed: 10_level_50,Unnamed: 11_level_50,Unnamed: 12_level_50,Unnamed: 13_level_50,Unnamed: 14_level_50,Unnamed: 15_level_50,Unnamed: 16_level_50,Unnamed: 17_level_50,Unnamed: 18_level_50,Unnamed: 19_level_50,Unnamed: 20_level_50,0.04,0.028,0.011,0.006,0.005,0.004,0.003,0.002,0.003,0.002,0.001,0.002,0.002,0.001,0.001,0.002,0.001,0.001,0.002
rd,Unnamed: 1_level_51,Unnamed: 2_level_51,Unnamed: 3_level_51,Unnamed: 4_level_51,Unnamed: 5_level_51,Unnamed: 6_level_51,Unnamed: 7_level_51,Unnamed: 8_level_51,Unnamed: 9_level_51,Unnamed: 10_level_51,Unnamed: 11_level_51,Unnamed: 12_level_51,Unnamed: 13_level_51,Unnamed: 14_level_51,Unnamed: 15_level_51,Unnamed: 16_level_51,Unnamed: 17_level_51,Unnamed: 18_level_51,Unnamed: 19_level_51,Unnamed: 20_level_51,Unnamed: 21_level_51,0.051,0.033,0.015,0.009,0.009,0.005,0.006,0.006,0.004,0.002,0.004,0.004,0.002,0.002,0.003,0.002,0.001,0.003
▁member,Unnamed: 1_level_52,Unnamed: 2_level_52,Unnamed: 3_level_52,Unnamed: 4_level_52,Unnamed: 5_level_52,Unnamed: 6_level_52,Unnamed: 7_level_52,Unnamed: 8_level_52,Unnamed: 9_level_52,Unnamed: 10_level_52,Unnamed: 11_level_52,Unnamed: 12_level_52,Unnamed: 13_level_52,Unnamed: 14_level_52,Unnamed: 15_level_52,Unnamed: 16_level_52,Unnamed: 17_level_52,Unnamed: 18_level_52,Unnamed: 19_level_52,Unnamed: 20_level_52,Unnamed: 21_level_52,Unnamed: 22_level_52,0.047,0.048,0.031,0.024,0.014,0.016,0.013,0.012,0.006,0.013,0.009,0.005,0.006,0.007,0.006,0.004,0.01
▁on,Unnamed: 1_level_53,Unnamed: 2_level_53,Unnamed: 3_level_53,Unnamed: 4_level_53,Unnamed: 5_level_53,Unnamed: 6_level_53,Unnamed: 7_level_53,Unnamed: 8_level_53,Unnamed: 9_level_53,Unnamed: 10_level_53,Unnamed: 11_level_53,Unnamed: 12_level_53,Unnamed: 13_level_53,Unnamed: 14_level_53,Unnamed: 15_level_53,Unnamed: 16_level_53,Unnamed: 17_level_53,Unnamed: 18_level_53,Unnamed: 19_level_53,Unnamed: 20_level_53,Unnamed: 21_level_53,Unnamed: 22_level_53,Unnamed: 23_level_53,0.049,0.035,0.015,0.009,0.007,0.005,0.005,0.003,0.006,0.004,0.003,0.003,0.004,0.002,0.002,0.004
▁Wednesday,Unnamed: 1_level_54,Unnamed: 2_level_54,Unnamed: 3_level_54,Unnamed: 4_level_54,Unnamed: 5_level_54,Unnamed: 6_level_54,Unnamed: 7_level_54,Unnamed: 8_level_54,Unnamed: 9_level_54,Unnamed: 10_level_54,Unnamed: 11_level_54,Unnamed: 12_level_54,Unnamed: 13_level_54,Unnamed: 14_level_54,Unnamed: 15_level_54,Unnamed: 16_level_54,Unnamed: 17_level_54,Unnamed: 18_level_54,Unnamed: 19_level_54,Unnamed: 20_level_54,Unnamed: 21_level_54,Unnamed: 22_level_54,Unnamed: 23_level_54,Unnamed: 24_level_54,0.055,0.05,0.017,0.012,0.008,0.007,0.004,0.011,0.006,0.004,0.005,0.006,0.004,0.004,0.011
",",Unnamed: 1_level_55,Unnamed: 2_level_55,Unnamed: 3_level_55,Unnamed: 4_level_55,Unnamed: 5_level_55,Unnamed: 6_level_55,Unnamed: 7_level_55,Unnamed: 8_level_55,Unnamed: 9_level_55,Unnamed: 10_level_55,Unnamed: 11_level_55,Unnamed: 12_level_55,Unnamed: 13_level_55,Unnamed: 14_level_55,Unnamed: 15_level_55,Unnamed: 16_level_55,Unnamed: 17_level_55,Unnamed: 18_level_55,Unnamed: 19_level_55,Unnamed: 20_level_55,Unnamed: 21_level_55,Unnamed: 22_level_55,Unnamed: 23_level_55,Unnamed: 24_level_55,Unnamed: 25_level_55,0.047,0.047,0.028,0.013,0.015,0.008,0.02,0.01,0.007,0.008,0.009,0.005,0.006,0.02
▁grant,Unnamed: 1_level_56,Unnamed: 2_level_56,Unnamed: 3_level_56,Unnamed: 4_level_56,Unnamed: 5_level_56,Unnamed: 6_level_56,Unnamed: 7_level_56,Unnamed: 8_level_56,Unnamed: 9_level_56,Unnamed: 10_level_56,Unnamed: 11_level_56,Unnamed: 12_level_56,Unnamed: 13_level_56,Unnamed: 14_level_56,Unnamed: 15_level_56,Unnamed: 16_level_56,Unnamed: 17_level_56,Unnamed: 18_level_56,Unnamed: 19_level_56,Unnamed: 20_level_56,Unnamed: 21_level_56,Unnamed: 22_level_56,Unnamed: 23_level_56,Unnamed: 24_level_56,Unnamed: 25_level_56,Unnamed: 26_level_56,0.048,0.043,0.024,0.025,0.01,0.014,0.008,0.005,0.006,0.005,0.003,0.003,0.007
ing,Unnamed: 1_level_57,Unnamed: 2_level_57,Unnamed: 3_level_57,Unnamed: 4_level_57,Unnamed: 5_level_57,Unnamed: 6_level_57,Unnamed: 7_level_57,Unnamed: 8_level_57,Unnamed: 9_level_57,Unnamed: 10_level_57,Unnamed: 11_level_57,Unnamed: 12_level_57,Unnamed: 13_level_57,Unnamed: 14_level_57,Unnamed: 15_level_57,Unnamed: 16_level_57,Unnamed: 17_level_57,Unnamed: 18_level_57,Unnamed: 19_level_57,Unnamed: 20_level_57,Unnamed: 21_level_57,Unnamed: 22_level_57,Unnamed: 23_level_57,Unnamed: 24_level_57,Unnamed: 25_level_57,Unnamed: 26_level_57,Unnamed: 27_level_57,0.049,0.067,0.061,0.023,0.028,0.018,0.01,0.012,0.011,0.006,0.005,0.015
▁the,Unnamed: 1_level_58,Unnamed: 2_level_58,Unnamed: 3_level_58,Unnamed: 4_level_58,Unnamed: 5_level_58,Unnamed: 6_level_58,Unnamed: 7_level_58,Unnamed: 8_level_58,Unnamed: 9_level_58,Unnamed: 10_level_58,Unnamed: 11_level_58,Unnamed: 12_level_58,Unnamed: 13_level_58,Unnamed: 14_level_58,Unnamed: 15_level_58,Unnamed: 16_level_58,Unnamed: 17_level_58,Unnamed: 18_level_58,Unnamed: 19_level_58,Unnamed: 20_level_58,Unnamed: 21_level_58,Unnamed: 22_level_58,Unnamed: 23_level_58,Unnamed: 24_level_58,Unnamed: 25_level_58,Unnamed: 26_level_58,Unnamed: 27_level_58,Unnamed: 28_level_58,0.046,0.033,0.014,0.01,0.007,0.004,0.004,0.005,0.003,0.002,0.004
▁court,Unnamed: 1_level_59,Unnamed: 2_level_59,Unnamed: 3_level_59,Unnamed: 4_level_59,Unnamed: 5_level_59,Unnamed: 6_level_59,Unnamed: 7_level_59,Unnamed: 8_level_59,Unnamed: 9_level_59,Unnamed: 10_level_59,Unnamed: 11_level_59,Unnamed: 12_level_59,Unnamed: 13_level_59,Unnamed: 14_level_59,Unnamed: 15_level_59,Unnamed: 16_level_59,Unnamed: 17_level_59,Unnamed: 18_level_59,Unnamed: 19_level_59,Unnamed: 20_level_59,Unnamed: 21_level_59,Unnamed: 22_level_59,Unnamed: 23_level_59,Unnamed: 24_level_59,Unnamed: 25_level_59,Unnamed: 26_level_59,Unnamed: 27_level_59,Unnamed: 28_level_59,Unnamed: 29_level_59,0.057,0.055,0.039,0.024,0.012,0.012,0.013,0.008,0.005,0.014
▁juris,Unnamed: 1_level_60,Unnamed: 2_level_60,Unnamed: 3_level_60,Unnamed: 4_level_60,Unnamed: 5_level_60,Unnamed: 6_level_60,Unnamed: 7_level_60,Unnamed: 8_level_60,Unnamed: 9_level_60,Unnamed: 10_level_60,Unnamed: 11_level_60,Unnamed: 12_level_60,Unnamed: 13_level_60,Unnamed: 14_level_60,Unnamed: 15_level_60,Unnamed: 16_level_60,Unnamed: 17_level_60,Unnamed: 18_level_60,Unnamed: 19_level_60,Unnamed: 20_level_60,Unnamed: 21_level_60,Unnamed: 22_level_60,Unnamed: 23_level_60,Unnamed: 24_level_60,Unnamed: 25_level_60,Unnamed: 26_level_60,Unnamed: 27_level_60,Unnamed: 28_level_60,Unnamed: 29_level_60,Unnamed: 30_level_60,0.052,0.055,0.031,0.017,0.014,0.009,0.007,0.005,0.011
diction,Unnamed: 1_level_61,Unnamed: 2_level_61,Unnamed: 3_level_61,Unnamed: 4_level_61,Unnamed: 5_level_61,Unnamed: 6_level_61,Unnamed: 7_level_61,Unnamed: 8_level_61,Unnamed: 9_level_61,Unnamed: 10_level_61,Unnamed: 11_level_61,Unnamed: 12_level_61,Unnamed: 13_level_61,Unnamed: 14_level_61,Unnamed: 15_level_61,Unnamed: 16_level_61,Unnamed: 17_level_61,Unnamed: 18_level_61,Unnamed: 19_level_61,Unnamed: 20_level_61,Unnamed: 21_level_61,Unnamed: 22_level_61,Unnamed: 23_level_61,Unnamed: 24_level_61,Unnamed: 25_level_61,Unnamed: 26_level_61,Unnamed: 27_level_61,Unnamed: 28_level_61,Unnamed: 29_level_61,Unnamed: 30_level_61,Unnamed: 31_level_61,0.043,0.038,0.017,0.018,0.014,0.007,0.007,0.019
▁over,Unnamed: 1_level_62,Unnamed: 2_level_62,Unnamed: 3_level_62,Unnamed: 4_level_62,Unnamed: 5_level_62,Unnamed: 6_level_62,Unnamed: 7_level_62,Unnamed: 8_level_62,Unnamed: 9_level_62,Unnamed: 10_level_62,Unnamed: 11_level_62,Unnamed: 12_level_62,Unnamed: 13_level_62,Unnamed: 14_level_62,Unnamed: 15_level_62,Unnamed: 16_level_62,Unnamed: 17_level_62,Unnamed: 18_level_62,Unnamed: 19_level_62,Unnamed: 20_level_62,Unnamed: 21_level_62,Unnamed: 22_level_62,Unnamed: 23_level_62,Unnamed: 24_level_62,Unnamed: 25_level_62,Unnamed: 26_level_62,Unnamed: 27_level_62,Unnamed: 28_level_62,Unnamed: 29_level_62,Unnamed: 30_level_62,Unnamed: 31_level_62,Unnamed: 32_level_62,0.056,0.062,0.042,0.022,0.011,0.011,0.019
▁alleged,Unnamed: 1_level_63,Unnamed: 2_level_63,Unnamed: 3_level_63,Unnamed: 4_level_63,Unnamed: 5_level_63,Unnamed: 6_level_63,Unnamed: 7_level_63,Unnamed: 8_level_63,Unnamed: 9_level_63,Unnamed: 10_level_63,Unnamed: 11_level_63,Unnamed: 12_level_63,Unnamed: 13_level_63,Unnamed: 14_level_63,Unnamed: 15_level_63,Unnamed: 16_level_63,Unnamed: 17_level_63,Unnamed: 18_level_63,Unnamed: 19_level_63,Unnamed: 20_level_63,Unnamed: 21_level_63,Unnamed: 22_level_63,Unnamed: 23_level_63,Unnamed: 24_level_63,Unnamed: 25_level_63,Unnamed: 26_level_63,Unnamed: 27_level_63,Unnamed: 28_level_63,Unnamed: 29_level_63,Unnamed: 30_level_63,Unnamed: 31_level_63,Unnamed: 32_level_63,Unnamed: 33_level_63,0.063,0.053,0.024,0.011,0.009,0.015
▁crimes,Unnamed: 1_level_64,Unnamed: 2_level_64,Unnamed: 3_level_64,Unnamed: 4_level_64,Unnamed: 5_level_64,Unnamed: 6_level_64,Unnamed: 7_level_64,Unnamed: 8_level_64,Unnamed: 9_level_64,Unnamed: 10_level_64,Unnamed: 11_level_64,Unnamed: 12_level_64,Unnamed: 13_level_64,Unnamed: 14_level_64,Unnamed: 15_level_64,Unnamed: 16_level_64,Unnamed: 17_level_64,Unnamed: 18_level_64,Unnamed: 19_level_64,Unnamed: 20_level_64,Unnamed: 21_level_64,Unnamed: 22_level_64,Unnamed: 23_level_64,Unnamed: 24_level_64,Unnamed: 25_level_64,Unnamed: 26_level_64,Unnamed: 27_level_64,Unnamed: 28_level_64,Unnamed: 29_level_64,Unnamed: 30_level_64,Unnamed: 31_level_64,Unnamed: 32_level_64,Unnamed: 33_level_64,Unnamed: 34_level_64,0.051,0.048,0.024,0.018,0.024
▁in,Unnamed: 1_level_65,Unnamed: 2_level_65,Unnamed: 3_level_65,Unnamed: 4_level_65,Unnamed: 5_level_65,Unnamed: 6_level_65,Unnamed: 7_level_65,Unnamed: 8_level_65,Unnamed: 9_level_65,Unnamed: 10_level_65,Unnamed: 11_level_65,Unnamed: 12_level_65,Unnamed: 13_level_65,Unnamed: 14_level_65,Unnamed: 15_level_65,Unnamed: 16_level_65,Unnamed: 17_level_65,Unnamed: 18_level_65,Unnamed: 19_level_65,Unnamed: 20_level_65,Unnamed: 21_level_65,Unnamed: 22_level_65,Unnamed: 23_level_65,Unnamed: 24_level_65,Unnamed: 25_level_65,Unnamed: 26_level_65,Unnamed: 27_level_65,Unnamed: 28_level_65,Unnamed: 29_level_65,Unnamed: 30_level_65,Unnamed: 31_level_65,Unnamed: 32_level_65,Unnamed: 33_level_65,Unnamed: 34_level_65,Unnamed: 35_level_65,0.05,0.063,0.032,0.027
▁Palestinian,Unnamed: 1_level_66,Unnamed: 2_level_66,Unnamed: 3_level_66,Unnamed: 4_level_66,Unnamed: 5_level_66,Unnamed: 6_level_66,Unnamed: 7_level_66,Unnamed: 8_level_66,Unnamed: 9_level_66,Unnamed: 10_level_66,Unnamed: 11_level_66,Unnamed: 12_level_66,Unnamed: 13_level_66,Unnamed: 14_level_66,Unnamed: 15_level_66,Unnamed: 16_level_66,Unnamed: 17_level_66,Unnamed: 18_level_66,Unnamed: 19_level_66,Unnamed: 20_level_66,Unnamed: 21_level_66,Unnamed: 22_level_66,Unnamed: 23_level_66,Unnamed: 24_level_66,Unnamed: 25_level_66,Unnamed: 26_level_66,Unnamed: 27_level_66,Unnamed: 28_level_66,Unnamed: 29_level_66,Unnamed: 30_level_66,Unnamed: 31_level_66,Unnamed: 32_level_66,Unnamed: 33_level_66,Unnamed: 34_level_66,Unnamed: 35_level_66,Unnamed: 36_level_66,0.056,0.057,0.031
▁territor,Unnamed: 1_level_67,Unnamed: 2_level_67,Unnamed: 3_level_67,Unnamed: 4_level_67,Unnamed: 5_level_67,Unnamed: 6_level_67,Unnamed: 7_level_67,Unnamed: 8_level_67,Unnamed: 9_level_67,Unnamed: 10_level_67,Unnamed: 11_level_67,Unnamed: 12_level_67,Unnamed: 13_level_67,Unnamed: 14_level_67,Unnamed: 15_level_67,Unnamed: 16_level_67,Unnamed: 17_level_67,Unnamed: 18_level_67,Unnamed: 19_level_67,Unnamed: 20_level_67,Unnamed: 21_level_67,Unnamed: 22_level_67,Unnamed: 23_level_67,Unnamed: 24_level_67,Unnamed: 25_level_67,Unnamed: 26_level_67,Unnamed: 27_level_67,Unnamed: 28_level_67,Unnamed: 29_level_67,Unnamed: 30_level_67,Unnamed: 31_level_67,Unnamed: 32_level_67,Unnamed: 33_level_67,Unnamed: 34_level_67,Unnamed: 35_level_67,Unnamed: 36_level_67,Unnamed: 37_level_67,0.051,0.047
ies,Unnamed: 1_level_68,Unnamed: 2_level_68,Unnamed: 3_level_68,Unnamed: 4_level_68,Unnamed: 5_level_68,Unnamed: 6_level_68,Unnamed: 7_level_68,Unnamed: 8_level_68,Unnamed: 9_level_68,Unnamed: 10_level_68,Unnamed: 11_level_68,Unnamed: 12_level_68,Unnamed: 13_level_68,Unnamed: 14_level_68,Unnamed: 15_level_68,Unnamed: 16_level_68,Unnamed: 17_level_68,Unnamed: 18_level_68,Unnamed: 19_level_68,Unnamed: 20_level_68,Unnamed: 21_level_68,Unnamed: 22_level_68,Unnamed: 23_level_68,Unnamed: 24_level_68,Unnamed: 25_level_68,Unnamed: 26_level_68,Unnamed: 27_level_68,Unnamed: 28_level_68,Unnamed: 29_level_68,Unnamed: 30_level_68,Unnamed: 31_level_68,Unnamed: 32_level_68,Unnamed: 33_level_68,Unnamed: 34_level_68,Unnamed: 35_level_68,Unnamed: 36_level_68,Unnamed: 37_level_68,Unnamed: 38_level_68,0.042
.,Unnamed: 1_level_69,Unnamed: 2_level_69,Unnamed: 3_level_69,Unnamed: 4_level_69,Unnamed: 5_level_69,Unnamed: 6_level_69,Unnamed: 7_level_69,Unnamed: 8_level_69,Unnamed: 9_level_69,Unnamed: 10_level_69,Unnamed: 11_level_69,Unnamed: 12_level_69,Unnamed: 13_level_69,Unnamed: 14_level_69,Unnamed: 15_level_69,Unnamed: 16_level_69,Unnamed: 17_level_69,Unnamed: 18_level_69,Unnamed: 19_level_69,Unnamed: 20_level_69,Unnamed: 21_level_69,Unnamed: 22_level_69,Unnamed: 23_level_69,Unnamed: 24_level_69,Unnamed: 25_level_69,Unnamed: 26_level_69,Unnamed: 27_level_69,Unnamed: 28_level_69,Unnamed: 29_level_69,Unnamed: 30_level_69,Unnamed: 31_level_69,Unnamed: 32_level_69,Unnamed: 33_level_69,Unnamed: 34_level_69,Unnamed: 35_level_69,Unnamed: 36_level_69,Unnamed: 37_level_69,Unnamed: 38_level_69,Unnamed: 39_level_69


In [7]:
def clean_token(token):
    processed_token = token.replace("▁", " ")  
    return processed_token

# Aggregate the attribution scores for each input sentence
tok_out = res.aggregate()
prompt_last_index = tok_out[0].attr_pos_start

input_sequences = [clean_token(t.token) for t in tok_out[0].target[2:prompt_last_index-1]]
cleaned_sequences = []
for seq in input_sequences:
    processed_seq = seq.replace("<0x0A>", " ").strip()
    cleaned_sequences.append(processed_seq)

attr_scores = tok_out[0].target_attributions[2:prompt_last_index-1].tolist()
assert(len(cleaned_sequences) == len(attr_scores))

# Note: we only consider the maximum attribution score for each sentence
sent_scores = dict()
for seq_ix, seq in enumerate(cleaned_sequences):
    sent_scores[seq] = max(attr_scores[seq_ix])

# Extract top K important sentences
sorted_sent_scores = dict(sorted(sent_scores.items(), key=lambda x: x[1], reverse=True))
top_k_sents = list(sorted_sent_scores.keys())[:3]

print(top_k_sents)
for sent in top_k_sents:
    print(sent_scores[sent])

# Store both the attributed sentences and their aggregated scores
attributed_sents = []
for sent in top_k_sents:
    attributed_sents.append(
        {
            "input_sequence": sent,
            "score": sent_scores[sent]
        }
    )

print(attributed_sents)

['(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories.', 'The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014."', 'The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based.']
0.1950782984495163
0.029492268338799477
0.01984543912112713
[{'input_sequence': '(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories.', 'score': 0.1950782984495163}, {'input_sequence': 'The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction o