In [1]:
import os
import json
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch



  from .autonotebook import tqdm as notebook_tqdm


In [15]:
from thefuzz import process

import json

In [2]:
import re

In [3]:
from sklearn.metrics import precision_score, recall_score, f1_score

In [4]:
model_path = '/datasets/ai/llama3/hub/models--meta-llama--Llama-3.2-1B-Instruct/snapshots/9213176726f574b556790deb65791e0c5aa438b6'

In [23]:
model_paths = ['/datasets/ai/llama3/hub/models--meta-llama--Llama-3.2-1B-Instruct/snapshots/9213176726f574b556790deb65791e0c5aa438b6','datasets/ai/deepseek/hub/models--deepseek-ai--DeepSeek-R1-Distill-Qwen-1.5B/snapshots/530ca3e1ad39d440e182c2e4317aa40f012512fa','datasets/ai/t5/models--google--t5-v1_1-base/snapshots/b5fc947a416ea3cb079532cb3c2bbadeb7f800fc']

In [5]:
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16).to("cuda")  # Move model to CUDA

In [45]:
"""
    Cleans incomplete JSON response by:
    1. Extracting the valid JSON part.
    2. Removing incomplete entity objects.
    3. Returning a properly formatted JSON dictionary.
    4. Removes repeated entity titles in the dictionary.
"""
def clean_incomplete_json(response):
 
    match = re.search(r'\{.*\}', response, re.DOTALL)
    if not match:
        raise ValueError("No valid JSON-like content found in the response.")

    json_like_str = match.group(0)


    json_like_str = json_like_str.replace("'", '"')  # Ensure valid JSON quotes
    json_like_str = re.sub(r',\s*}', '}', json_like_str)  # Remove trailing commas before }
    json_like_str = re.sub(r',\s*\]', ']', json_like_str)  # Remove trailing commas before ]

   
    entities_match = re.search(r'"entities":\s*\[(.*)', json_like_str, re.DOTALL)
    if not entities_match:
        raise ValueError("No 'entities' key found in the response.")

    entities_str = entities_match.group(1).strip()

   
    entity_blocks = re.findall(r'\{[^{}]*\}', entities_str, re.DOTALL)

    valid_entities = []
    for block in entity_blocks:
        try:
            entity = json.loads(block)  # Attempt to parse each block
            valid_entities.append(entity)  # Keep valid entities
        except json.JSONDecodeError:
            print(f"Skipping invalid entity: {block}")  # Debugging output

   
    cleaned_response = {"entities": valid_entities}
     
    cleaned_response["entities"] = list({e["entity title"]: e for e in cleaned_response["entities"]}.values())

    return cleaned_response

In [41]:


def fuzzy_match(title, candidates, threshold=85):
    best_match, score = process.extractOne(title, candidates)
    return best_match if score >= threshold else None

def evaluate_salience(ground_truth, model_output, threshold=85):
    gt_dict = {}
    for item in ground_truth:
        title = item.get('entity title')
        salience = item.get('entity salience')

        if title:
            title = title.lower()
            try:
                gt_dict[title] = int(salience) if salience is not None else 0
            except ValueError:
                gt_dict[title] = 0

    pred_dict = {}
    for item in model_output:
        title = item.get('entity title')
        salience = item.get('entity salience')

        if title:
            title = title.lower()
            try:
                pred_dict[title] = int(salience) if salience is not None else 0
            except ValueError:
                pred_dict[title] = 0
        else:
            print("Weird data : ",item)

    # Apply fuzzy matching for better entity alignment
    updated_pred_dict = {}
    for pred_title in pred_dict:
        matched_title = fuzzy_match(pred_title, list(gt_dict.keys()), threshold)
        if matched_title:
            updated_pred_dict[matched_title] = pred_dict[pred_title]
        else:
            updated_pred_dict[pred_title] = pred_dict[pred_title]

    all_entities = set(gt_dict.keys()).union(set(updated_pred_dict.keys()))
    y_true = [gt_dict.get(entity, 0) for entity in all_entities]
    y_pred = [updated_pred_dict.get(entity, 0) for entity in all_entities]

    return y_true, y_pred

def evaluate_multiple_instances(ground_truths, model_outputs, threshold=85):
    all_y_true = []
    all_y_pred = []
    
    for gt, mo in zip(ground_truths, model_outputs):
        y_true, y_pred = evaluate_salience(gt, mo, threshold)
        all_y_true.extend(y_true)
        all_y_pred.extend(y_pred)
        
    precision = precision_score(all_y_true, all_y_pred, average='macro')
    recall = recall_score(all_y_true, all_y_pred, average='macro')
    f1 = f1_score(all_y_true, all_y_pred, average='macro')
    
    # Compute accuracy based on fuzzy-matched salient entities
    all_salient_correct = all(
        gt_dict.get(entity, 0) == 1 and updated_pred_dict.get(entity, 0) == 1
        for gt, mo in zip(ground_truths, model_outputs)
        for gt_dict, updated_pred_dict in [
            (
                {fuzzy_match(item['entity title'].lower(), [x['entity title'].lower() for x in mo], threshold): int(item['entity salience']) for item in gt},
                {item['entity title'].lower(): int(item['entity salience']) for item in mo}
            )
        ]
        for entity in gt_dict if gt_dict[entity] == 1
    )

    accuracy = 1.0 if all_salient_correct else 0.0
    
    return {"precision": precision, "recall": recall, "f1": f1, "accuracy": accuracy}

In [26]:
######WORKS
# def evaluate_salience(ground_truth, model_output):
#     gt_dict = {}
#     for item in ground_truth:
#         title = item.get('entity title')
#         salience = item.get('entity salience')

#         if title is not None:  # Check if 'entity title' exists
#             title = title.lower() # Lowercase only if title exists
#             try:
#                 salience = int(salience) if salience is not None else 0
#                 gt_dict[title] = salience
#             except ValueError:
#                 print(f"Warning: Could not convert salience to integer for title: {title}. Setting to 0.")
#                 gt_dict[title] = 0
#         else:
#             print("Warning: Found an item in ground_truth without an 'entity title'. Skipping.", item)


#     pred_dict = {}
#     for item in model_output:
#         title = item.get('entity title')
#         salience = item.get('entity salience')

#         if title is not None:
#             title = title.lower()
#             try:
#                 salience = int(salience) if salience is not None else 0
#                 pred_dict[title] = salience
#             except ValueError:
#                 print(f"Warning: Could not convert salience to integer for title: {title}. Setting to 0.")
#                 pred_dict[title] = 0
#         else:
#             print("Warning: Found an item in model_output without an 'entity title'. Skipping.")

#     all_entities = set(gt_dict.keys()).union(set(pred_dict.keys()))
#     y_true = [gt_dict.get(entity, 0) for entity in all_entities]
#     y_pred = [pred_dict.get(entity, 0) for entity in all_entities]

#     return y_true, y_pred

# def evaluate_multiple_instances(ground_truths, model_outputs):
#     all_y_true = []
#     all_y_pred = []
    
#     for gt, mo in zip(ground_truths, model_outputs):
#         y_true, y_pred = evaluate_salience(gt, mo)
#         all_y_true.extend(y_true)
#         all_y_pred.extend(y_pred)
        
#     precision = precision_score(all_y_true, all_y_pred, average='macro')  # or 'micro', 'weighted'
#     recall = recall_score(all_y_true, all_y_pred, average='macro')      # or 'micro', 'weighted'
#     f1 = f1_score(all_y_true, all_y_pred, average='macro')          
    
#     # precision = precision_score(all_y_true, all_y_pred)
#     # recall = recall_score(all_y_true, all_y_pred)
#     # f1 = f1_score(all_y_true, all_y_pred)
    
#     all_salient_correct = all(
#         gt_dict.get(entity, 0) == 1 and pred_dict.get(entity, 0) == 1 
#         for gt, mo in zip(ground_truths, model_outputs)
#         for gt_dict, pred_dict in [(dict((item['entity title'].lower(), int(item['entity salience'])) for item in gt),
#                                     dict((item['entity title'].lower(), int(item['entity salience'])) for item in mo))]
#         for entity in gt_dict if gt_dict[entity] == 1
#     )
   
#     accuracy = 1.0 if all_salient_correct else 0.0
    
#     return {"precision": precision, "recall": recall, "f1": f1, "accuracy": accuracy}

In [20]:
#file_path = "work/pi_wenlongzhao_umass_edu/8/tej/696-detecting-salient-entities/data/article_info.json"
file_path = "./data/article_info.json"

In [21]:
with open(file_path, "r") as f:
    data = json.load(f)
df = pd.DataFrame(data)

In [22]:
df['entities'][0]

[{'entity title': 'United States', 'entity salience': '1'},
 {'entity title': 'Washington, D.C.', 'entity salience': '1'},
 {'entity title': 'same-sex marriage', 'entity salience': '0'},
 {'entity title': 'Adrian Fenty', 'entity salience': '0'},
 {'entity title': 'federal district', 'entity salience': '0'},
 {'entity title': 'family values', 'entity salience': '0'},
 {'entity title': 'John Roberts', 'entity salience': '0'}]

In [47]:
outputs = []
articles = list(df['text'])
titles = list(df['title'])
prompt_template = '''
Article Title: {title}
Article Text: {article}

You are an expert news article analysis assistant. Given an article's title and its text, your task is to extract the entities 
mentioned in the article and assign a salience score to each entity. The salience score is defined as follows:
• 1: The entity is central to the article's content.
• 0: The entity is mentioned but is not central.

Follow these rules:
1. Only extract entities that are explicitly mentioned in the article.
2. Do not include any entities that do not appear in the article.
3. Use only the provided title and text to determine salience.
4. Format your answer as valid JSON exactly as specified below.
5. Do not infinitely loop on the same words for any of the entity titles.
6. Every entity title should appear only once.

The expected JSON format is:
{{
  "entities": [
    {{
      "entity title": "<entity_name>",
      "entity salience": "<0 or 1>"
    }},
    ...
  ]
}}

Now, based on the article above, return only the final JSON output (with no extra commentary).
'''


####RUNS ZERO SHOT FOR EVERY MODEL SPECIFIED IN MODEL PATHS###########
i = 0
n = 5 #num instances
for model_path in model_paths[:1]:
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16).to("cuda")  # Move model to CUDA
    for title,article in zip(titles[:n],articles[:n]):
        prompt = prompt_template.format(title=title, article=article)
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
        
        # Generate response
        output_ids = model.generate(input_ids, max_new_tokens=1000)

        # Decode and print the response
        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        
        response = response.split("Now, based on the article above, return only the final JSON output (with no extra commentary).")[-1]
        result = clean_incomplete_json(response)
        print("Iteration", i)
        i+=1
        outputs.append(result['entities'])

f = open('outputs.txt','w+')
f.write(str(outputs))
f.close()

metrics = evaluate_multiple_instances(df['entities'][:n],outputs)
print("METRICS",metrics)
print("\n")
print(outputs)
        

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Iteration 0


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Iteration 1


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Iteration 2


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Iteration 3
Iteration 4
METRICS {'precision': 0.4617554858934169, 'recall': 0.4175675675675675, 'f1': 0.4156231365533691, 'accuracy': 0.0}


[[{'entity title': 'Washington, D.C.', 'entity salience': '1'}, {'entity title': 'United States capital of Washington, D.C.', 'entity salience': '1'}, {'entity title': 'Same-sex marriage', 'entity salience': '1'}, {'entity title': 'Washington D.C.', 'entity salience': '1'}, {'entity title': 'Mayor Adrian Fenty', 'entity salience': '1'}, {'entity title': 'Congress', 'entity salience': '1'}, {'entity title': 'Supreme Court Chief Justice John Roberts', 'entity salience': '1'}, {'entity title': 'December', 'entity salience': '0'}, {'entity title': 'United States', 'entity salience': '0'}, {'entity title': 'Supreme Court', 'entity salience': '0'}, {'entity title': 'John Roberts', 'entity salience': '0'}, {'entity title': 'Washington', 'entity salience': '0'}, {'entity title': 'D.C.', 'entity salience': '0'}, {'entity title': 'John F. Kennedy', 'entity 

In [32]:
outputs[2]

[{'entity title': 'United States capital of Washington, D.C.',
  'entity salience': '1'},
 {'entity title': 'Washington, D.C.', 'entity salience': '1'},
 {'entity title': 'United States territory', 'entity salience': '1'},
 {'entity title': 'Same-sex marriage', 'entity salience': '1'},
 {'entity title': 'Washington D.C.', 'entity salience': '1'},
 {'entity title': 'Mayor Adrian Fenty', 'entity salience': '1'},
 {'entity title': 'Supreme Court Chief Justice John Roberts',
  'entity salience': '1'},
 {'entity title': 'Congress', 'entity salience': '0'},
 {'entity title': 'December', 'entity salience': '0'}]

{'precision': 0.6140350877192983,
 'recall': 0.5886363636363636,
 'f1': 0.5982142857142857,
 'accuracy': 0.0}

In [33]:
df['entities'][2], outputs[2]

([{'entity title': 'Canadian', 'entity salience': '1'},
  {'entity title': 'Air Canada', 'entity salience': '0'},
  {'entity title': 'Aveos Fleet Performance', 'entity salience': '0'},
  {'entity title': 'Montreal', 'entity salience': '0'},
  {'entity title': 'Winnipeg', 'entity salience': '0'},
  {'entity title': 'Vancouver', 'entity salience': '0'},
  {'entity title': 'Airbus A319 and A320', 'entity salience': '0'}],
 [{'entity title': 'United States capital of Washington, D.C.',
   'entity salience': '1'},
  {'entity title': 'Washington, D.C.', 'entity salience': '1'},
  {'entity title': 'United States territory', 'entity salience': '1'},
  {'entity title': 'Same-sex marriage', 'entity salience': '1'},
  {'entity title': 'Washington D.C.', 'entity salience': '1'},
  {'entity title': 'Mayor Adrian Fenty', 'entity salience': '1'},
  {'entity title': 'Supreme Court Chief Justice John Roberts',
   'entity salience': '1'},
  {'entity title': 'Congress', 'entity salience': '0'},
  {'entit

In [None]:
#######TEST FOR ONE ARTICLE

In [28]:
####PROMPT 1##

context_str = '''The United States capital of Washington, D.C. legalized same-sex marriage on Wednesday. Beginning at 6 A.M. local time (1100 UTC), couples began submitting marriage applications at local courthouses citywide.Washington D.C. becomes the seventh United States territory to legalize same sex marriage. The bill was ratified by Mayor Adrian Fenty last December. Due to city's territorial status as a federal district, the bill had to be reviewed by congress. The bill passed congressional review Tuesday night.The bill faced opposition from many family values activists, who tried to stop the bill from becoming law. Supreme Court Chief Justice John Roberts rejected a lawsuit to prevent the measure.'''
prompt = '''
You are an editor for a newspaper who has to identify the most critical pieces of
,→ information when writing the headline for an article.
For this task you are given a question-answer pair as Context and a list of
entities from the text. Read the Context given in triple backticks and rate
how salient each entity is to the Context. Before answering provide a short
justification for your answer.
,→
,→
,→
Provide a salience score of 0 or 1 where 0 is non-salient and 1 is
,→ salient.
Provide a categorical rating from the following options:
High - The entity is strongly related to the main point of the question-answer
,→ pair or is the answer itself.
Moderate - The entity is related to the question-answer pair but it is not the
,→ most important part.
Low - The entity not related or is only tangentially or superficially related
,→ to the question-answer pair.
Countries (especially in reference to nationality) are frequently incidental to the
answer and are most often “Low” salience unless directly related to the
question.
,→
,→
Give your answer as valid JSON in the following format:
[
{{
"entity": <entity_name>,
"explanation": <explanation of the rating>,
"rating": <rating>,
"score": <score>,
}}
]
Context: ```{context_str}```
List of entities: {entity_str}
Answer:
'''

# Tokenize the prompt and move input to CUDA
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")

# Generate response
output_ids = model.generate(input_ids, max_length=500)

# Decode and print the response
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(response)

OSError: Incorrect path_or_model_id: 'datasets/ai/deepseek/hub/models--deepseek-ai--DeepSeek-R1-Distill-Qwen-1.5B/snapshots/530ca3e1ad39d440e182c2e4317aa40f012512fa'. Please provide either the path to a local folder or the repo_id of a model on the Hub.

In [13]:
#####PROMPT2#####

article = '''The United States capital of Washington, D.C. legalized same-sex marriage on Wednesday. Beginning at 6 A.M. local time (1100 UTC), couples began submitting marriage applications at local courthouses citywide.Washington D.C. becomes the seventh United States territory to legalize same sex marriage. The bill was ratified by Mayor Adrian Fenty last December. Due to city's territorial status as a federal district, the bill had to be reviewed by congress. The bill passed congressional review Tuesday night.The bill faced opposition from many family values activists, who tried to stop the bill from becoming law. Supreme Court Chief Justice John Roberts rejected a lawsuit to prevent the measure.'''
title = '''Laws allowing same sex marriage in Washington, D.C. go into effect'''
prompt = f'''
Article Title: {title}
Article Text: {article}

You are an expert news article analysis assistant. Given an article's title and its text, your task is to extract the entities 
mentioned in the article and assign a salience score to each entity. The salience score is defined as follows:
•⁠  ⁠1: The entity is central to the article's content.
•⁠  ⁠0: The entity is mentioned but is not central.

Follow these rules:
1.⁠ ⁠Only extract entities that are explicitly mentioned in the article.
2.⁠ ⁠Do not include any entities that do not appear in the article.
3.⁠ ⁠Use only the provided title and text to determine salience.
4.⁠ ⁠Format your answer as valid JSON exactly as specified below.
5. Do not infinitely loop on the same words for any of the entity titles
6. Every entity title should appear only once

The expected JSON format is:
{{
  "entities": [
    {{
      "entity title": "<entity_name>",
      "entity salience": "<0 or 1>"
    }},
    ...
  ]
}}

Now, based on the article above, return only the final JSON output (with no extra commentary).
'''
# Tokenize the prompt and move input to CUDA
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")

# Generate response
output_ids = model.generate(input_ids, max_length=1000)

# Decode and print the response
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
response = response.split("Now, based on the article above, return only the final JSON output (with no extra commentary).")[-1]
result = clean_incomplete_json(response)
print("RESPONSE",result)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


RESPONSE {'entities': [{'entity title': 'United States capital of Washington, D.C.', 'entity salience': '1'}, {'entity title': 'Washington, D.C.', 'entity salience': '1'}, {'entity title': 'United States territory', 'entity salience': '1'}, {'entity title': 'Same-sex marriage', 'entity salience': '1'}, {'entity title': 'Washington D.C.', 'entity salience': '1'}, {'entity title': 'Mayor Adrian Fenty', 'entity salience': '1'}, {'entity title': 'Supreme Court Chief Justice John Roberts', 'entity salience': '1'}, {'entity title': 'Congress', 'entity salience': '0'}, {'entity title': 'December', 'entity salience': '0'}]}


In [11]:
result

{'entities': [{'entity title': 'United States capital of Washington, D.C.',
   'entity salience': '1'},
  {'entity title': 'Washington, D.C.', 'entity salience': '1'},
  {'entity title': 'United States territory', 'entity salience': '1'},
  {'entity title': 'Same-sex marriage', 'entity salience': '1'},
  {'entity title': 'Washington D.C.', 'entity salience': '1'},
  {'entity title': 'Mayor Adrian Fenty', 'entity salience': '1'},
  {'entity title': 'Supreme Court Chief Justice John Roberts',
   'entity salience': '1'},
  {'entity title': 'Congress', 'entity salience': '0'},
  {'entity title': 'December', 'entity salience': '0'}]}

In [13]:
dict(response.split('''{
  "entities": [
    ''')[-1])

ValueError: dictionary update sequence element #0 has length 1; 2 is required

In [20]:
import json
import re

def clean_incomplete_json(response: str):
    """
    Cleans an incomplete JSON response by:
    1. Extracting the valid JSON part.
    2. Removing incomplete entity objects.
    3. Returning a properly formatted JSON dictionary.
    """
    # Extract the JSON-like substring
    match = re.search(r'\{.*\}', response, re.DOTALL)
    if not match:
        raise ValueError("No valid JSON-like content found in the response.")

    json_like_str = match.group(0)

    # Fix common JSON formatting issues
    json_like_str = json_like_str.replace("'", '"')  # Ensure valid JSON quotes
    json_like_str = re.sub(r',\s*}', '}', json_like_str)  # Remove trailing commas before }
    json_like_str = re.sub(r',\s*\]', ']', json_like_str)  # Remove trailing commas before ]

    # Find the entities list
    entities_match = re.search(r'"entities":\s*\[(.*)', json_like_str, re.DOTALL)
    if not entities_match:
        raise ValueError("No 'entities' key found in the response.")

    entities_str = entities_match.group(1).strip()

    # Extract individual entity blocks using regex
    entity_blocks = re.findall(r'\{[^{}]*\}', entities_str, re.DOTALL)

    valid_entities = []
    for block in entity_blocks:
        try:
            entity = json.loads(block)  # Attempt to parse each block
            valid_entities.append(entity)  # Keep valid entities
        except json.JSONDecodeError:
            print(f"Skipping invalid entity: {block}")  # Debugging output

    # Construct valid JSON response
    cleaned_response = {"entities": valid_entities}

    return cleaned_response

# Example incomplete response
response = '''{
  "entities": [
    {
      "entity title": "Washington, D.C.",
      "entity salience": "1"
    },
    {
      "entity title": "United States capital",
      "entity salience": "1"
    },
    {
      "entity title": "Same-sex marriage",
      "entity salience": "1"
      },'''  # Incomplete object

try:
    parsed_response = clean_incomplete_json(response)
    print(json.dumps(parsed_response, indent=2))
except ValueError as e:
    print(f"Error: {e}")


{
  "entities": [
    {
      "entity title": "Washington, D.C.",
      "entity salience": "1"
    },
    {
      "entity title": "United States capital",
      "entity salience": "1"
    },
    {
      "entity title": "Same-sex marriage",
      "entity salience": "1"
    }
  ]
}


In [30]:
mod = {
  "entities": [
    {
      "entity title": "Washington, D.C.",
      "entity salience": "1"
    },
    {
      "entity title": "United States capital",
      "entity salience": "1"
    },
    {
      "entity title": "Same-sex marriage",
      "entity salience": "1"
    }
  ]
}

In [31]:
gt = [{'entity title': 'United States', 'entity salience': '1'},
 {'entity title': 'Washington, D.C.', 'entity salience': '1'},
 {'entity title': 'same-sex marriage', 'entity salience': '0'},
 {'entity title': 'Adrian Fenty', 'entity salience': '0'},
 {'entity title': 'federal district', 'entity salience': '0'},
 {'entity title': 'family values', 'entity salience': '0'},
 {'entity title': 'John Roberts', 'entity salience': '0'}]

In [32]:
evaluate_salience(gt,mod["entities"])

TypeError: list indices must be integers or slices, not str