In [1]:
from utils import DocumentDB
db = DocumentDB()
print("loaded")

loaded


In [2]:
from src import SemanticIndex, BM25Index

semantic_idx = SemanticIndex()
bm25_idx = BM25Index()

print("loaded")

loaded


In [6]:
import pytrec_eval
from utils import read_qrel_from_file, evaluate_run, extract_topics_from_file
topics = extract_topics_from_file("qrels/2020/topics_test.txt")
qrel = read_qrel_from_file("qrels/2020/cair2020_qrel.txt")

metrics: set = {'map', 'ndcg', 'P_5'}
evaluator = pytrec_eval.RelevanceEvaluator(qrel, metrics)

import numpy as np

def print_cum_stats(run):
    run_results = evaluator.evaluate(run)

    map_scores = [v["map"] for k,v in run_results.items()]
    p_scores  = [v["P_5"] for k,v in run_results.items()]
    ndcg_scores = [v['ndcg'] for k,v in run_results.items()]

    print("Aggregate results")
    print("Average MAP: ", np.mean(map_scores))
    print("Average P_5: ", np.mean(p_scores))
    print("Average NDCG: ", np.mean(ndcg_scores))


# Simple Re-Reranking: Combined count scores per input
Method: 

1. Treat title and narrative seperate 
2. Per input get results from semantic and bm25 index
3. Score is combined count


In [8]:
from collections import Counter
from tqdm.notebook import tqdm
title_run = {}
narrative_run = {}


for topic in tqdm(topics, total = len(topics)):

    # Title results 
    title = topic["title"].strip()
    sti, sts = semantic_idx.query(title, num_results=30)
    stid = db.batch_idx_lookup(sti)

    bti, bts = bm25_idx.query(title, num_results=30)
    btid = db.batch_idx_lookup(bti)

    tctr = Counter(stid+btid)
    title_run[topic["number"]] = dict(tctr.most_common(30))
    
    # Narrative results 
    narrative = topic["narrative"].strip()
    sni, sns = semantic_idx.query(narrative, num_results=30)
    snid = db.batch_idx_lookup(sni)
    
    bni, bns = bm25_idx.query(narrative, num_results=30)
    bnid = db.batch_idx_lookup(bni)
    
    nctr = Counter(snid+bnid)
    narrative_run[topic["number"]] = dict(nctr.most_common(30))

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




In [9]:
print("scores for 50 results")
print("Title scores")
print_cum_stats(title_run)

print("\nNarrative scores")
print_cum_stats(narrative_run)

scores for 50 results
Title scores
Aggregate results
Average MAP:  0.18495029357134296
Average P_5:  0.55
Average NDCG:  0.32942406287082926

Narrative scores
Aggregate results
Average MAP:  0.2168356381672904
Average P_5:  0.6000000000000001
Average NDCG:  0.35936977140805926


# Simple Re-Reranking: Combined scores per input
Method: 

1. Treat title and narrative seperate 
2. Per input get results from semantic and bm25 index
3. Score is bm25 score + sim score

In [10]:
from collections import Counter
from tqdm.notebook import tqdm
title_run = {}
narrative_run = {}


for topic in tqdm(topics, total = len(topics)):

    # Title results 
    title = topic["title"].strip()
    sti, sts = semantic_idx.query(title, num_results=30)
    stid = db.batch_idx_lookup(sti)
    stid_scores = dict(zip(stid, sts))

    bti, bts = bm25_idx.query(title, num_results=30)
    btid = db.batch_idx_lookup(bti)
    btid_scores = dict(zip(btid, bts))

    tctr = Counter(stid+btid)
    t_scores = {}
    for i in tctr:
        s_score = stid_scores.get(i, 0)
        b_score = btid_scores.get(i, 0)
        t_scores[i] = s_score + b_score
    title_run[topic["number"]] = t_scores
    
    # Narrative results 
    narrative = topic["narrative"].strip()
    sni, sns = semantic_idx.query(narrative, num_results=30)
    snid = db.batch_idx_lookup(sni)
    snid_scores = dict(zip(snid, sts))

    bni, bns = bm25_idx.query(narrative, num_results=30)
    bnid = db.batch_idx_lookup(bni)
    bnid_scores = dict(zip(bnid, bts))

    nctr = Counter(snid+bnid)
    n_scores = {}
    for i in nctr:
        s_score = snid_scores.get(i, 0)
        b_score = bnid_scores.get(i, 0)
        n_scores[i] = s_score + b_score
    narrative_run[topic["number"]] = n_scores
    

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




In [11]:
print("Title scores")
print_cum_stats(title_run)

print("\nNarrative scores")
print_cum_stats(narrative_run)

Title scores
Aggregate results
Average MAP:  0.2632030395272856
Average P_5:  0.56
Average NDCG:  0.4455656685203385

Narrative scores
Aggregate results
Average MAP:  0.3309275278638427
Average P_5:  0.72
Average NDCG:  0.5102306736334332


# Simple Re-Reranking: Combined counts for title and narrative
Method: 

1. Combine title and narrative 
2. Per input get results from semantic and bm25 index
3. Score is combined count across all results

In [12]:
from collections import Counter
from tqdm.notebook import tqdm
combined_run = {}

for topic in tqdm(topics, total = len(topics)):

    # Title results 
    title = topic["title"].strip()
    sti, sts = semantic_idx.query(title, num_results=30)
    stid = db.batch_idx_lookup(sti)
    bti, bts = bm25_idx.query(title, num_results=30)
    btid = db.batch_idx_lookup(bti)
    
    # Narrative results 
    narrative = topic["narrative"].strip()
    sni, sns = semantic_idx.query(narrative, num_results=30)
    snid = db.batch_idx_lookup(sni)
    
    bni, bns = bm25_idx.query(narrative, num_results=30)
    bnid = db.batch_idx_lookup(bni)
    
    ctr = Counter(stid + btid + snid + bnid)
    combined_run[topic["number"]] = dict(ctr.most_common(30))

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




In [13]:
print_cum_stats(combined_run)

Aggregate results
Average MAP:  0.2988690944149083
Average P_5:  0.71
Average NDCG:  0.4649682630922733


# Simple Re-Reranking: Combined scores across all inputs
Method: 

1. Consider title and narrative together
2. Per input get results from semantic and bm25 index
3. Score is bm25 score + sim score

In [14]:
from collections import Counter
from tqdm.notebook import tqdm

combined_run = {}

for topic in tqdm(topics, total = len(topics)):

    # Title results 
    title = topic["title"].strip()
    sti, sts = semantic_idx.query(title, num_results=30)
    stid = db.batch_idx_lookup(sti)
    stid_scores = dict(zip(stid, sts))

    bti, bts = bm25_idx.query(title, num_results=30)
    btid = db.batch_idx_lookup(bti)
    btid_scores = dict(zip(btid, bts))

    tctr = Counter(stid+btid)
    t_scores = {}
    for i in tctr:
        s_score = stid_scores.get(i, 0)
        b_score = btid_scores.get(i, 0)
        t_scores[i] = s_score + b_score
    
    # Narrative results 
    narrative = topic["narrative"].strip()
    sni, sns = semantic_idx.query(narrative, num_results=30)
    snid = db.batch_idx_lookup(sni)
    snid_scores = dict(zip(snid, sts))

    bni, bns = bm25_idx.query(narrative, num_results=30)
    bnid = db.batch_idx_lookup(bni)
    bnid_scores = dict(zip(bnid, bts))

    nctr = Counter(snid+bnid)
    n_scores = {}
    for i in nctr:
        s_score = snid_scores.get(i, 0)
        b_score = bnid_scores.get(i, 0)
        n_scores[i] = s_score + b_score

    combined_scores = {}
    all_keys = list(n_scores.keys()) + list(t_scores.keys())
    for key in all_keys:
        t_s = t_scores.get(key, 0)
        n_s = n_scores.get(key,0)
        combined_scores[key] = t_s + n_s

    combined_run[topic["number"]] = combined_scores
    


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




In [15]:
print_cum_stats(combined_run)

Aggregate results
Average MAP:  0.40540449823690966
Average P_5:  0.6900000000000001
Average NDCG:  0.6219014986596448


In [50]:
from flair.models import SequenceTagger
from flair.data import Sentence

model = SequenceTagger.load('resources/causal-tagger/best-model.pt')

s = Sentence("Sometime you need to be careful. Drugs are the leading cause of food-bourne illness and death.")
model.predict(s)
for entity in s.get_spans('ner'):
    print(entity)


2021-06-07 00:25:18,412 loading file resources/causal-tagger/best-model.pt
Span [8]: "Drugs"   [− Labels: CAUSE (0.9948)]
Span [17]: "death"   [− Labels: EFFECT (0.8831)]
