In [28]:
import sys

sys.path.append("../backend")

In [29]:
DATASET = "test"
RESULT_FILE = f"../data/generated/results/{DATASET}_reciprocal_cross_encoder.json"

print(f'RESULT_FILE: {RESULT_FILE}')

RESULT_FILE: ../data/generated/results/test_reciprocal_cross_encoder.json


In [30]:
from backend.evals.squad_eval_script import Evaluator
import re

def remove_numbers_in_parentheses(s):
    return re.sub(r'\(\d{1,2}\)', '', s)

def replace_multiple_spaces(s):
    return re.sub(r' +', ' ', s)

def remove_numbers_followed_by_dot(s):
    return re.sub(r'\d+\.', '', s)

def clean_text(text: str):
    text = remove_numbers_in_parentheses(text)
    text = replace_multiple_spaces(text)
    text = remove_numbers_followed_by_dot(text)
    text = text.replace("\n", " ").replace("\t", " ").replace("\r", " ")
    text = text.replace("  ", " ")
    text = text.strip()
    text = text.lower()
    text = " ".join(text.split())
    if text.endswith("."):
        text = text[:-1]
    text = Evaluator.CleanAnswer(text)
    return text


In [31]:
def add_found_record(results: list, found_record: dict):
    record = {
        "id": found_record["id"],
        "question": found_record["question"],
        "answer": found_record["answers"][0]["text"],
        "pred_answer": found_record["pred_answer"],
        "context": "\n".join(map(lambda x: x["text"], found_record["relevant_embeddings"])),
    }
    results.append(record)

def get_pred_not_match_answer(results: dict):
    records = []
    for item in results["data"]:
        pred_answer = item["pred_answer"]
        #if any(answer for answer in item["answers"] if answer["text"] == pred_answer):
        if clean_text(item["answers"][0]["text"]) == clean_text(pred_answer):
            continue
        else:
            add_found_record(records, item)
    return records


def get_answer_not_in_pred(results: dict):
    records = []
    for item in results["data"]:
        pred_answer = item["pred_answer"]
        #if any(answer for answer in item["answers"] if answer["text"] in pred_answer):
        if clean_text(item["answers"][0]["text"]) in clean_text(pred_answer):
            continue
        else:
            add_found_record(records, item)
    return records

def get_not_contained_bi(results: dict):
    records = []
    for item in results["data"]:
        pred_answer = item["pred_answer"]
        if clean_text(pred_answer) in clean_text(item["answers"][0]["text"]) or clean_text(item["answers"][0]["text"]) in clean_text(pred_answer):
            continue
        else:
            add_found_record(records, item)
    return records

def get_pred_not_in_context(results: dict):
    records = []
    for item in results["data"]:
        pred_answer = item["pred_answer"]
        if any(embedding for embedding in item["relevant_embeddings"] if clean_text(pred_answer) in clean_text(embedding["text"])):
            continue
        else:
            add_found_record(records, item)
    return records
    

In [32]:
import json
with open(RESULT_FILE, "r", encoding='utf-8') as f:
    results = json.load(f)

In [33]:
results["data"][0]

{'title': 'REGULATION (EU) 2019/2088 OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL of 27 November 2019 on sustainability‐related disclosures in the financial services sector (Text with EEA relevance)',
 'id': '390bed57-a631-4986-a794-902e948bf685',
 'question': 'When was the REGULATION (EU) 2019/2088 OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL adopted?',
 'answers': [{'text': 'of 27 November 2019'}],
 'pred_answer': '27 November 2019',
 'relevant_embeddings': [{'embedding_id': '7ea70360-55f4-4f73-aff2-6e8d48b91fd0',
   'rank': 1,
   'title': 'REGULATION (EU) 2019/2088 OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL of 27 November 2019 on sustainability‐related disclosures in the financial services sector (Text with EEA relevance)',
   'offset': 0,
   'score': 5.757898330688477,
   'score_type': 'cross_encoder',
   'reranked': True,
   'text': '9.12.2019\n\nUnion\n\nI\n\n(Legislative acts)\n\nREGULATIONS\n\nREGULATION (EU) 2019/2088 OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL\n\nof 27

In [34]:
num_questions = len(results["data"])
num_questions

263

In [35]:
records_not_exact_match = get_pred_not_match_answer(results)
records_not_contained = get_answer_not_in_pred(results)
records_not_contained_bi = get_not_contained_bi(results)
records_not_in_context = get_pred_not_in_context(results)

In [36]:
print(f'num preds with no exact match {len(records_not_exact_match)}')
print(f'num preds with no contained match {len(records_not_contained)}')
print(f'num preds with no contained match bi {len(records_not_contained_bi)}')
print(f'num preds with no contained in context {len(records_not_in_context)}')

num preds with no exact match 142
num preds with no contained match 67
num preds with no contained match bi 54
num preds with no contained in context 63


In [37]:
# Intersection of records_not_contained_bi and records_not_in_context
records_not_contained_bi_set = set([record["id"] for record in records_not_contained_bi])
records_not_contained_set = set([record["id"] for record in records_not_contained])
records_not_in_context_set = set([record["id"] for record in records_not_in_context])
#intersection = records_not_contained_bi_set.intersection(records_not_in_context_set)
intersection = records_not_contained_set.intersection(records_not_in_context_set)
len(intersection)

36

In [38]:
intersection_results = []

for i in intersection:
    for item in results["data"]:
        if item["id"] == i:
            add_found_record(intersection_results, item)
            break

In [39]:
len(intersection_results)

36

In [40]:
import pandas as pd

intersection_df = pd.DataFrame(intersection_results)
df_not_exact_match = pd.DataFrame(records_not_exact_match)
df_not_contained = pd.DataFrame(records_not_contained)
df_not_contained_bi = pd.DataFrame(records_not_contained_bi)

df_not_exact_match_diff_not_contained = df_not_exact_match[~df_not_exact_match.id.isin(df_not_contained.id)]

In [41]:
len(df_not_exact_match_diff_not_contained)

75

In [42]:
intersection_df

Unnamed: 0,id,question,answer,pred_answer,context
0,5163ebf6-24d1-45d2-94e1-ce785af6bfba,"Who should finance the costs of collecting, tr...","Accordingly, they should finance the costs of ...",Producers should finance the costs of collecti...,producer responsibility management at stage. A...
1,7a5a6bbf-46c6-45aa-902b-008b9f0559e3,What must be reviewed once a year and on an ad...,The effectiveness of the remedial action must ...,"The effectiveness of the preventive measures, ...",4. agreeing on appropriate contractual control...
2,9f4efd09-4eb9-4b3b-b08c-87c7934807fd,What is the initial obligation of the statutor...,Starting with an obligation on the statutory a...,The initial obligation of the statutory audito...,"Therefore, a progressive approach to enhancing..."
3,f93fa4c2-2efd-4581-a252-d04006874738,What information should financial market parti...,Financial market participants shall publish an...,Financial market participants shall publish an...,Article 10\n\nTransparency of the promotion of...
4,a58ec455-50a8-485d-87bf-361fd09e487a,What measures should Member States take regard...,Member States shall take the necessary measure...,Member States shall take the necessary measure...,"3. Member States may decide, in accordance wit..."
5,2eec284c-da76-4664-a4d4-a0b3997d3644,What must financial undertakings disclose?,Financial undertakings shall disclose: the pro...,Financial undertakings shall include all addit...,"Article 8(1) of the Taxonomy Regulation, provi..."
6,34b17fac-162d-4c2d-b0c9-ee8638b67274,When does Directive 2006/66/EC get repealed an...,Directive 2006/66/EC is repealed with effect f...,Directive 2006/66/EC is repealed with effect f...,The Commission shall publish a report containi...
7,16f4d73c-0c19-4464-8fac-8ee83c735200,What is the purpose of Regulation (EC) No 765/...,Regulation (EC) No 765/2008 of the European Pa...,Regulation (EC) No 765/2008 of the European Pa...,(51) Regulation (EC) No 765/2008 of the Europe...
8,42e91dd1-d89f-4132-b482-d0e44aa0478c,What is the purpose of the Delegated Act?,The rules set out in the Delegated Act allows ...,The Delegated Act specifies the disclosure obl...,The Delegated Act specifies the disclosure obl...
9,a55d1847-bb89-4227-9763-c75d96fd3867,Who is required to publish a sustainability re...,A Member State shall require that a subsidiary...,A Member State shall require that a subsidiary...,Sustainability reports concerning third-countr...


In [43]:
#q = df_not_exact_match_diff_not_contained.iloc[1].question
#p = df_not_exact_match_diff_not_contained.iloc[1].pred_answer
#a = df_not_exact_match_diff_not_contained.iloc[1].answer
#c = df_not_exact_match_diff_not_contained.iloc[1].context

In [44]:
q = clean_text(intersection_df.iloc[2].question)
p = clean_text(intersection_df.iloc[2].pred_answer)
a = clean_text(intersection_df.iloc[2].answer)
c = clean_text(intersection_df.iloc[2].context)

In [45]:
print(f'Q: {q}')
print(f'P: {p}')
print(f'A: {a}')
print(f'C: {c}')

Q: what is the initial obligation of the statutory auditor or audit firm regarding sustainability reporting
P: initial obligation of the statutory auditor or audit firm is to express an opinion about the compliance of the sustainability reporting with union requirements based on a limited assurance engagement. that opinion should cover the compliance of the sustainability reporting with union sustainability reporting standards, the process carried out by the undertaking to identify the information reported pursuant to the sustainability reporting standards and compliance with the requirement to mark up sustainability reporting. the auditor should also assess whether the undertaking’s reporting complies with the reporting requirements of article 8 of regulation (eu) 2020/
A: starting with an obligation on the statutory auditor or audit firm to express an opinion about the compliance of the sustainability reporting with union requirements based on a limited assurance engagement
C: theref

In [46]:
def is_every_text_sentence_in_context(context, text):
    return all(clean_text(sentence) in clean_text(context) for sentence in text.split("."))


ids_not_sentences_in_context = []
for i, id in enumerate(intersection_df.id):
    record = intersection_df.iloc[i]
    if not is_every_text_sentence_in_context(record.context, record.pred_answer):
        ids_not_sentences_in_context.append(id)

In [47]:
len(ids_not_sentences_in_context)

31

In [48]:
sentences_not_in_context_df = intersection_df[intersection_df.id.isin(ids_not_sentences_in_context)]

In [49]:
sentences_not_in_context_df

Unnamed: 0,id,question,answer,pred_answer,context
0,5163ebf6-24d1-45d2-94e1-ce785af6bfba,"Who should finance the costs of collecting, tr...","Accordingly, they should finance the costs of ...",Producers should finance the costs of collecti...,producer responsibility management at stage. A...
1,7a5a6bbf-46c6-45aa-902b-008b9f0559e3,What must be reviewed once a year and on an ad...,The effectiveness of the remedial action must ...,"The effectiveness of the preventive measures, ...",4. agreeing on appropriate contractual control...
2,9f4efd09-4eb9-4b3b-b08c-87c7934807fd,What is the initial obligation of the statutor...,Starting with an obligation on the statutory a...,The initial obligation of the statutory audito...,"Therefore, a progressive approach to enhancing..."
3,f93fa4c2-2efd-4581-a252-d04006874738,What information should financial market parti...,Financial market participants shall publish an...,Financial market participants shall publish an...,Article 10\n\nTransparency of the promotion of...
4,a58ec455-50a8-485d-87bf-361fd09e487a,What measures should Member States take regard...,Member States shall take the necessary measure...,Member States shall take the necessary measure...,"3. Member States may decide, in accordance wit..."
6,34b17fac-162d-4c2d-b0c9-ee8638b67274,When does Directive 2006/66/EC get repealed an...,Directive 2006/66/EC is repealed with effect f...,Directive 2006/66/EC is repealed with effect f...,The Commission shall publish a report containi...
7,16f4d73c-0c19-4464-8fac-8ee83c735200,What is the purpose of Regulation (EC) No 765/...,Regulation (EC) No 765/2008 of the European Pa...,Regulation (EC) No 765/2008 of the European Pa...,(51) Regulation (EC) No 765/2008 of the Europe...
9,a55d1847-bb89-4227-9763-c75d96fd3867,Who is required to publish a sustainability re...,A Member State shall require that a subsidiary...,A Member State shall require that a subsidiary...,Sustainability reports concerning third-countr...
11,71005d2f-bd01-4002-b85c-9848fc943849,What is the proportion of the insurance or rei...,The proportion of the insurance or reinsurance...,The proportion of the insurance or reinsurance...,Template: The proportion of the insurance or r...
13,b0837960-cdac-49ea-aab4-b7919560ef65,What are the requirements for a conformity ass...,A conformity assessment body and its personnel...,"For the purposes of notification, a conformity...",Article 24\n\nInformation obligation on notify...


In [50]:
q = clean_text(sentences_not_in_context_df.iloc[0].question)
p = clean_text(sentences_not_in_context_df.iloc[0].pred_answer)
a = clean_text(sentences_not_in_context_df.iloc[0].answer)
c = clean_text(sentences_not_in_context_df.iloc[0].context)

In [51]:
print(f'Q: {q}')
print(f'P: {p}')
print(f'A: {a}')
print(f'C: {c}')

Q: who should finance the costs of collecting, treating and recycling all collected batteries
P: producers should finance the costs of collecting, treating and recycling all collected batteries
A: accordingly, they should finance the costs of collecting, treating and recycling all collected batteries
C: producer responsibility management at stage. accordingly, they should finance the costs of collecting, treating and recycling all collected batteries, carrying out compositional surveys of mixed collected municipal waste, reporting on batteries and waste batteries, and of providing information to end-users and waste operators about batteries and appropriate re-use and management of waste batteries. the new rules on extended producer responsibility under this regulation are intended to ensure a high level of environmental and health protection in the union by maximising separate collection of waste batteries and ensuring that all collected batteries are recycled through processes that re