In [1]:
# Set up the entailment fact checker and define a helper that looks up a single fact in a JSONL file and checks it
import json
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from factcheck import EntailmentFactChecker, EntailmentModel 
from transformers import AutoConfig

# Initialize the EntailmentFactChecker: load the pretrained sequence-classification
# checkpoint and its tokenizer by name, then wrap them in the project's
# EntailmentModel and EntailmentFactChecker helpers.
model_name = "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
ent_tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): the variable is called "roberta_..." but the checkpoint name above
# refers to a DeBERTa-v3 model; the name is kept because other cells may use it.
roberta_ent_model = AutoModelForSequenceClassification.from_pretrained(model_name)
ent_model = EntailmentModel(roberta_ent_model, ent_tokenizer)
# `fact_checker` is the module-level object that test_fact_from_file relies on.
fact_checker = EntailmentFactChecker(ent_model)

# Print the checkpoint's configured `max_position_embeddings` value.
# NOTE(review): presumably used here as the upper bound on input length in
# tokens -- confirm against how EntailmentModel truncates its inputs.
config = AutoConfig.from_pretrained(model_name)
print(config.max_position_embeddings)

def test_fact_from_file(file_path, fact_name, threshold):
    with open(file_path, 'r') as file:
        lines = file.readlines()
        data = [json.loads(line) for line in lines]
        
    # Search for the fact based on the "sent" field
    fact_data = next((entry for entry in data if entry['sent'] == fact_name), None)
    
    if not fact_data:
        print(f"Error: No fact found with name '{fact_name}'.")
        return
    
    fact = fact_data['sent']
    passages = fact_data['passages']
    
    result = fact_checker.check_fact(fact_checker.clean_text(fact), passages, threshold=threshold)

    # Append the original passages and sentences to the result
    result['original_passages'] = passages

    return result


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\dgturner01\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\dgturner01\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


512


In [3]:
import html
import json
from IPython.display import display, HTML

# Custom CSS to enable word wrapping for <pre> tags.
# `white-space: pre-wrap` keeps the whitespace of preformatted text while still
# wrapping long lines. The selector is a bare `pre`, so the rule is not scoped
# to any particular class or element id.
custom_css = """
<style>
    pre {
        white-space: pre-wrap;
    }
</style>
"""

# Render the <style> block as cell output via IPython's HTML display object.
display(HTML(custom_css))

def display_json(data):
    """Render ``data`` as indented JSON inside a ``<pre>`` element.

    The value is serialised with a four-space indent, HTML-escaped so that
    characters such as ``<``, ``&`` and quotes appear literally, and then
    shown through IPython's ``display(HTML(...))``.

    Args:
        data: Any value accepted by ``json.dumps``.
    """
    serialized = json.dumps(data, indent=4)
    escaped = html.escape(serialized)
    markup = HTML(f"<pre>{escaped}</pre>")
    display(markup)

# for interactive fact testing
# `fact_name` is compared for exact equality with each record's "sent" field,
# so it must match the stored sentence character for character.
file_path = "data/passages_bm25_ChatGPT_humfacts.jsonl"  # Replace with your file path
fact_name = "Maracaibo is in Venezuela."

# Threshold forwarded to the fact checker.
# NOTE(review): its exact meaning is defined inside EntailmentFactChecker -- confirm.
t = 0.60
result = test_fact_from_file(file_path, fact_name, threshold=t)
# When no matching fact is found, `result` is None and this renders "null".
display_json(result)
