# 💬 Claimify reproduction study

In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all modules before each
# cell executes, so edits to the imported `utils` modules take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
from utils.chat import get_prompt, load_md_prompts
from langchain_openai import ChatOpenAI
from langchain_ollama import ChatOllama
from typing import List
import nltk
from utils.claimify import ClaimifyPipeline
from langchain.chat_models import init_chat_model

## 1. Input Question and Answer

In [3]:
# Render the "decomposition" prompt with placeholder values to inspect the resulting messages.
get_prompt("claimify", "decomposition", {"excerpt": "excerpt", "question": "question", "sentence": "sentence"})

[SystemMessage(content='You are an assistant for a group of fact-checkers. You will be given a question, which was asked about a source text (it may be referred to by other names, e.g., a dataset). You will also be given an excerpt from a response to the question. If it contains "[...]", this means that you are NOT seeing all sentences in the response. You will also be given a particular sentence from the response. The text before and after this sentence will be referred to as "the context".\n\nCRITICAL LANGUAGE REQUIREMENT: You must ALWAYS respond in the same language as the source text for ALL CONTENT. If the input sentence is in Spanish, respond in Spanish. If it is in French, respond in French. If it is in German, respond in German, etc. Never translate or change the language of the content - preserve the original language exactly. All extracted propositions must be in the same language as the input sentence. HOWEVER, keep all structural elements, format keywords, and system respon

In [4]:
# Load the "claimify" prompt templates; the recorded output is a dict of
# ChatPromptTemplate objects keyed by stage name (e.g. 'selection').
load_md_prompts("claimify")

{'selection': ChatPromptTemplate(input_variables=['excerpt', 'question', 'sentence'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are an assistant to a fact-checker. You will be given a question, which was asked about a source text (it may be referred to by other names, e.g., a dataset). You will also be given an excerpt from a response to the question. If it contains "[...]", this means that you are NOT seeing all sentences in the response. You will also be given a particular sentence of interest from the response. Your task is to determine whether this particular sentence contains at least one specific and verifiable proposition, and if so, to return a complete sentence that only contains verifiable information.\n\nCRITICAL LANGUAGE REQUIREMENT: You must ALWAYS respond in the same language as the source text for ALL CONTENT. If the input sentence is in Spanish

In [5]:
# Run the Claimify "selection" prompt on one sample post with GPT-5.
chat = init_chat_model("openai:gpt-5")

# A single string object is shared by `sentence`, `question` and `excerpt`.
# NOTE(review): `question` therefore holds the post text, while the call below
# passes the literal "none" as the question; later cells read `question` —
# confirm this is intended.
sentence = """Salena Zito has written a powerful new Book, “BUTLER: The Untold Story of the Near Assassination of Donald Trump and the Fight for America’s Heartland.” Salena was an eyewitness to that terrible day, and understands the unbreakable Spirit of our Movement to FIGHT, FIGHT, FIGHT. The Future of our Nation resides in the Hearts and Souls of the People of this Country, and Salena knows we will not rest until we, MAKE AMERICA GREAT AGAIN. Preorder your copy today: a.co/d/92QYTIJ"""
question = sentence
excerpt = sentence

selection_messages = get_prompt(
    "claimify",
    "selection",
    {"excerpt": excerpt, "question": "none", "sentence": sentence},
)
output = chat.invoke(selection_messages)
output

AIMessage(content='1. Thought process:\n- I need to identify if the sentence contains at least one concrete, checkable claim, not just opinions or calls to action, and if so, rewrite it to include only verifiable content.\n- The excerpt is a promotional message about Salena Zito’s book, claiming she wrote a book with a specific title, that she was an eyewitness to an event, making subjective statements about a movement and the nation’s future, and providing a preorder link.\n- The claim that Salena Zito has written a book with a specific title is a specific and verifiable proposition; statements about understanding a movement or the nation’s future are subjective, and the preorder call is not inherently a factual proposition beyond the existence of a link.\n- I will retain only the factual part about authorship and title and remove subjective language like “powerful new.”\n\n2. Contains a specific and verifiable proposition\n\n3. Salena Zito has written a book titled “BUTLER: The Untol

In [None]:
# Send the "selection" prompt, filled with "todo" placeholder inputs, to an
# Ollama-served qwen3:14b model.
# NOTE(review): this cell rebinds `chat` and `output`; on a top-to-bottom run the
# following `output.content` cell would show this response rather than the GPT-5
# one — confirm this is intended.
chat = ChatOllama(model="qwen3:14b")
placeholder_inputs = {"excerpt": "todo", "question": "todo", "sentence": "todo"}
output = chat.invoke(get_prompt("claimify", "selection", placeholder_inputs))

In [6]:
# Show only the text content of the model response currently bound to `output`.
output.content

'1. Thought process:\n- I need to identify if the sentence contains at least one concrete, checkable claim, not just opinions or calls to action, and if so, rewrite it to include only verifiable content.\n- The excerpt is a promotional message about Salena Zito’s book, claiming she wrote a book with a specific title, that she was an eyewitness to an event, making subjective statements about a movement and the nation’s future, and providing a preorder link.\n- The claim that Salena Zito has written a book with a specific title is a specific and verifiable proposition; statements about understanding a movement or the nation’s future are subjective, and the preorder call is not inherently a factual proposition beyond the existence of a link.\n- I will retain only the factual part about authorship and title and remove subjective language like “powerful new.”\n\n2. Contains a specific and verifiable proposition\n\n3. Salena Zito has written a book titled “BUTLER: The Untold Story of the Nea

In [7]:
def split_into_sentences(text: str, language: str = "english") -> List[str]:
    """
    Splits a block of text into sentences, handling paragraphs and lists.
    This replicates the methodology from Appendix C.1 of the Claimify paper.

    The text is first cut on newline characters, so a sentence never spans two
    paragraphs or list items; every non-blank line is then passed to NLTK's
    sentence tokenizer and the results are concatenated in document order.

    Args:
        text: The input text to split
        language: Name of the NLTK Punkt model handed to ``nltk.sent_tokenize``.
            Defaults to "english", which is the tokenizer's own default, so
            existing callers get the same result. Pass e.g. "czech" for
            non-English input.

    Returns:
        A list of sentence strings; empty when ``text`` is empty or contains
        only whitespace.
    """
    return [
        sentence
        # Split by newlines first to handle paragraphs and list items.
        for paragraph in text.split('\n')
        if paragraph.strip()  # skip blank lines between paragraphs
        for sentence in nltk.sent_tokenize(paragraph, language=language)
    ]

# Sanity-check the splitter on the sample post. In the recorded output the first
# two sentences stay together: no break is made after the closing quote in `Heartland.”`.
split_into_sentences("""Salena Zito has written a powerful new Book, “BUTLER: The Untold Story of the Near Assassination of Donald Trump and the Fight for America’s Heartland.” Salena was an eyewitness to that terrible day, and understands the unbreakable Spirit of our Movement to FIGHT, FIGHT, FIGHT. The Future of our Nation resides in the Hearts and Souls of the People of this Country, and Salena knows we will not rest until we, MAKE AMERICA GREAT AGAIN. Preorder your copy today: a.co/d/92QYTIJ""")

['Salena Zito has written a powerful new Book, “BUTLER: The Untold Story of the Near Assassination of Donald Trump and the Fight for America’s Heartland.” Salena was an eyewitness to that terrible day, and understands the unbreakable Spirit of our Movement to FIGHT, FIGHT, FIGHT.',
 'The Future of our Nation resides in the Hearts and Souls of the People of this Country, and Salena knows we will not rest until we, MAKE AMERICA GREAT AGAIN.',
 'Preorder your copy today: a.co/d/92QYTIJ']

In [10]:
# Build the Claimify pipeline on a GPT-5-mini chat model.
pipeline = ClaimifyPipeline(init_chat_model("openai:gpt-5-mini"))

2025-08-20 16:52:24,476 - claimify.pipeline - INFO - Structured outputs enabled for improved reliability


In [11]:
# Czech-language sample post that is fed to `pipeline.run` in the next code cell.
article = """Předseda Pirátů Ivan Bartoš chce legalizovat heroin! Tak to je už opravdu síla! Takže nejenom že otevřeně podporuje migraci a islám (a nosí proto mikinu "jsem sluníčkář") ale ještě navíc podporuje tvrdé drogy. Chcete takovou stranu v Parlamentu, aby Vaše děti měli přístup k tvrdým drogám? Pro nás v SPD je to absolutně nepřijatelné."""

In [12]:
# Run the pipeline on the article. The recorded log shows SELECTION,
# DISAMBIGUATION and DECOMPOSITION steps per sentence, and the displayed result
# is a list of claim strings.
result = pipeline.run(article)
result

2025-08-20 16:52:28,817 - claimify.pipeline - INFO - Processing 5 sentences
2025-08-20 16:52:28,818 - claimify.pipeline - INFO - Processing sentence 1/5: Předseda Pirátů Ivan Bartoš chce legalizovat heroin!...
2025-08-20 16:52:52,524 - claimify.pipeline - INFO - SELECTION: Sentence verifiable
2025-08-20 16:53:13,408 - claimify.pipeline - INFO - DISAMBIGUATION: Sentence resolved
2025-08-20 16:53:40,980 - claimify.pipeline - INFO - DECOMPOSITION: Extracted 2 claims
2025-08-20 16:53:40,981 - claimify.pipeline - INFO - Processing sentence 2/5: Tak to je už opravdu síla!...
2025-08-20 16:53:57,423 - claimify.pipeline - INFO - SELECTION: Sentence unverifiable
2025-08-20 16:53:57,424 - claimify.pipeline - INFO - Processing sentence 3/5: Takže nejenom že otevřeně podporuje migraci a islám (a nosí proto mikinu "jsem sluníčkář") ale ještě...
2025-08-20 16:54:26,850 - claimify.pipeline - INFO - SELECTION: Sentence verifiable
2025-08-20 16:54:53,149 - claimify.pipeline - INFO - DISAMBIGUATION: Sen

['Ivan Bartoš je předseda Pirátů [předseda Pirátské strany].',
 'Ivan Bartoš chce legalizovat heroin.',
 'Ivan Bartoš otevřeně podporuje migraci.',
 'Ivan Bartoš otevřeně podporuje islám.',
 'Ivan Bartoš nosí mikinu s nápisem "jsem sluníčkář".',
 'Ivan Bartoš podporuje tvrdé drogy.',
 'Tato věta se ptá čtenářů [čtenářů tohoto textu], zda chtějí ve Sněmovně [Poslanecké sněmovně Parlamentu České republiky] politickou stranu.',
 'Tato věta naznačuje/předkládá předpoklad, že přítomnost takové politické strany ve Sněmovně [Poslanecké sněmovně Parlamentu České republiky] by vedla k tomu, že děti těchto čtenářů [dětí čtenářů tohoto textu] by měly přístup k tvrdým drogám.',
 'Existuje tvrzení [že předseda politické strany Piráti, Ivan Bartoš, chce legalizovat heroin].',
 'Politická strana SPD považuje toto tvrzení [že předseda politické strany Piráti, Ivan Bartoš, chce legalizovat heroin] za absolutně nepřijatelné.']

In [None]:
# Render the "disambiguation" prompt from the `question`, `excerpt` and `sentence`
# variables assigned in the earlier GPT-5 cell.
prompt = get_prompt("claimify", "disambiguation", {"question": question, "excerpt": excerpt, "sentence": sentence})
# Display the rendered prompt (the recorded output is a list starting with a SystemMessage).
prompt

[SystemMessage(content='You are an assistant to a fact-checker. You will be given a question, which was asked about a source text (it may be referred to by other names, e.g., a disa\ndataset). You will also be given an excerpt from a response to the question. If it contains "[...]", this means that you are NOT seeing all sentences in the response. You will also be given a particular sentence from the response. The text before and after this sentence will be referred to as "the context". Your task is to "decontextualize" the sentence, which means:\n1. determine whether it\'s possible to resolve partial names and undefined acronyms/abbreviations in the sentence using the question and the context; if it is possible, you will make the necessary changes to the sentence\n2. determine whether the sentence in isolation contains linguistic ambiguity that has a clear resolution using the question and the context; if it does, you will make the necessary changes to the sentence\n\nNote the followi

In [49]:
# Render the "selection" prompt from the notebook's `question`, `excerpt` and
# `sentence` variables to inspect its messages.
get_prompt("claimify", "selection", {"question": question, "excerpt": excerpt, "sentence": sentence})

[SystemMessage(content='You are an assistant to a fact-checker. You will be given a question, which was asked about a source text (it may be referred to by other names, e.g., a \ndataset). You will also be given an excerpt from a response to the question. If it contains "[...]", this means that you are NOT seeing all sentences in the response. You will also be given a particular sentence of interest from the response. Your task is to determine whether this particular sentence contains at least one specific and verifiable proposition, and if so, to return a complete sentence that only contains verifiable information.   \n\nNote the following rules:\n- If the sentence is about a lack of information, e.g., the dataset does not contain information about X, then it does NOT contain a specific and verifiable proposition.\n- It does NOT matter whether the proposition is true or false.\n- It does NOT matter whether the proposition is relevant to the question.\n- It does NOT matter whether the 