# Claim & Image Peek

Interactively browse the AVeriMaTec validation claims and preview the images referenced in each record.

> Set the slider (or use the random button) below to inspect a claim, its metadata, the claim images, and question-level input images.

In [1]:
from pathlib import Path
import json
import random

from IPython.display import display, HTML
from PIL import Image
import ipywidgets as widgets

SPLIT = 'val'
VAL_JSON_PATH = Path(f'/mnt/data/factcheck/averimatec/{SPLIT}.json')
IMAGES_DIR = Path('/mnt/data/factcheck/averimatec/images')


if not VAL_JSON_PATH.exists():
    raise FileNotFoundError(f'Missing val.json at {VAL_JSON_PATH}')
if not IMAGES_DIR.exists():
    raise FileNotFoundError(f'Missing images directory at {IMAGES_DIR}')

print(f'val.json located at: {VAL_JSON_PATH}')
print(f'images directory located at: {IMAGES_DIR}')

val.json located at: /mnt/data/factcheck/averimatec/val.json
images directory located at: /mnt/data/factcheck/averimatec/images


In [2]:
with VAL_JSON_PATH.open('r', encoding='utf-8') as f:
    claims = json.load(f)

print(f'Loaded {len(claims):,} validation claims.')

Loaded 152 validation claims.


In [3]:
def _image_path(image_name: str) -> Path:
    return IMAGES_DIR / image_name


def display_image(image_name: str, caption: str = 'Image') -> None:
    path = _image_path(image_name)
    if not path.exists():
        display(HTML(f"<b>Missing image file:</b> {image_name}"))
        return

    with Image.open(path) as img:
        display(HTML(f"<em>{caption}</em> — {image_name} ({img.width}×{img.height}px)"))
        display(img)


def render_claim(idx: int) -> None:
    if not (0 <= idx < len(claims)):
        raise IndexError(f'Index {idx} is outside the dataset range (0-{len(claims) - 1}).')

    claim = claims[idx]
    metadata = claim.get('metadata', {})

    header_html = f"""
    <h3>Claim #{idx}</h3>
    <ul>
        <li><b>Date:</b> {claim.get('date', '—')}</li>
        <li><b>Label:</b> {claim.get('label', '—')}</li>
        <li><b>Article:</b> <a href='{claim.get('article', '')}' target='_blank'>{claim.get('article', '')}</a></li>
        <li><b>Claim text:</b> {claim.get('claim_text', '—')}</li>
        <li><b>Speaker:</b> {metadata.get('speaker', '—')}</li>
    </ul>
    <p><b>Justification:</b> {claim.get('justification', '—')}</p>
    """
    display(HTML(header_html))

    if claim.get('claim_images'):
        display(HTML('<h4>Claim images</h4>'))
        for image_name in claim['claim_images']:
            display_image(image_name, caption='Claim image')
    else:
        display(HTML('<p>No claim-level images listed.</p>'))

    if claim.get('questions'):
        display(HTML('<h4>Questions & evidence</h4>'))
        for q_idx, question in enumerate(claim['questions']):
            question_html = f"""
            <details open>
                <summary><b>Question {q_idx + 1}:</b> {question.get('question', '—')}</summary>
                <ul>
                    <li><b>Answer method:</b> {question.get('answer_method', '—')}</li>
                    <li><b>Question type:</b> {', '.join(question.get('question_type', [])) or '—'}</li>
                </ul>
            </details>
            """
            display(HTML(question_html))

            for ans in question.get('answers', []):
                answer_html = f"""
                <p><b>Answer ({ans.get('answer_type', '—')}):</b> {ans.get('answer_text', '—')}</p>
                <p><b>Source:</b> <a href='{ans.get('source_url', '')}' target='_blank'>{ans.get('source_url', '')}</a> ({ans.get('source_medium', '—')})</p>
                """
                display(HTML(answer_html))

            if question.get('input_images'):
                display(HTML('<p><b>Input images:</b></p>'))
                for image_name in question['input_images']:
                    display_image(image_name, caption='Question image')
            else:
                display(HTML('<p>No question-level images.</p>'))
    else:
        display(HTML('<p>No questions attached to this claim.</p>'))

In [4]:
index_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=len(claims) - 1,
    step=1,
    description='Claim #',
    continuous_update=False,
    readout=True,
    readout_format='d'
)

random_button = widgets.Button(description='Random claim', icon='shuffle')
output = widgets.Output()


def update_view(idx: int) -> None:
    output.clear_output()
    with output:
        render_claim(idx)


def on_slider_change(change):
    if change['name'] == 'value' and change['new'] is not None:
        update_view(change['new'])


def on_random_click(_):
    index_slider.value = random.randint(index_slider.min, index_slider.max)

index_slider.observe(on_slider_change, names='value')
random_button.on_click(on_random_click)

display(widgets.HBox([index_slider, random_button]))
display(output)

update_view(index_slider.value)

HBox(children=(IntSlider(value=0, continuous_update=False, description='Claim #', max=151), Button(description…

Output()

In [13]:
# load as dataframe
import pandas as pd
df = pd.json_normalize(claims, sep='_')
# write a hist of image vs text evidence
import matplotlib.pyplot as plt
df['image_evidence'].value_counts().plot(kind='bar')
plt.show()

KeyError: 'image_evidence'

In [12]:
df

Unnamed: 0,article,date,label,location,questions,justification,claim_text,claim_images,metadata_speaker,metadata_transcription,...,metadata_reporting_source,metadata_claim_types,metadata_fact_checking_strategies,metadata_modality,metadata_refuting_reasons,metadata_image_misuse_types,metadata_image_used,claim_id,image_count,claim_images_len
0,https://web.archive.org/web/20230610180155/htt...,2023-07-04,Refuted,,[{'input_images': ['67878fcce2f02e5f498136dd#I...,The claim that the woman has given birth to he...,Photo shows a 101-year-old woman who has given...,[67878fcce2f02e5f498136dd#CLAIM#0.jpg],The Love for Babies,,...,Facebook,[Event/Property Claim],"[Written Evidence, Reverse Image Search, Fact-...",Image-text,[Misuse of images],[Out-of-context],Yes,0,218,1
1,https://web.archive.org/web/20230609035730/htt...,2023-06-03,Refuted,IN,"[{'input_images': [], 'answers': [{'answer_typ...",The claim is successfully refuted because evid...,Images show the renovation of the Cuttack hosp...,"[6787a513e2f02e5f49815c30#CLAIM#0.jpg, 6787a51...",@iamyournaveen,,...,Twitter,[Event/Property Claim],"[Written Evidence, Reverse Image Search]",Image-text,[Misuse of images],[Out-of-context],Yes,1,434,2
2,https://web.archive.org/web/20230629073625/htt...,2023-06-13,Refuted,US,"[{'input_images': [], 'answers': [{'answer_typ...",The claim is refuted by proving that the image...,Photos show that the craft store chain Hobby L...,"[6787dee1e2f02e5f4981b14e#CLAIM#0.jpg, 6787dee...",Carousel Studios,,...,Facebook,[Event/Property Claim],"[Written Evidence, Reverse Image Search, Fact-...",Image-text,[Misuse of images],[Out-of-context],Yes,2,338,4
3,https://web.archive.org/web/20240723112024/htt...,2023-06-21,Refuted,,"[{'input_images': [], 'answers': [{'answer_typ...",The claim is refuted initially by proving that...,An image shows the missing submersible Titan n...,[6787e7eee2f02e5f4981c2b7#CLAIM#0.jpg],@Santa_Hustler,,...,Twitter,[Event/Property Claim],[Written Evidence],Image-text,[Misuse of images],[Out-of-context],Yes,3,353,1
4,https://web.archive.org/web/20240822171903/htt...,2023-06-25,Not Enough Evidence,US,"[{'input_images': [], 'answers': [{'answer_typ...",The information provided only states the sourc...,"A new study proves 8,000 more ballots were cou...",[6787eaf9e2f02e5f4981c66a#CLAIM#0.jpg],@BehizyTweets,,...,Twitter,[Event/Property Claim],"[Numerical Comparison, Consultation]",Image-text,[Textual refuted],[],Yes,4,197,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,https://web.archive.org/web/20240831174555/htt...,2023-06-01,Refuted,IN,[{'input_images': ['67d08e8435531374f6da9388#I...,The claim states that a Hindu girl was killed ...,Image shows the body of a Hindu girl who was f...,[67d08e8435531374f6da9388#CLAIM#0.jpg],@shibbu87,,...,X,[Media Analysis Claim],"[Written Evidence, Keyword Search]",Image-text,[Misuse of images],[Out-of-context],Yes,147,366,1
148,https://web.archive.org/web/20240610042208/htt...,2023-06-02,Refuted,IN,[{'input_images': ['67d0924d35531374f6daa1f1#I...,The claim states a mosque is near the train ac...,Image shows a mosque is present beside the tri...,[67d0924d35531374f6daa1f1#CLAIM#0.jpg],Dilip Kashyap,,...,Facebook,[Media Analysis Claim],"[Written Evidence, Consultation, Image Analysi...",Image-text,[Misuse of images],[Out-of-context],Yes,148,234,1
149,https://web.archive.org/web/20240618034518/htt...,2023-06-19,Refuted,IN,[{'input_images': ['67d09dbe35531374f6dadaf8#I...,The images were originally published in July 2...,Images show Anti-Modi posters in the US amid h...,[67d09dbe35531374f6dadaf8#CLAIM#0.jpg],@abdquddus117,Mr. N Modi We only rob bank you rob the whole ...,...,X,[Media Analysis Claim],"[Written Evidence, Keyword Search]",Image-text,[Misuse of images],[Out-of-context],Yes,149,283,1
150,https://web.archive.org/web/20240420080254/htt...,2023-06-16,Refuted,US,[{'input_images': ['67d0a15035531374f6dae467#I...,The answers indicate that the military activit...,Images show Tanks & Troops deploying inside U...,"[67d0a15035531374f6dae467#CLAIM#0.jpg, 67d0a15...",Hal Turner,,...,halturnerradioshow.com,[Media Analysis Claim],"[Written Evidence, Consultation, Video Analysis]",Image-text,[Misuse of images],[Out-of-context],Yes,150,358,3


In [11]:
#add claim_images_len
df["claim_images_len"] = df["claim_images"].apply(len)

In [None]:
df

Unnamed: 0,article,date,label,location,questions,justification,claim_text,claim_images,metadata_speaker,metadata_transcription,metadata_media_source,metadata_original_claim_url,metadata_reporting_source,metadata_claim_types,metadata_fact_checking_strategies,metadata_modality,metadata_refuting_reasons,metadata_image_misuse_types,metadata_image_used
0,https://web.archive.org/web/20230610180155/htt...,2023-07-04,Refuted,,[{'input_images': ['67878fcce2f02e5f498136dd#I...,The claim that the woman has given birth to he...,Photo shows a 101-year-old woman who has given...,[67878fcce2f02e5f498136dd#CLAIM#0.jpg],The Love for Babies,,,,Facebook,[Event/Property Claim],"[Written Evidence, Reverse Image Search, Fact-...",Image-text,[Misuse of images],[Out-of-context],Yes
1,https://web.archive.org/web/20230609035730/htt...,2023-06-03,Refuted,IN,"[{'input_images': [], 'answers': [{'answer_typ...",The claim is successfully refuted because evid...,Images show the renovation of the Cuttack hosp...,"[6787a513e2f02e5f49815c30#CLAIM#0.jpg, 6787a51...",@iamyournaveen,,https://archive.ph/886yS/1037b8f782ce4fbbec564...,https://archive.ph/886yS#selection-357.0-525.16,Twitter,[Event/Property Claim],"[Written Evidence, Reverse Image Search]",Image-text,[Misuse of images],[Out-of-context],Yes
2,https://web.archive.org/web/20230629073625/htt...,2023-06-13,Refuted,US,"[{'input_images': [], 'answers': [{'answer_typ...",The claim is refuted by proving that the image...,Photos show that the craft store chain Hobby L...,"[6787dee1e2f02e5f4981b14e#CLAIM#0.jpg, 6787dee...",Carousel Studios,,,,Facebook,[Event/Property Claim],"[Written Evidence, Reverse Image Search, Fact-...",Image-text,[Misuse of images],[Out-of-context],Yes
3,https://web.archive.org/web/20240723112024/htt...,2023-06-21,Refuted,,"[{'input_images': [], 'answers': [{'answer_typ...",The claim is refuted initially by proving that...,An image shows the missing submersible Titan n...,[6787e7eee2f02e5f4981c2b7#CLAIM#0.jpg],@Santa_Hustler,,,https://x.com/Santa_Hustler/status/16717870504...,Twitter,[Event/Property Claim],[Written Evidence],Image-text,[Misuse of images],[Out-of-context],Yes
4,https://web.archive.org/web/20240822171903/htt...,2023-06-25,Not Enough Evidence,US,"[{'input_images': [], 'answers': [{'answer_typ...",The information provided only states the sourc...,"A new study proves 8,000 more ballots were cou...",[6787eaf9e2f02e5f4981c66a#CLAIM#0.jpg],@BehizyTweets,,,https://x.com/BehizyTweets/status/167338427061...,Twitter,[Event/Property Claim],"[Numerical Comparison, Consultation]",Image-text,[Textual refuted],[],Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,https://web.archive.org/web/20240831174555/htt...,2023-06-01,Refuted,IN,[{'input_images': ['67d08e8435531374f6da9388#I...,The claim states that a Hindu girl was killed ...,Image shows the body of a Hindu girl who was f...,[67d08e8435531374f6da9388#CLAIM#0.jpg],@shibbu87,,https://pbs.twimg.com/media/FxnSo7OaAAEUJEA?fo...,https://x.com/shibbu87/status/1664593622396985344,X,[Media Analysis Claim],"[Written Evidence, Keyword Search]",Image-text,[Misuse of images],[Out-of-context],Yes
148,https://web.archive.org/web/20240610042208/htt...,2023-06-02,Refuted,IN,[{'input_images': ['67d0924d35531374f6daa1f1#I...,The claim states a mosque is near the train ac...,Image shows a mosque is present beside the tri...,[67d0924d35531374f6daa1f1#CLAIM#0.jpg],Dilip Kashyap,,https://ghostarchive.org/archive/replay/w/id-8...,https://ghostarchive.org/archive/eNp8B,Facebook,[Media Analysis Claim],"[Written Evidence, Consultation, Image Analysi...",Image-text,[Misuse of images],[Out-of-context],Yes
149,https://web.archive.org/web/20240618034518/htt...,2023-06-19,Refuted,IN,[{'input_images': ['67d09dbe35531374f6dadaf8#I...,The images were originally published in July 2...,Images show Anti-Modi posters in the US amid h...,[67d09dbe35531374f6dadaf8#CLAIM#0.jpg],@abdquddus117,Mr. N Modi We only rob bank you rob the whole ...,https://archive.ph/zbxMq/81e5878e3269bceb520ae...,https://archive.ph/zbxMq,X,[Media Analysis Claim],"[Written Evidence, Keyword Search]",Image-text,[Misuse of images],[Out-of-context],Yes
150,https://web.archive.org/web/20240420080254/htt...,2023-06-16,Refuted,US,[{'input_images': ['67d0a15035531374f6dae467#I...,The answers indicate that the military activit...,Images show Tanks & Troops deploying inside U...,"[67d0a15035531374f6dae467#CLAIM#0.jpg, 67d0a15...",Hal Turner,,https://web.archive.org/web/20230618011405im_/...,https://web.archive.org/web/20230617224616/htt...,halturnerradioshow.com,[Media Analysis Claim],"[Written Evidence, Consultation, Video Analysis]",Image-text,[Misuse of images],[Out-of-context],Yes


In [7]:
# add claim id = row order (0-based)
df['claim_id'] = df.index

In [8]:
import os
# augment df by image count (# images in /mnt/data/factcheck/averimatec/knowledge_store/train/image_related/image_related_store_image_train/{i})
df['image_count'] = df['claim_id'].apply(lambda x: len(os.listdir(f'/mnt/data/factcheck/averimatec/knowledge_store/{SPLIT}/image_related/image_related_store_image_{SPLIT}/{x}')))

In [9]:
df

Unnamed: 0,article,date,label,location,questions,justification,claim_text,claim_images,metadata_speaker,metadata_transcription,...,metadata_original_claim_url,metadata_reporting_source,metadata_claim_types,metadata_fact_checking_strategies,metadata_modality,metadata_refuting_reasons,metadata_image_misuse_types,metadata_image_used,claim_id,image_count
0,https://web.archive.org/web/20230610180155/htt...,2023-07-04,Refuted,,[{'input_images': ['67878fcce2f02e5f498136dd#I...,The claim that the woman has given birth to he...,Photo shows a 101-year-old woman who has given...,[67878fcce2f02e5f498136dd#CLAIM#0.jpg],The Love for Babies,,...,,Facebook,[Event/Property Claim],"[Written Evidence, Reverse Image Search, Fact-...",Image-text,[Misuse of images],[Out-of-context],Yes,0,218
1,https://web.archive.org/web/20230609035730/htt...,2023-06-03,Refuted,IN,"[{'input_images': [], 'answers': [{'answer_typ...",The claim is successfully refuted because evid...,Images show the renovation of the Cuttack hosp...,"[6787a513e2f02e5f49815c30#CLAIM#0.jpg, 6787a51...",@iamyournaveen,,...,https://archive.ph/886yS#selection-357.0-525.16,Twitter,[Event/Property Claim],"[Written Evidence, Reverse Image Search]",Image-text,[Misuse of images],[Out-of-context],Yes,1,434
2,https://web.archive.org/web/20230629073625/htt...,2023-06-13,Refuted,US,"[{'input_images': [], 'answers': [{'answer_typ...",The claim is refuted by proving that the image...,Photos show that the craft store chain Hobby L...,"[6787dee1e2f02e5f4981b14e#CLAIM#0.jpg, 6787dee...",Carousel Studios,,...,,Facebook,[Event/Property Claim],"[Written Evidence, Reverse Image Search, Fact-...",Image-text,[Misuse of images],[Out-of-context],Yes,2,338
3,https://web.archive.org/web/20240723112024/htt...,2023-06-21,Refuted,,"[{'input_images': [], 'answers': [{'answer_typ...",The claim is refuted initially by proving that...,An image shows the missing submersible Titan n...,[6787e7eee2f02e5f4981c2b7#CLAIM#0.jpg],@Santa_Hustler,,...,https://x.com/Santa_Hustler/status/16717870504...,Twitter,[Event/Property Claim],[Written Evidence],Image-text,[Misuse of images],[Out-of-context],Yes,3,353
4,https://web.archive.org/web/20240822171903/htt...,2023-06-25,Not Enough Evidence,US,"[{'input_images': [], 'answers': [{'answer_typ...",The information provided only states the sourc...,"A new study proves 8,000 more ballots were cou...",[6787eaf9e2f02e5f4981c66a#CLAIM#0.jpg],@BehizyTweets,,...,https://x.com/BehizyTweets/status/167338427061...,Twitter,[Event/Property Claim],"[Numerical Comparison, Consultation]",Image-text,[Textual refuted],[],Yes,4,197
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,https://web.archive.org/web/20240831174555/htt...,2023-06-01,Refuted,IN,[{'input_images': ['67d08e8435531374f6da9388#I...,The claim states that a Hindu girl was killed ...,Image shows the body of a Hindu girl who was f...,[67d08e8435531374f6da9388#CLAIM#0.jpg],@shibbu87,,...,https://x.com/shibbu87/status/1664593622396985344,X,[Media Analysis Claim],"[Written Evidence, Keyword Search]",Image-text,[Misuse of images],[Out-of-context],Yes,147,366
148,https://web.archive.org/web/20240610042208/htt...,2023-06-02,Refuted,IN,[{'input_images': ['67d0924d35531374f6daa1f1#I...,The claim states a mosque is near the train ac...,Image shows a mosque is present beside the tri...,[67d0924d35531374f6daa1f1#CLAIM#0.jpg],Dilip Kashyap,,...,https://ghostarchive.org/archive/eNp8B,Facebook,[Media Analysis Claim],"[Written Evidence, Consultation, Image Analysi...",Image-text,[Misuse of images],[Out-of-context],Yes,148,234
149,https://web.archive.org/web/20240618034518/htt...,2023-06-19,Refuted,IN,[{'input_images': ['67d09dbe35531374f6dadaf8#I...,The images were originally published in July 2...,Images show Anti-Modi posters in the US amid h...,[67d09dbe35531374f6dadaf8#CLAIM#0.jpg],@abdquddus117,Mr. N Modi We only rob bank you rob the whole ...,...,https://archive.ph/zbxMq,X,[Media Analysis Claim],"[Written Evidence, Keyword Search]",Image-text,[Misuse of images],[Out-of-context],Yes,149,283
150,https://web.archive.org/web/20240420080254/htt...,2023-06-16,Refuted,US,[{'input_images': ['67d0a15035531374f6dae467#I...,The answers indicate that the military activit...,Images show Tanks & Troops deploying inside U...,"[67d0a15035531374f6dae467#CLAIM#0.jpg, 67d0a15...",Hal Turner,,...,https://web.archive.org/web/20230617224616/htt...,halturnerradioshow.com,[Media Analysis Claim],"[Written Evidence, Consultation, Video Analysis]",Image-text,[Misuse of images],[Out-of-context],Yes,150,358


In [None]:
# unwrap questions from json

questions_list = []
for claim_idx, claim in enumerate(claims):
    for question in claim.get("questions", []):
        question_flat = question.copy()
        question_flat["claim_idx"] = claim_idx
        questions_list.append(question_flat)

questions_df = pd.json_normalize(questions_list, sep="_")

In [None]:
questions_df

Unnamed: 0,input_images,answers,question,answer_method,question_type,claim_idx
0,[],"[{'answer_type': 'Abstractive', 'answer_text':...",When did Dr Yogesh Kumar and Dr Avlokita Ashok...,Image-search,[Text-related],0
1,[],"[{'answer_type': 'Extractive', 'answer_text': ...",What date was President Joe Biden's inaugural ...,Text-search,[Text-related],1
2,[],"[{'answer_type': 'Extractive', 'answer_text': ...",What date was President Donald Trump's inaugur...,Text-search,[Text-related],1
3,[],"[{'answer_type': 'Extractive', 'answer_text': ...",Were covid measures in place during Joe Biden'...,Text-search,[Text-related],1
4,[6787903de2f02e5f49813736#INPUT#0#0.jpg],"[{'answer_type': 'Extractive', 'answer_text': ...",When was the photo first reported?,Image-search,[Image-related],2
...,...,...,...,...,...,...
2265,[],"[{'answer_type': 'Abstractive', 'answer_text':...",Where is Rafah?,Text-search,[Text-related],791
2266,[680bae8fa593b4e4ef122c80#INPUT#0#0.jpg],"[{'answer_type': 'Extractive', 'answer_text': ...",How was the map shown in the 0-th image created?,Image-search,[Image-related],792
2267,[680bae8fa593b4e4ef122c80#INPUT#1#0.jpg],"[{'answer_type': 'Boolean', 'answer_text': 'No...",Is the map seen in the 0-th image a genuine ro...,Image-search,[Image-related],792
2268,[],"[{'answer_type': 'Boolean', 'answer_text': 'No...",Do all roads lead to Rome?,Text-search,[Text-related],792


In [None]:
# show proportion of image claims
df['claim'].value_counts(normalize=True)    

KeyError: 'claim'