# Claim & Image Peek

Interactively browse the AVeriMaTec validation claims and preview the images referenced in each record.

> Set the slider (or use the random button) below to inspect a claim, its metadata, the claim images, and question-level input images.

In [1]:
from pathlib import Path
import json
import random

from IPython.display import display, HTML
from PIL import Image
import ipywidgets as widgets

VAL_JSON_PATH = Path('/mnt/data/factcheck/averimatec/train.json')
IMAGES_DIR = Path('/mnt/data/factcheck/averimatec/images')

if not VAL_JSON_PATH.exists():
    raise FileNotFoundError(f'Missing val.json at {VAL_JSON_PATH}')
if not IMAGES_DIR.exists():
    raise FileNotFoundError(f'Missing images directory at {IMAGES_DIR}')

print(f'val.json located at: {VAL_JSON_PATH}')
print(f'images directory located at: {IMAGES_DIR}')

val.json located at: /mnt/data/factcheck/averimatec/train.json
images directory located at: /mnt/data/factcheck/averimatec/images


In [2]:
with VAL_JSON_PATH.open('r', encoding='utf-8') as f:
    claims = json.load(f)

print(f'Loaded {len(claims):,} validation claims.')

Loaded 793 validation claims.


In [3]:
def _image_path(image_name: str) -> Path:
    return IMAGES_DIR / image_name


def display_image(image_name: str, caption: str = 'Image') -> None:
    path = _image_path(image_name)
    if not path.exists():
        display(HTML(f"<b>Missing image file:</b> {image_name}"))
        return

    with Image.open(path) as img:
        display(HTML(f"<em>{caption}</em> — {image_name} ({img.width}×{img.height}px)"))
        display(img)


def render_claim(idx: int) -> None:
    if not (0 <= idx < len(claims)):
        raise IndexError(f'Index {idx} is outside the dataset range (0-{len(claims) - 1}).')

    claim = claims[idx]
    metadata = claim.get('metadata', {})

    header_html = f"""
    <h3>Claim #{idx}</h3>
    <ul>
        <li><b>Date:</b> {claim.get('date', '—')}</li>
        <li><b>Label:</b> {claim.get('label', '—')}</li>
        <li><b>Article:</b> <a href='{claim.get('article', '')}' target='_blank'>{claim.get('article', '')}</a></li>
        <li><b>Claim text:</b> {claim.get('claim_text', '—')}</li>
        <li><b>Speaker:</b> {metadata.get('speaker', '—')}</li>
    </ul>
    <p><b>Justification:</b> {claim.get('justification', '—')}</p>
    """
    display(HTML(header_html))

    if claim.get('claim_images'):
        display(HTML('<h4>Claim images</h4>'))
        for image_name in claim['claim_images']:
            display_image(image_name, caption='Claim image')
    else:
        display(HTML('<p>No claim-level images listed.</p>'))

    if claim.get('questions'):
        display(HTML('<h4>Questions & evidence</h4>'))
        for q_idx, question in enumerate(claim['questions']):
            question_html = f"""
            <details open>
                <summary><b>Question {q_idx + 1}:</b> {question.get('question', '—')}</summary>
                <ul>
                    <li><b>Answer method:</b> {question.get('answer_method', '—')}</li>
                    <li><b>Question type:</b> {', '.join(question.get('question_type', [])) or '—'}</li>
                </ul>
            </details>
            """
            display(HTML(question_html))

            for ans in question.get('answers', []):
                answer_html = f"""
                <p><b>Answer ({ans.get('answer_type', '—')}):</b> {ans.get('answer_text', '—')}</p>
                <p><b>Source:</b> <a href='{ans.get('source_url', '')}' target='_blank'>{ans.get('source_url', '')}</a> ({ans.get('source_medium', '—')})</p>
                """
                display(HTML(answer_html))

            if question.get('input_images'):
                display(HTML('<p><b>Input images:</b></p>'))
                for image_name in question['input_images']:
                    display_image(image_name, caption='Question image')
            else:
                display(HTML('<p>No question-level images.</p>'))
    else:
        display(HTML('<p>No questions attached to this claim.</p>'))

In [4]:
index_slider = widgets.IntSlider(
    value=0,
    min=0,
    max=len(claims) - 1,
    step=1,
    description='Claim #',
    continuous_update=False,
    readout=True,
    readout_format='d'
)

random_button = widgets.Button(description='Random claim', icon='shuffle')
output = widgets.Output()


def update_view(idx: int) -> None:
    output.clear_output()
    with output:
        render_claim(idx)


def on_slider_change(change):
    if change['name'] == 'value' and change['new'] is not None:
        update_view(change['new'])


def on_random_click(_):
    index_slider.value = random.randint(index_slider.min, index_slider.max)

index_slider.observe(on_slider_change, names='value')
random_button.on_click(on_random_click)

display(widgets.HBox([index_slider, random_button]))
display(output)

update_view(index_slider.value)

HBox(children=(IntSlider(value=0, continuous_update=False, description='Claim #', max=792), Button(description…

Output()

In [5]:
# load as dataframe
import pandas as pd
df = pd.json_normalize(claims, sep='_')

In [6]:
df

Unnamed: 0,article,date,label,location,questions,justification,claim_text,claim_images,metadata_speaker,metadata_transcription,metadata_media_source,metadata_original_claim_url,metadata_reporting_source,metadata_claim_types,metadata_fact_checking_strategies,metadata_modality,metadata_refuting_reasons,metadata_image_misuse_types,metadata_image_used
0,https://web.archive.org/web/20230327144122/htt...,2023-03-17,Refuted,IN,"[{'input_images': [], 'answers': [{'answer_typ...",The claim that the couple passed the IAS exami...,A couple in India has recently passed the IAS ...,[6786bb7e401b0522f02b66e1#CLAIM#0.jpg],Ashish Yt,,,https://perma.cc/ND8H-PE99,Facebook,[Event/Property Claim],"[Written Evidence, Consultation, Keyword Search]",Image-text,[],[],Yes
1,https://web.archive.org/web/20230330053504/htt...,2023-01-23,Refuted,US,"[{'input_images': [], 'answers': [{'answer_typ...",The claim that an adequate comparison can be m...,Photos from former President Donald Trump's an...,"[6787898ce2f02e5f49812d00#CLAIM#0.jpg, 6787898...",jackmposobiec,,,https://www.instagram.com/p/CnuAyhAueVS/,Facebook,[Event/Property Claim],"[Written Evidence, Numerical Comparison]",Image-text,[Misuse of images],[Out-of-context],Yes
2,https://web.archive.org/web/20230512150508/htt...,2023-02-19,Refuted,IN,[{'input_images': ['6787903de2f02e5f49813736#I...,The claim that Mufti is shown after the abroga...,"The image shows Mehbooba Mufti, former Chief M...",[6787903de2f02e5f49813736#CLAIM#0.jpg],@idineshdesai,,,https://web.archive.org/web/20230221052846/htt...,twitter,[Event/Property Claim],"[Written Evidence, Reverse Image Search, Fact-...",Image-text,[Misuse of images],[Out-of-context],Yes
3,https://web.archive.org/web/20230505014524/htt...,2023-04-29,Refuted,FR,"[{'input_images': [], 'answers': [{'answer_typ...",The claim that the image shows a protest again...,An image shows millions of people protesting i...,[6787969ae2f02e5f49814d96#CLAIM#0.jpg],@dave_tubb3,,,https://web.archive.org/web/20230501151050/htt...,Twitter,[Event/Property Claim],"[Media Source Discovery, Fact-checker Reference]",Image-text,[Misuse of images],[Out-of-context],Yes
4,https://web.archive.org/web/20230216104751/htt...,2022-12-07,Refuted,IN,[{'input_images': ['67879cb8e2f02e5f4981534a#I...,The claim that the bullet trains are in Gujara...,Image shows bullet trains lined up in Gujarat,[67879cb8e2f02e5f4981534a#CLAIM#0.jpg],Voice of Maharashtra,,,,Facebook,[Event/Property Claim],"[Written Evidence, Consultation, Reverse Image...",Image-text,[Misuse of images],[Out-of-context],Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
788,https://web.archive.org/web/20241129145843/htt...,2013-01-01,Supported,US,[{'input_images': ['67f86013b9f34f625412162a#I...,CLAIM_IMG_0 was taken by photographer Ari Abra...,A photo shared online in 2024 authentically sh...,[67f86013b9f34f625412162a#CLAIM#0.jpg],karaxelizabeth12,,https://www.instagram.com/p/CTtKuY3lj8b/,https://www.flickr.com/photos/karaxelizabethh/...,flickr,[Event/Property Claim],"[Written Evidence, Consultation, Media Source ...",Image-text,[],[],Yes
789,https://web.archive.org/web/20240711110656/htt...,2016-08-02,Supported,GB,[{'input_images': ['67f786601c10ad2b61108a6e#I...,"Based on the QA pairs, the claim is supported....",A photograph shared widely online shows a real...,[67f786601c10ad2b61108a6e#CLAIM#0.jpg],Aarian Marshall,,https://web.archive.org/web/20240306150959/htt...,https://web.archive.org/web/20240306152117/htt...,Wired,"[Event/Property Claim, Media Analysis Claim]","[Written Evidence, Video Analysis, Geolocation]",Image-text,[],[],Yes
790,https://web.archive.org/web/20250329184705/htt...,2020-05-22,Supported,RU,[{'input_images': ['67f7c0c41c10ad2b61123eaf#I...,"The photos are of the same man, Evgeny Kobytev...",Photos shared in viral social media and blog p...,"[67f7c0c41c10ad2b61123eaf#CLAIM#0.jpg, 67f7c0c...",@Dr_TheHistories,,http://mus-pozdeyev.ru/p2/news/32/,https://archive.ph/dHu09,X,[Event/Property Claim],"[Written Evidence, Reverse Image Search]",Image-text,[],[],Yes
791,https://web.archive.org/web/20250416162223/htt...,2022-05-26,Refuted,IL,[{'input_images': ['680d221fa593b4e4ef14c640#I...,The claim is refuted by proving that the incid...,Image shows multiple covered bodies with peopl...,[680d221fa593b4e4ef14c640#CLAIM#0.jpg],Bisan Owda,,,https://www.facebook.com/photo/?fbid=122149299...,Facebook,[Event/Property Claim],"[Written Evidence, Media Source Discovery, Rev...",Image-text,[Misuse of images],[Out-of-context],Yes


In [7]:
# unwrap questions from json

questions_list = []
for claim_idx, claim in enumerate(claims):
    for question in claim.get('questions', []):
        question_flat = question.copy()
        question_flat['claim_idx'] = claim_idx
        questions_list.append(question_flat)
        
questions_df = pd.json_normalize(questions_list, sep='_')

In [8]:
questions_df

Unnamed: 0,input_images,answers,question,answer_method,question_type,claim_idx
0,[],"[{'answer_type': 'Abstractive', 'answer_text':...",When did Dr Yogesh Kumar and Dr Avlokita Ashok...,Image-search,[Text-related],0
1,[],"[{'answer_type': 'Extractive', 'answer_text': ...",What date was President Joe Biden's inaugural ...,Text-search,[Text-related],1
2,[],"[{'answer_type': 'Extractive', 'answer_text': ...",What date was President Donald Trump's inaugur...,Text-search,[Text-related],1
3,[],"[{'answer_type': 'Extractive', 'answer_text': ...",Were covid measures in place during Joe Biden'...,Text-search,[Text-related],1
4,[6787903de2f02e5f49813736#INPUT#0#0.jpg],"[{'answer_type': 'Extractive', 'answer_text': ...",When was the photo first reported?,Image-search,[Image-related],2
...,...,...,...,...,...,...
2265,[],"[{'answer_type': 'Abstractive', 'answer_text':...",Where is Rafah?,Text-search,[Text-related],791
2266,[680bae8fa593b4e4ef122c80#INPUT#0#0.jpg],"[{'answer_type': 'Extractive', 'answer_text': ...",How was the map shown in the 0-th image created?,Image-search,[Image-related],792
2267,[680bae8fa593b4e4ef122c80#INPUT#1#0.jpg],"[{'answer_type': 'Boolean', 'answer_text': 'No...",Is the map seen in the 0-th image a genuine ro...,Image-search,[Image-related],792
2268,[],"[{'answer_type': 'Boolean', 'answer_text': 'No...",Do all roads lead to Rome?,Text-search,[Text-related],792


In [9]:
# show proportion of image claims
df['claim'].value_counts(normalize=True)    

KeyError: 'claim'