In [7]:
import requests
from IPython.display import Image, display
from bs4 import BeautifulSoup
import urllib.parse

def search_commons_images_with_captions(keyword, limit=5):
    # Step 1: Search for images
    search_url = "https://commons.wikimedia.org/w/api.php"
    params = {
        "action": "query",
        "generator": "search",
        "gsrsearch": keyword,
        "gsrnamespace": 6,
        "gsrlimit": limit,
        "prop": "imageinfo",
        "iiprop": "url",
        "format": "json"
    }

    response = requests.get(search_url, params=params)
    response.raise_for_status()
    data = response.json()
    pages = data.get("query", {}).get("pages", {})

    if not pages:
        print("No results found.")
        return

    # Step 2: Iterate over results
    for page in pages.values():
        title = page.get("title")  # e.g. "File:Example.jpg"
        imageinfo = page.get("imageinfo", [])
        if not imageinfo:
            continue
        image_url = imageinfo[0]["url"]

        # Step 3: Construct Wikipedia file page URL (mobile version)
        file_title_encoded = urllib.parse.quote(title)
        wiki_url = f"https://en.m.wikipedia.org/wiki/{file_title_encoded}"

        # Step 4: Scrape the description/caption
        try:
            wiki_response = requests.get(wiki_url)
            soup = BeautifulSoup(wiki_response.content, "html.parser")
            caption_div = soup.find("div", class_="description en")
            caption = caption_div.get_text(strip=True) if caption_div else "No caption found."
        except Exception as e:
            caption = f"Error fetching caption: {e}"

        # Step 5: Display image + caption
        print(f"\nTitle: {title}")
        print(f"URL: {image_url}")
        print(f"Caption: {caption}")
        display(Image(url=image_url))

# Example usage
search_commons_images_with_captions("Battle of Somme", limit=3)



Title: File:British Mark I male tank Somme 25 September 1916.jpg
URL: https://upload.wikimedia.org/wikipedia/commons/f/f6/British_Mark_I_male_tank_Somme_25_September_1916.jpg
Caption: English:An early model British Mark I "male" tank, named C-15, near Thiepval, 25 September 1916. The tank is probably in reserve for theBattle of Thiepval Ridgewhich began on 26 September. The tank is fitted with the wire "grenade shield" and steering tail, both features discarded in the next models.



Title: File:Cheshire Regiment trench Somme 1916.jpg
URL: https://upload.wikimedia.org/wikipedia/commons/f/fa/Cheshire_Regiment_trench_Somme_1916.jpg
Caption: English:Britishtrench near theAlbert–Bapaumeroad atOvillers-la-Boisselle, July1916during theBattle of the Somme.



Title: File:Map of the Battle of the Somme, 1916.svg
URL: https://upload.wikimedia.org/wikipedia/commons/a/ab/Map_of_the_Battle_of_the_Somme%2C_1916.svg
Caption: English:Map of theBattle of the Somme, 1916.
