In [1]:
import wikipedia
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from IPython.display import Image, display
from wikipedia.exceptions import DisambiguationError, PageError
import re

def _find_infobox_header(infobox, label):
    """Return the first infobox ``<th>`` whose direct string contains *label*, or None."""
    return infobox.find("th", string=lambda text: text and label in text)


def _infobox_cell_text(header):
    """Return the cleaned text of the ``<td>`` next to *header*.

    Returns None when *header* is None or has no sibling ``<td>``. Nested
    ``span``/``sup``/``small``/``br`` tags are removed from the cell before
    its text is read, so the parsed tree is modified in place.
    """
    cell = header.find_next_sibling("td") if header is not None else None
    if cell is None:
        return None
    for tag in cell.find_all(["span", "sup", "small", "br"]):
        tag.decompose()
    return cell.get_text(" ", strip=True)


def get_person_info(person_name):
    """Look up *person_name* on Wikipedia and print a short biography summary.

    The first search hit is fetched and its infobox is scraped for the date of
    birth, age (only when no "Died" row is present), date of death, the raw
    "Born" cell text, occupation and the first infobox image. Results are
    printed (and the image displayed via IPython); nothing is returned.

    A page is treated as being about a person only when its infobox contains
    both a ``span.bday`` element and a "Born" header row.

    Args:
        person_name: Free-text name passed to ``wikipedia.search``.

    Returns:
        None. All errors are reported with ``print`` rather than raised.
    """
    # Imported here so this function stays self-contained with respect to the
    # file's import cell.
    from urllib.parse import urljoin

    try:
        # Step 1: Search for the best match
        search_results = wikipedia.search(person_name)
        if not search_results:
            print("No matching page found.")
            return

        page = wikipedia.page(search_results[0], auto_suggest=False)
        page_url = page.url

        # Step 2: Fetch and parse Wikipedia page.
        # A timeout prevents an indefinite hang, and raise_for_status() stops an
        # HTTP error page from being parsed as if it were the article.
        response = requests.get(
            page_url,
            # NOTE(review): Wikimedia asks clients for a descriptive User-Agent;
            # replace this placeholder with real contact details if deployed.
            headers={"User-Agent": "person-info-notebook/1.0 (python-requests)"},
            timeout=10,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Step 3: Extract Name
        name_tag = soup.find("span", {"class": "mw-page-title-main"})
        name = name_tag.text if name_tag else "Name not found"

        # Step 4: Find the infobox
        infobox = soup.find("table", {"class": "infobox"})

        # Step 5: Check if it's a person using heuristics
        bday_tag = None
        born_row = None
        if infobox is not None:
            bday_tag = infobox.find("span", {"class": "bday"})
            born_row = _find_infobox_header(infobox, "Born")

        if bday_tag is None or born_row is None:
            print(f"The page '{name}' does not appear to be about a person.")
            return

        # Step 6: Extract DOB, Birth Data, and Death (if applicable).
        # The bday span must be read BEFORE the "Born" cell is cleaned, because
        # cleaning removes every <span> inside that cell.
        dob = None
        dob_str = bday_tag.text.strip()
        if re.match(r"^\d{4}-\d{2}-\d{2}$", dob_str):
            try:
                dob = datetime.strptime(dob_str, "%Y-%m-%d")
            except ValueError:
                # Well-formed but impossible date (e.g. month 13): treat as unknown
                # instead of aborting the whole lookup.
                dob = None

        date_of_death = _infobox_cell_text(_find_infobox_header(infobox, "Died"))
        is_deceased = date_of_death is not None

        age = None
        if dob and not is_deceased:
            today = datetime.today()
            # Subtract one year if this year's birthday has not happened yet.
            age = today.year - dob.year - ((today.month, today.day) < (dob.month, dob.day))

        birth_data = _infobox_cell_text(born_row)
        if birth_data is None:
            birth_data = "Not found"

        # Step 7: Extract Occupation
        occupation = "Not found"
        occ_row = _find_infobox_header(infobox, "Occupation")
        occ_td = occ_row.find_next_sibling("td") if occ_row is not None else None
        if occ_td is not None:
            # Drop footnote markers, then discard fragments that are only
            # separators so the output has no stray ", ," runs.
            for ref in occ_td.find_all("sup"):
                ref.decompose()
            pieces = [piece.strip(" ,;") for piece in occ_td.stripped_strings]
            pieces = [piece for piece in pieces if piece]
            if pieces:
                occupation = ", ".join(pieces)

        # Step 8: Extract profile image
        image_url = None
        img = infobox.find("img")
        img_src = img.get("src") if img is not None else None
        if img_src:
            # urljoin handles protocol-relative ("//host/..."), absolute and
            # site-relative sources alike.
            image_url = urljoin(page_url, img_src)

        # Step 9: Output Results
        if age is not None:
            age_text = age
        elif is_deceased:
            age_text = "Not applicable (deceased)"
        else:
            age_text = "Not found"

        result = {
            "Name": name,
            "Date of Birth": dob.strftime("%B %d, %Y") if dob else "Not found",
            "Age": age_text,
            "Date of Death": date_of_death if is_deceased else None,
            "Birth Data": birth_data,
            "Occupation": occupation,
            "For More Info Wikipedia URL": page_url,
        }

        print("\n📄 Extracted Information:")
        for key, value in result.items():
            # None marks a field that does not apply (e.g. no death date).
            if value is not None:
                print(f"{key}: {value}")

        print("\nProfile Image:")
        if image_url:
            display(Image(url=image_url))
        else:
            print("No image found.")

    except DisambiguationError as e:
        print(f"Multiple results found. Try being more specific. Options: {', '.join(e.options[:5])}")
    except PageError:
        print("Page not found. Try a different name.")
    except requests.RequestException as e:
        print(f"Network error while contacting Wikipedia: {e}")
    except Exception as e:
        # Last-resort boundary for interactive use: report instead of a traceback.
        print(f"Something went wrong: {e}")

In [None]:
# Prompt for a name on stdin and print the scraped summary for it.
get_person_info(input("Enter a person name: "))