# Extract Open Graph (OG) Metadata from a Website

This notebook shows how to fetch and parse Open Graph (OG) metadata (such as the title, description, and image) from a given website using Python.

In [None]:
!pip install bs4

In [5]:
import requests
from bs4 import BeautifulSoup

def get_open_graph_data(url: str) -> dict:
    """Download a web page and return its Open Graph (OG) metadata.

    Args:
        url: Address of the page to fetch.

    Returns:
        A dict mapping each OG property name with the leading ``og:`` removed
        (e.g. ``"title"``, ``"image:width"``) to the tag's ``content`` value,
        or ``""`` when the tag has no ``content`` attribute. When a property
        appears more than once on the page, the last occurrence is kept.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: On connection failures or when the
            10-second timeout expires.
    """
    headers = {"User-Agent": "Mozilla/5.0"}  # browser-like UA, intended to avoid being blocked
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()

    # Parse the raw bytes instead of resp.text: when the Content-Type header
    # carries no charset, requests decodes text as ISO-8859-1, which garbles
    # UTF-8 pages. Handing bytes to BeautifulSoup lets it detect the encoding
    # (including <meta charset>); a charset the server did declare is passed
    # along as the preferred encoding.
    content_type = resp.headers.get("Content-Type", "")
    declared_encoding = resp.encoding if "charset" in content_type.lower() else None

    # html.parser ships with Python, so no extra parser package (e.g. lxml) is required.
    soup = BeautifulSoup(resp.content, "html.parser", from_encoding=declared_encoding)

    og_data = {}
    for tag in soup.find_all("meta"):
        prop = tag.get("property", "")
        if prop.startswith("og:"):
            # Strip the 3-character "og:" prefix to form the key.
            og_data[prop[3:]] = tag.get("content", "")

    return og_data


In [None]:
# Example usage: fetch the OG metadata for a single page (this performs a live HTTP request).
url = "https://www.imdb.com/title/tt4154796/"  # Avengers: Endgame
og_data = get_open_graph_data(url)
og_data  # bare last expression so the notebook displays the resulting dict

{'url': 'https://www.imdb.com/title/tt4154796/',
 'site_name': 'IMDb',
 'title': 'Avengers: Endgame (2019) ⭐ 8.4 | Action, Adventure, Sci-Fi',
 'description': '3h 1m | K-12',
 'type': 'video.movie',
 'image': 'https://m.media-amazon.com/images/M/MV5BMTc5MDE2ODcwNV5BMl5BanBnXkFtZTgwMzI2NzQ2NzM@._V1_FMjpg_UX1000_.jpg',
 'image:height': '1481.9102749638205',
 'image:width': '1000',
 'locale': 'en_US',
 'locale:alternate': 'de_DE'}