In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# IMDb Top 250 Movies URL
IMDB_URL = "https://www.imdb.com/chart/top/"

# Headers sent with every request so it carries a browser-style User-Agent
headers = {"User-Agent": "Mozilla/5.0"}

# Fetch the IMDb chart page. The timeout (seconds) keeps the script from
# hanging forever if the server never answers; requests applies no timeout
# unless one is passed explicitly.
response = requests.get(IMDB_URL, headers=headers, timeout=10)

# Ensure the page is successfully loaded
if response.status_code == 200:
    print("✅ Successfully fetched IMDb page.")
else:
    print(f"❌ Error fetching IMDb page, status code: {response.status_code}")
    # Stop here rather than parsing an error page as if it were the chart;
    # raise_for_status() raises requests.HTTPError for 4xx/5xx responses.
    response.raise_for_status()

# Parse the page using BeautifulSoup
soup = BeautifulSoup(response.text, "html.parser")

# Extract the movie details: one dict per movie, combining the chart row
# (title, year, rating, link) with fields read from the movie's own page
# (genre, director, runtime).
movies = []


def _text_or_na(node):
    """Return ``node.text``, or "N/A" when the selector matched nothing (None)."""
    return node.text if node is not None else "N/A"


# Chart rows, capped at the top 100 movies
chart_rows = soup.select("tbody.lister-list tr")[:100]
if not chart_rows:
    # select() returns an empty list instead of failing, so without this
    # message a selector mismatch is completely silent.
    # NOTE(review): the run recorded at the end of this file fetched the page
    # successfully yet collected nothing, which is consistent with this
    # selector matching no rows -- confirm against the page's current markup.
    print("⚠️ No rows matched 'tbody.lister-list tr'; the chart page markup may differ from what this scraper expects.")

# One Session for all detail pages so the underlying connection is reused
# across the (up to 100) sequential requests instead of being reopened.
with requests.Session() as session:
    session.headers.update(headers)

    for row in chart_rows:
        try:
            title_column = row.select_one("td.titleColumn")
            title = title_column.a.text  # Movie title
            year = title_column.span.text.strip("()")  # Year of release
            rating = _text_or_na(row.select_one("td.imdbRating strong"))  # IMDb rating
            link = "https://www.imdb.com" + title_column.a["href"]  # Movie link

            # Open each movie's page to extract more details. The timeout
            # (seconds) bounds a stalled request; raise_for_status() makes an
            # HTTP error skip this movie instead of being parsed into bogus
            # "N/A" details.
            movie_response = session.get(link, timeout=10)
            movie_response.raise_for_status()
            movie_soup = BeautifulSoup(movie_response.text, "html.parser")

            # Extract additional details; every missing field becomes "N/A"
            genre_names = [g.text for g in movie_soup.select("div[data-testid='genres'] a")]
            genre = ", ".join(genre_names) if genre_names else "N/A"
            director = _text_or_na(movie_soup.select_one("a[data-testid='title-pc-principal-credit']"))
            runtime = _text_or_na(movie_soup.select_one("li[data-testid='title-techspec_runtime']"))

            movies.append({
                "Title": title,
                "Year": year,
                "IMDb Rating": rating,
                "Genre": genre,
                "Director": director,
                "Runtime": runtime,
                "IMDb Link": link,
            })

            print(f"✅ Scraped: {title}")

        # Best-effort per movie: network/HTTP failures and rows whose markup
        # lacks an expected element are reported and the loop moves on.
        except (requests.RequestException, AttributeError, KeyError, TypeError) as e:
            print(f"❌ Error scraping movie data: {e}")

# Persist the scraped rows, or report that there is nothing to save
if not movies:
    print("❌ No data collected, check for scraping issues.")
else:
    # One DataFrame row per scraped movie dict
    df = pd.DataFrame(movies)

    # Write the table to disk without the positional index column
    output_path = "YourLastName_IMDb_Top100.csv"
    df.to_csv(output_path, index=False)
    print("✅ IMDb Top 100 Movies saved to CSV! 🎬")


✅ Successfully fetched IMDb page.
❌ No data collected, check for scraping issues.
