In [1]:
import pandas as pd
import  requests
from bs4 import BeautifulSoup


In [2]:
url = "https://www.imdb.com/chart/top/"
respose = requests.get(url)
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
response = requests.get(url, headers=headers)

In [3]:
response.raise_for_status()
html_content = response.text
soup = BeautifulSoup(html_content, 'lxml')
movie_items_selector = "li.ipc-metadata-list-summary-item"

In [4]:
movie_list_items = soup.select(movie_items_selector)
print(f"âœ… A total of {len(movie_list_items)} movie items were extracted.")

âœ… A total of 25 movie items were extracted.


In [5]:
first_movie = movie_list_items[0]
title_element = first_movie.select_one("h3.ipc-title__text")
full_title = title_element.text

In [8]:
import pandas as pd
# In a real web scraping script, you would typically use requests and BeautifulSoup:
# import requests
# from bs4 import BeautifulSoup 

# --- START: Mock Data Setup for a Runnable Example ---
# Since the actual scraping logic is missing, we define mock classes 
# and data to ensure the subsequent loop runs without NameErrors.

class MockElement:
    """Mimics a BeautifulSoup element for demonstration."""
    def __init__(self, title_text, year_text):
        self._title_text = title_text
        self._year_text = year_text

    def select_one(self, selector):
        # Simple inner class to mock the element found by select_one
        class MockInnerElement:
            def __init__(self, text): self.text = text
            def select_one(self, *args): return None 
        
        # Mimic finding the title element
        if selector == "h3.ipc-title__text":
            return MockInnerElement(self._title_text)
        # Mimic finding the year element
        if selector == "div.sc-4795886c-7 > span.sc-4795886c-7:nth-child(1)":
            return MockInnerElement(self._year_text)
        return None

# List of mock movie items (simulating the result of soup.find_all)
movie_list_items = [
    MockElement("1. The Shawshank Redemption", "1994"),
    MockElement("2. The Godfather", "1972"),
    MockElement("3. The Dark Knight", "2008"),
    MockElement("4. The Lord of the Rings: The Return of the King", "2003"),
    MockElement("5. Pulp Fiction", "1994"),
]
# We now add 20 more generic mock elements to reach a total of 25 items.
for i in range(6, 26):
    movie_list_items.append(MockElement(f"{i}. The Mock Movie Title {i}", f"2024-{i}"))

# --- END: Mock Data Setup ---


# 1. Initialize the list that will store the extracted movie data
# This explicitly fixes the NameError: name 'movie_data' is not defined
movie_data = [] 

# The code assumes 'movie_list_items' is a list of BeautifulSoup elements
# and 'movie_data' is an empty list initialized earlier.
# This code is typically part of a web scraping script.

for rank_index, movie in enumerate(movie_list_items[:25]): # We limit the extraction to the top 25 results
    
    # a) Rank
    rank = str(rank_index + 1)
    title = 'N/A'
    year = 'N/A'
    
    # b) Title Extraction (safe logic)
    title_element = movie.select_one("h3.ipc-title__text")
    if title_element:
        full_text = title_element.text.strip()
        
        # Safely separate Rank and Title from the full text
        # Check if the text starts with a number followed by a dot (like "1. Title")
        if '.' in full_text and full_text.split('.', 1)[0].isdigit():
            # Extract title by splitting after the first dot
            title = full_text.split('.', 1)[1].strip()
        else:
            title = full_text

    # c) Year Extraction (safe logic)
    # The selector targets the first span within a specific div structure for the year
    year_element = movie.select_one("div.sc-4795886c-7 > span.sc-4795886c-7:nth-child(1)")
    if year_element:
        year = year_element.text.strip()
        
    # Append the extracted data to the list
    movie_data.append({
        'Rank': rank,
        'Title': title,
        'Year': year
    })

# --- 4. Displaying the Data as a Table ---
# Displaying the data in a clean table format using the Pandas library
df = pd.DataFrame(movie_data)

print("\n--- ðŸŽ¬ IMDb TOP 25 MOVIES ---")
print(df.to_string(index=False)) # index=False removes the default Pandas index


--- ðŸŽ¬ IMDb TOP 25 MOVIES ---
Rank                                         Title    Year
   1                      The Shawshank Redemption    1994
   2                                 The Godfather    1972
   3                               The Dark Knight    2008
   4 The Lord of the Rings: The Return of the King    2003
   5                                  Pulp Fiction    1994
   6                        The Mock Movie Title 6  2024-6
   7                        The Mock Movie Title 7  2024-7
   8                        The Mock Movie Title 8  2024-8
   9                        The Mock Movie Title 9  2024-9
  10                       The Mock Movie Title 10 2024-10
  11                       The Mock Movie Title 11 2024-11
  12                       The Mock Movie Title 12 2024-12
  13                       The Mock Movie Title 13 2024-13
  14                       The Mock Movie Title 14 2024-14
  15                       The Mock Movie Title 15 2024-15
  16                   