In [2]:
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd

# Root of the site being scraped; the listing URL and every detail URL are built on it.
BASE_URL = "https://www.themoviedb.org"
TOP_RATED_URL = BASE_URL + "/movie/top-rated"

# Browser-like User-Agent passed along with each requests.get call below.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
}

# Accumulator: scrape_top_movies() appends one dict per movie to this list.
movie_data = list()

def get_movie_details(movie_url, timeout=10):
    """Fetch a movie detail page and extract its overview and genres.

    Args:
        movie_url: Absolute URL of the movie's detail page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely.

    Returns:
        A ``(overview, genres)`` tuple. ``overview`` is the stripped text of
        the first ``div.overview p`` element, or ``"No overview available."``
        when the page has none. ``genres`` is a list holding the stripped text
        of every ``span.genres`` element (empty when none match). When the
        request fails or the response status is not 200, ``("N/A", [])`` is
        returned instead.
    """
    try:
        res = requests.get(movie_url, headers=HEADERS, timeout=timeout)
    except requests.RequestException:
        # Report network failures the same way as a non-200 response so a
        # single unreachable page does not abort the whole scrape.
        return "N/A", []
    if res.status_code != 200:
        return "N/A", []

    soup1 = BeautifulSoup(res.text, 'html.parser')

    # Extract overview
    overview_tag = soup1.select_one('div.overview p')
    overview = overview_tag.text.strip() if overview_tag else "No overview available."

    # Extract genres; select() yields an empty list when nothing matches,
    # so no separate emptiness guard is needed.
    genres = [g.text.strip() for g in soup1.select('span.genres')]

    return overview, genres

def scrape_top_movies(limit=20, delay=1, timeout=10):
    """Scrape the top-rated listing and append one record per movie to ``movie_data``.

    Each record is a dict with the keys ``'Title'``, ``'Genres'`` (genre names
    joined with ``', '``) and ``'Overview'``. The overview and genres come from
    ``get_movie_details`` called on the movie's detail page.

    Args:
        limit: Maximum number of listing cards to process (keeps the number of
            detail-page requests small).
        delay: Seconds to sleep after each detail-page request.
        timeout: Seconds to wait for the listing page before giving up.

    Returns:
        None. Results are appended to the module-level ``movie_data`` list. If
        the listing page cannot be loaded, a message is printed and nothing is
        appended.
    """
    try:
        response = requests.get(TOP_RATED_URL, headers=HEADERS, timeout=timeout)
    except requests.RequestException:
        # Treat connection errors/timeouts like a non-200 response.
        print("Failed to load top-rated page.")
        return
    if response.status_code != 200:
        print("Failed to load top-rated page.")
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    movies = soup.select('div.card.style_1')

    for movie in movies[:limit]:
        title_tag = movie.select_one('h2 a')
        title = title_tag.text.strip() if title_tag else "N/A"
        # .get() avoids a KeyError when the anchor carries no href attribute.
        relative_link = title_tag.get('href', '') if title_tag else ""

        if relative_link:
            overview, genres = get_movie_details(f"{BASE_URL}{relative_link}")
            time.sleep(delay)  # Be polite and avoid hammering the server
        else:
            # Without a link the URL would collapse to BASE_URL and the site's
            # home page would be scraped as if it were a movie page.
            overview, genres = "N/A", []

        movie_data.append({
            'Title': title,
            'Genres': ', '.join(genres),
            'Overview': overview
        })

if __name__ == '__main__':
    scrape_top_movies()
    # Name the columns explicitly so the frame keeps the same schema even when
    # scraping failed and movie_data is empty (otherwise df would have no
    # columns and df['Title'] would raise KeyError).
    df = pd.DataFrame(movie_data, columns=['Title', 'Genres', 'Overview'])


In [3]:
# Show the scraped movies table (Title, Genres, Overview) as this cell's output.
df

Unnamed: 0,Title,Genres,Overview
0,The Shawshank Redemption,"Drama, Crime",Imprisoned in the 1940s for the double murder ...
1,The Godfather,"Drama, Crime","Spanning the years 1945 to 1955, a chronicle o..."
2,Selena Gomez: My Mind & Me,"Documentary, Music","After years in the limelight, Selena Gomez ach..."
3,The Godfather Part II,"Drama, Crime",In the continuing saga of the Corleone crime f...
4,Schindler's List,"Drama, History, War",The true story of how businessman Oskar Schind...
5,12 Angry Men,Drama,The defense and the prosecution have rested an...
6,Spirited Away,"Animation, Family, Fantasy","A young girl, Chihiro, becomes trapped in a st..."
7,The Dark Knight,"Drama, Action, Crime, Thriller",Batman raises the stakes in his war on crime. ...
8,Dilwale Dulhania Le Jayenge,"Comedy, Drama, Romance","Raj is a rich, carefree, happy-go-lucky second..."
9,The Green Mile,"Fantasy, Drama, Crime",A supernatural tale set on death row in a Sout...
