In [None]:
import pandas as pd
import requests
import time

# Horror movies dataset
movies = pd.read_csv("../data_raw/Horror Movies IMDb.csv")

movies.head()


Unnamed: 0,Movie Title,Movie Year,Runtime,Genre,Rating,Director,Votes,Gross
0,Alien,1979,117,"Horror, Sci-Fi",8.5,Ridley Scott,905275,$78.90M
1,Psycho,1960,109,"Horror, Mystery, Thriller",8.5,Alfred Hitchcock,689068,$32.00M
2,The Shining,1980,146,"Drama, Horror",8.4,Stanley Kubrick,1051582,$44.02M
3,The Thing,1982,109,"Horror, Mystery, Sci-Fi",8.2,John Carpenter,439793,$13.78M
4,Tumbbad,2018,104,"Drama, Fantasy, Horror",8.2,Rahi Anil Barve,53297,


In [2]:
def search_tmdb_movie(title, year=None):
    """
    Verilen title (+opsiyonel year) için TMDB'de arama yapar.
    Bulursa: tmdb_id, tmdb_title, release_date döner.
    """
    url = "https://api.themoviedb.org/3/search/movie"
    params = {
        "api_key": TMDB_API_KEY,
        "query": title,
        "include_adult": False,
    }
    if pd.notna(year):
        try:
            params["year"] = int(year)
        except:
            pass

    r = requests.get(url, params=params)
    data = r.json()

    results = data.get("results", [])
    if not results:
        return None

    m = results[0]
    return {
        "tmdb_id": m["id"],
        "tmdb_title": m["title"],
        "tmdb_release_date": m.get("release_date")
    }

# Küçük test:
search_tmdb_movie("Alien", 1979)


{'tmdb_id': 348, 'tmdb_title': 'Alien', 'tmdb_release_date': '1979-05-25'}

In [1]:
def fetch_tmdb_reviews_for_movie(tmdb_id, max_reviews=10):
    """
    Verilen tmdb_id için en fazla max_reviews review döner.
    Her review: author, content, created_at, rating
    """
    url = f"https://api.themoviedb.org/3/movie/{tmdb_id}/reviews"
    all_reviews = []
    page = 1

    while len(all_reviews) < max_reviews:
        params = {
            "api_key": TMDB_API_KEY,
            "page": page
        }
        r = requests.get(url, params=params)
        data = r.json()
        results = data.get("results", [])

        if not results:
            break

        for rev in results:
            author = rev.get("author")
            content = rev.get("content")
            created_at = rev.get("created_at")
            author_details = rev.get("author_details", {})
            rating = author_details.get("rating")  # bazen None olabiliyor

            all_reviews.append({
                "tmdb_id": tmdb_id,
                "author": author,
                "content": content,
                "created_at": created_at,
                "author_rating": rating
            })

            if len(all_reviews) >= max_reviews:
                break

        # başka sayfa var mı?
        if page >= data.get("total_pages", 1):
            break

        page += 1
        time.sleep(0.2)  # API'yı çok hızlı dövmeyelim :)

    return all_reviews


In [None]:
all_review_rows = []

# Tüm filmler için reviewları çekelim
subset = movies.iloc[:].copy()

for idx, row in subset.iterrows():
    title = row["Movie Title"]
    year = row["Movie Year"]

    print(f"Processing: {title} ({year})")

    # 1) TMDB'de filmi bul
    info = search_tmdb_movie(title, year)
    # Eğer bulunamadıysa bulunamadı mesajı:
    if info is None:
        print("  -> TMDB'de bulunamadı, geçiyorum.")
        continue

    tmdb_id = info["tmdb_id"]

    # 2) Bu film için reviewları çek
    movie_reviews = fetch_tmdb_reviews_for_movie(tmdb_id, max_reviews=10)

    # 3) Her review'a film bilgilerini ekleyelim
    for rev in movie_reviews:
        rev["movie_title"] = title
        rev["movie_year"] = year
        all_review_rows.append(rev)

    time.sleep(0.3)  # biraz daha yavaşlatalım

len(all_review_rows)


Processing: Alien (1979)
Processing: Psycho (1960)
Processing: The Shining (1980)
Processing: The Thing (1982)
Processing: Tumbbad (2018)
Processing: The Exorcist (1973)
Processing: Diabolique (1955)
Processing: Rosemary's Baby (1968)
Processing: What Ever Happened to Baby Jane? (1962)
Processing: The Cabinet of Dr. Caligari (1920)
Processing: The Blue Elephant (2014)
Processing: Shaun of the Dead (2004)
Processing: Nosferatu (1922)
Processing: Let the Right One In (2008)
Processing: King Kong (1933)
Processing: Get Out I (2017)
  -> TMDB'de bulunamadı, geçiyorum.
Processing: Predator (1987)
Processing: I Saw the Devil (2010)
Processing: Dawn of the Dead (1978)
Processing: Night of the Living Dead (1968)
Processing: Freaks (1932)
Processing: Frankenstein (1931)
Processing: The Innocents (1961)
Processing: The Bride of Frankenstein (1935)
Processing: Evil Dead II (1987)
Processing: Halloween (1978)
Processing: Invasion of the Body Snatchers (1956)
Processing: American Psycho (2000)
Proc

2099

In [9]:
tmdb_reviews_df = pd.DataFrame(all_review_rows)
tmdb_reviews_df["movie_title"].value_counts().head()
tmdb_reviews_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2099 entries, 0 to 2098
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   tmdb_id        2099 non-null   int64  
 1   author         2099 non-null   object 
 2   content        2099 non-null   object 
 3   created_at     2099 non-null   object 
 4   author_rating  1968 non-null   float64
 5   movie_title    2099 non-null   object 
 6   movie_year     2099 non-null   int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 114.9+ KB


In [10]:
tmdb_reviews_df.to_csv("../data_clean/horror_tmdb_reviews_sample.csv", index=False)
