In [None]:
import requests
import pandas as pd
from tqdm import tqdm
import time
import os

API_KEY = "eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiI3YTA4ZDFiNTIzNzAyMTA5MzI4M2ZlNDYyM2NiMmVlNiIsIm5iZiI6MTczOTYxMzMyMC42MzMsInN1YiI6IjY3YjA2NDg4ZTVhNmVlYmFkZjM2MjY0MCIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.DhAOCiWFMSjwVZu1N3Q7D8MrxQVJiKtTJUVaiEpYf-A"
HEADERS = {"Authorization": f"Bearer {API_KEY}"}
BASE_URL = "https://api.themoviedb.org/3"
LANG = "en-US"
MAX_PAGES_PER_YEAR = 1000

# –ö–æ–Ω—Ñ–∏–≥—É—Ä–∞—Ü–∏—è –ø–æ —Ç–∏–ø—É –¥–∞–Ω–Ω—ã—Ö
CONFIG = {
    "tv": {
        "years": range(1980, 1995),
        "checkpoint": "tmdb_checkpoint_tv_1980_1995.csv",
        "output": "tmdb_tv_1980_1995.csv"
    },
    "movie": {
        "years": range(1980, 1995),
        "checkpoint": "tmdb_checkpoint_movie_1980_1995.csv",
        "output": "tmdb_movies_1980_1995.csv"
    }
}

def enrich_items_batch(batch, media_type, processed_ids, all_items, checkpoint_path):
    enriched = []
    for item in tqdm(batch, desc=f"–û–±–æ–≥–∞—â–µ–Ω–∏–µ {media_type}"):
        url = f"{BASE_URL}/{media_type}/{item['tmdb_id']}"
        params = {"language": LANG}

        try:
            response = requests.get(url, headers=HEADERS, params=params)
            if response.status_code != 200:
                continue

            data = response.json()

            genres = [g['name'] for g in data.get("genres", [])]
            item['genres'] = ", ".join(genres)

            companies = [c['name'] for c in data.get("production_companies", [])]
            item['production_companies'] = ", ".join(companies)

            if media_type == "tv":
                networks = data.get("networks", [])
                item['network_id'] = networks[0].get("id") if networks else None
                item['network_name'] = networks[0].get("name") if networks else None
            else:
                item['network_id'] = None
                item['network_name'] = None

            enriched.append(item)
            processed_ids.add(str(item['tmdb_id']))
            all_items.append(item)

            if len(enriched) % 20 == 0:
                pd.DataFrame(all_items).to_csv(checkpoint_path, index=False)

            time.sleep(0.1)

        except Exception as e:
            print(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –æ–±—Ä–∞–±–æ—Ç–∫–µ {item['tmdb_id']}: {e}")

    return enriched

def discover_and_enrich(media_type, year, processed_ids, all_items, checkpoint_path):
    url = f"{BASE_URL}/discover/{media_type}"

    for page in range(1, MAX_PAGES_PER_YEAR + 1):
        params = {
            "language": LANG,
            "sort_by": "popularity.desc",
            "page": page
        }

        if media_type == "movie":
            params["primary_release_date.gte"] = f"{year}-01-01"
            params["primary_release_date.lte"] = f"{year}-12-31"
        else:
            params["first_air_date.gte"] = f"{year}-01-01"
            params["first_air_date.lte"] = f"{year}-12-31"

        try:
            response = requests.get(url, headers=HEADERS, params=params)
            if response.status_code != 200:
                break

            results = response.json().get("results", [])
            if not results:
                break

            batch = []
            for item in results:
                if str(item.get("id")) in processed_ids:
                    continue
                batch.append({
                    "tmdb_id": item.get("id"),
                    "name": item.get("title") if media_type == "movie" else item.get("name"),
                    "overview": item.get("overview"),
                    "release_date": item.get("release_date") if media_type == "movie" else item.get("first_air_date"),
                    "vote_average": item.get("vote_average"),
                    "vote_count": item.get("vote_count"),
                    "popularity": item.get("popularity"),
                    "original_language": item.get("original_language"),
                    "type": media_type
                })

            if batch:
                enrich_items_batch(batch, media_type, processed_ids, all_items, checkpoint_path)

            time.sleep(0.2)

        except Exception as e:
            print(f"–û—à–∏–±–∫–∞ –ø—Ä–∏ –∑–∞–≥—Ä—É–∑–∫–µ —Å—Ç—Ä–∞–Ω–∏—Ü—ã {page} {media_type} {year}: {e}")
            break

def run_pipeline(media_type):
    print(f"\nüöÄ –ó–∞–ø—É—Å–∫ —Å–±–æ—Ä–∞ –¥–ª—è: {media_type.upper()}")

    years = CONFIG[media_type]["years"]
    checkpoint_path = CONFIG[media_type]["checkpoint"]
    output_path = CONFIG[media_type]["output"]

    # –ó–∞–≥—Ä—É–∑–∫–∞ —á–µ–∫–ø–æ–∏–Ω—Ç–∞, –µ—Å–ª–∏ –µ—Å—Ç—å
    if os.path.exists(checkpoint_path):
        checkpoint_df = pd.read_csv(checkpoint_path, low_memory=False)
        processed_ids = set(checkpoint_df["tmdb_id"].astype(str))
        all_items = checkpoint_df.to_dict(orient="records")
        print(f"üîÅ –ü—Ä–æ–¥–æ–ª–∂–∞–µ–º —Å {len(all_items)} —É–∂–µ –∑–∞–≥—Ä—É–∂–µ–Ω–Ω—ã—Ö –∑–∞–ø–∏—Å–µ–π")
    else:
        processed_ids = set()
        all_items = []

    for year in tqdm(years, desc=f"üìÖ {media_type.upper()} - {years.start}‚Äì{years.stop - 1}"):
        print(f"\nüîç –û–±—Ä–∞–±–æ—Ç–∫–∞ {media_type} –∑–∞ {year}")
        discover_and_enrich(media_type, year, processed_ids, all_items, checkpoint_path)


    # –§–∏–Ω–∞–ª—å–Ω–æ–µ —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ
    df = pd.DataFrame(all_items)
    df.to_csv(output_path, index=False)
    print(f"\n‚úÖ –°–æ—Ö—Ä–∞–Ω–µ–Ω–æ {len(df)} –∑–∞–ø–∏—Å–µ–π –≤ {output_path}")

if __name__ == "__main__":
    run_pipeline("tv")     # —Å–Ω–∞—á–∞–ª–∞ —Å–µ—Ä–∏–∞–ª—ã
    run_pipeline("movie")  # –ø–æ—Ç–æ–º —Ñ–∏–ª—å–º—ã


üöÄ –ó–∞–ø—É—Å–∫ —Å–±–æ—Ä–∞ –¥–ª—è: TV


üìÖ TV - 1995‚Äì1995:   0%|          | 0/1 [00:00<?, ?it/s]


üîç –û–±—Ä–∞–±–æ—Ç–∫–∞ tv –∑–∞ 1995


–û–±–æ–≥–∞—â–µ–Ω–∏–µ tv: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:11<00:00,  1.81it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ tv: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:10<00:00,  1.86it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ tv: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:10<00:00,  1.92it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ tv: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:10<00:00,  1.82it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ tv: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:10<00:00,  1.86it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ tv: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:11<00:00,  1.74it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ tv: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:10<00:00,  1.83it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ tv: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:10<00:00,  1.83it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ tv: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:10<00:00,  1.91it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ tv: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:10<00:00,  1.88it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ tv: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [0


‚úÖ –°–æ—Ö—Ä–∞–Ω–µ–Ω–æ 1049 –∑–∞–ø–∏—Å–µ–π –≤ tmdb_tv_1995_1996.csv

üöÄ –ó–∞–ø—É—Å–∫ —Å–±–æ—Ä–∞ –¥–ª—è: MOVIE


üìÖ MOVIE - 1995‚Äì1995:   0%|          | 0/1 [00:00<?, ?it/s]


üîç –û–±—Ä–∞–±–æ—Ç–∫–∞ movie –∑–∞ 1995


–û–±–æ–≥–∞—â–µ–Ω–∏–µ movie: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:12<00:00,  1.62it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ movie: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:11<00:00,  1.68it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ movie: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:11<00:00,  1.80it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ movie: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:11<00:00,  1.80it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ movie: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:11<00:00,  1.70it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ movie: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:11<00:00,  1.72it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ movie: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:11<00:00,  1.77it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ movie: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:11<00:00,  1.80it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ movie: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:11<00:00,  1.76it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ movie: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:10<00:00,  1.85it/s]
–û–±–æ–≥–∞—â–µ–Ω–∏–µ movie: 100%|‚ñà‚ñà‚


‚úÖ –°–æ—Ö—Ä–∞–Ω–µ–Ω–æ 6531 –∑–∞–ø–∏—Å–µ–π –≤ tmdb_movies_1995_1996.csv





In [None]:
import pandas as pd
OUTPUT_FILE1 = "tmdb_tv_1980_1995.csv"
OUTPUT_FILE2 = "tmdb_movies_1980_1995.csv"
df1 = pd.read_csv(OUTPUT_FILE1, low_memory=False)
df2 = pd.read_csv(OUTPUT_FILE2, low_memory=False)
df = pd.concat([df1, df2], ignore_index=True)
df.shape

(7580, 13)

In [3]:
df.head()

Unnamed: 0,tmdb_id,name,overview,release_date,vote_average,vote_count,popularity,original_language,type,genres,production_companies,network_id,network_name
0,40605,Die Harald Schmidt Show,The Harald Schmidt Show is a German late night...,1995-12-05,7.0,16,349.5288,de,tv,Reality,,163.0,SAT.1
1,14424,Young Hearts,Malha√ß√£o is a Brazilian television series for ...,1995-04-24,6.1,31,250.4493,pt,tv,Soap,"Cintra Produ√ß√µes, Est√∫dios Globo",60.0,TV Globo
2,15844,A Kindred Spirit,A Kindred Spirit was a television drama series...,1995-05-15,0.0,0,128.6814,cn,tv,"Drama, Comedy",TVB,48.0,TVB Jade
3,46121,A Pr√≥xima V√≠tima,A young law student starts to work as a detect...,1995-03-13,7.6,23,87.54,pt,tv,"Soap, Crime, Drama",Est√∫dios Globo,60.0,TV Globo
4,97,The Drew Carey Show,Drew is an assistant director of personnel in ...,1995-09-13,6.55,159,85.9274,en,tv,Comedy,"Warner Bros. Television, Mohawk Productions",2.0,ABC


In [4]:
df.tail()

Unnamed: 0,tmdb_id,name,overview,release_date,vote_average,vote_count,popularity,original_language,type,genres,production_companies,network_id,network_name
7575,187227,Dream Theater: Awake in Japan,Dream Theater - Awake in Japan '95 dvd,1995-04-22,0.0,0,0.0,en,movie,,,,
7576,185806,Biohazard: Dynamo Open Air,"Biohazard Setlist at Dynamo Open Air 1995, re...",1995-09-23,0.0,0,0.0,en,movie,,,,
7577,179697,Kiss [1979] The Kiss Reels,"Promo Clips, Commericals......",1995-12-30,0.0,0,0.0,en,movie,,,,
7578,161156,Geert Hoste: Alleen,,1995-12-30,0.0,0,0.0,nl,movie,,,,
7579,156114,Ray Boltz The Concert of a Lifetime,"""When I was nineteen years old, I attended a c...",1995-01-01,0.0,0,0.0071,en,movie,,,,


In [6]:
df['tmdb_id'] = df['tmdb_id'].astype(str)
df[df['tmdb_id'].isin(['862'])]

Unnamed: 0,tmdb_id,name,overview,release_date,vote_average,vote_count,popularity,original_language,type,genres,production_companies,network_id,network_name
1049,862,Toy Story,"Led by Woody, Andy's toys live happily in his ...",1995-11-22,7.968,18727,22.4492,en,movie,"Animation, Adventure, Family, Comedy",Pixar,,


In [8]:
df3 = pd.read_csv("TMDB_1995_2025.csv", low_memory=False, sep=";", dtype={'tmdb_id': str})
df = pd.concat([df, df3], ignore_index=True)
df.shape

(366191, 13)

In [9]:
df.head()

Unnamed: 0,tmdb_id,name,overview,release_date,vote_average,vote_count,popularity,original_language,type,genres,production_companies,network_id,network_name
0,40605,Die Harald Schmidt Show,The Harald Schmidt Show is a German late night...,1995-12-05,7.0,16,349.5288,de,tv,Reality,,163.0,SAT.1
1,14424,Young Hearts,Malha√ß√£o is a Brazilian television series for ...,1995-04-24,6.1,31,250.4493,pt,tv,Soap,"Cintra Produ√ß√µes, Est√∫dios Globo",60.0,TV Globo
2,15844,A Kindred Spirit,A Kindred Spirit was a television drama series...,1995-05-15,0.0,0,128.6814,cn,tv,"Drama, Comedy",TVB,48.0,TVB Jade
3,46121,A Pr√≥xima V√≠tima,A young law student starts to work as a detect...,1995-03-13,7.6,23,87.54,pt,tv,"Soap, Crime, Drama",Est√∫dios Globo,60.0,TV Globo
4,97,The Drew Carey Show,Drew is an assistant director of personnel in ...,1995-09-13,6.55,159,85.9274,en,tv,Comedy,"Warner Bros. Television, Mohawk Productions",2.0,ABC


In [None]:
# df.to_csv("TMDB_1995_2025.csv", index=False, sep=";")