In [1]:
import requests
import pandas as pd
import os

In [23]:
def get_movie_credits(movie_id, api_key):
    url = f'https://api.themoviedb.org/3/movie/{movie_id}/credits?api_key={api_key}'
    try:
        res = requests.get(url)
        res.raise_for_status()
        credits = res.json()
        return credits
    except requests.exceptions.RequestException as e:
        print(f'Error fetching credits for movie {movie_id}: {e}')
        return {}

def get_movie_keywords(movie_id, api_key):
    url = f'https://api.themoviedb.org/3/movie/{movie_id}/keywords?api_key={api_key}'
    try:
        res = requests.get(url)
        res.raise_for_status()
        keywords = res.json().get('keywords', [])
        return [keyword['name'] for keyword in keywords]
    except requests.exceptions.RequestException as e:
        print(f'Error fetching keywords for movie {movie_id}: {e}')
        return []

def get_genres(api_key):
    url = f'https://api.themoviedb.org/3/genre/movie/list?api_key={api_key}'
    try:
        res = requests.get(url)
        res.raise_for_status()
        genres = res.json().get('genres', [])
        return {genre['id']: genre['name'] for genre in genres}
    except requests.exceptions.RequestException as e:
        print(f'Error fetching genres: {e}')
        return {}

def get_movies_with_details(lang, freq, api_key):
    url = f'https://api.themoviedb.org/3/movie/popular?api_key={api_key}&with_original_language={lang}'
    genres_map = get_genres(api_key)  # Get genre names
    movies = []
    page = 1
    progress = 0
    while len(movies) < freq:
        try:
            res = requests.get(url + "&page=" + str(page))
            res.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f'Error: {e}')
            return []
        res = res.json()

        if 'errors' in res:
            print('API error:', res['errors'])
            return movies
        if not res['results']:
            print('No more results available.')
            break
        for movie in res['results']:
            if len(movies) >= freq:
                break
            movie_id = movie['id']

            # Fetch and add cast, crew, keywords
            credits = get_movie_credits(movie_id, api_key)
            keywords = get_movie_keywords(movie_id, api_key)
            
            # Top 3 cast members
            movie['cast'] = [member['name'] for member in credits.get('cast', [])[:3]]
            
            # Only director
            director = next((member['name'] for member in credits.get('crew', []) if member['job'] == 'Director'), None)
            movie['director'] = director
            
            movie['keywords'] = keywords

            # Map genre IDs to names
            movie['genre_names'] = [genres_map.get(genre_id, "Unknown") for genre_id in movie.get('genre_ids', [])]
            
            movies.append(movie)

        # Progress update
        new_progress = round(len(movies) / freq * 100)
        if progress != new_progress:
            progress = new_progress
            if progress % 5 == 0:
                print(f'{progress}%, ', end="")
        page += 1
    return movies

In [27]:
api_key = os.getenv('tmdb_api_key') # register on tmdb and get api key

language_count = {
    'en':7000,
    'hi':2000
}

In [28]:
all_movies = []

for key in language_count:
  print("Downloading ", key, end=" : ")
  movies = get_movies_with_details(key,language_count[key], api_key)
  all_movies = all_movies + movies
  print('Total movies found : ', movies.__len__())

Downloading  en : 5%, 10%, 15%, 20%, 25%, 30%, 35%, 40%, 45%, 50%, 55%, 60%, 65%, 70%, 75%, 80%, 85%, 90%, 95%, 100%, Total movies found :  7000
Downloading  hi : 5%, 10%, 15%, 20%, 25%, 30%, 35%, 40%, 45%, 50%, 55%, 60%, 65%, 70%, 75%, 80%, 85%, 90%, 95%, 100%, Total movies found :  2000


In [29]:
df = pd.DataFrame(all_movies)

In [30]:
df

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count,cast,director,keywords,genre_names
0,False,/yDHYTfA3R0jFYba16jBB1ef8oIt.jpg,"[28, 35, 878]",533535,en,Deadpool & Wolverine,A listless Wade Wilson toils away in civilian ...,5464.692,/8cdWjvZQUExUUTzyp4t6EDMubfO.jpg,2024-07-24,Deadpool & Wolverine,False,7.800,2414,"[Ryan Reynolds, Hugh Jackman, Emma Corrin]",Shawn Levy,"[hero, superhero, anti hero, mutant, breaking ...","[Action, Comedy, Science Fiction]"
1,False,/stKGOm8UyhuLPR9sZLjs5AkmncA.jpg,"[16, 10751, 12, 35]",1022789,en,Inside Out 2,Teenager Riley's mind headquarters is undergoi...,2641.200,/vpnVM9B6NMmQpWeZvzLvDESb2QY.jpg,2024-06-11,Inside Out 2,False,7.700,3024,"[Amy Poehler, Maya Hawke, Kensington Tallman]",Kelsey Mann,"[sadness, disgust, sequel, computer animation,...","[Animation, Family, Adventure, Comedy]"
2,False,/4ft6TR9wA6bra0RLL6G7JFDQ5t1.jpg,"[28, 35]",704239,en,The Union,A New Jersey construction worker goes from reg...,1686.712,/d9CTnTHip1RbVi2OQbA2LJJQAGI.jpg,2024-08-15,The Union,False,6.266,396,"[Mark Wahlberg, Halle Berry, J.K. Simmons]",Julian Farino,"[espionage, spy, old flame, miscegenation, con...","[Action, Comedy]"
3,False,/3q01ACG0MWm0DekhvkPFCXyPZSu.jpg,"[28, 80, 53, 35]",573435,en,Bad Boys: Ride or Die,"After their late former Captain is framed, Low...",1751.057,/oGythE98MYleE6mZlGs5oBGkux1.jpg,2024-06-05,Bad Boys: Ride or Die,False,7.580,1719,"[Will Smith, Martin Lawrence, Vanessa Hudgens]",Adil El Arbi,"[miami, florida, sequel, on the run, police de...","[Action, Crime, Thriller, Comedy]"
4,False,/lgkPzcOSnTvjeMnuFzozRO5HHw1.jpg,"[16, 10751, 35, 28]",519182,en,Despicable Me 4,"Gru and Lucy and their girls—Margo, Edith and ...",2051.558,/wWba3TaojhK7NdycRhoQpsG0FaH.jpg,2024-06-20,Despicable Me 4,False,7.300,1257,"[Steve Carell, Kristen Wiig, Will Ferrell]",Chris Renaud,"[superhero, villain, sequel, super villain, il...","[Animation, Family, Comedy, Action]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8995,False,/m3GqDIJo3U1pjcBCbHadFYbrOMX.jpg,"[16, 28, 14, 878]",39107,ja,ドラゴンボールZ 復活のフュージョン!! 悟空とベジータ,"Not paying attention to his job, a young demon...",45.664,/7AHvaEAeQfkfJ4OqcBePxa2ao09.jpg,1995-03-04,Dragon Ball Z: Fusion Reborn,False,7.523,817,"[Masako Nozawa, Ryo Horikawa, Takeshi Kusao]",Shigeyasu Yamauchi,"[martial arts, based on manga, anime, villian]","[Animation, Action, Fantasy, Science Fiction]"
8996,False,/gkzDoIYHxRsua4lbIe5UEsAnla9.jpg,"[14, 28, 18]",37958,en,Immortals,Theseus is a mortal man chosen by Zeus to lead...,41.916,/4SPPX5KIyaQ9O8n5rLvvUIiIhpF.jpg,2011-11-10,Immortals,False,6.021,2528,"[Henry Cavill, Mickey Rourke, Stephen Dorff]",Tarsem Singh,"[army, poison, mythology, zeus, sword fight, m...","[Fantasy, Action, Drama]"
8997,False,/z0tHLAHWRYm7nbwf9CyAZv9sj2j.jpg,"[18, 10749]",51828,en,One Day,"A romantic comedy centered on Dexter and Emma,...",47.661,/n9jMwSg4IavdD8wqdYnyW5w3Mvp.jpg,2011-03-02,One Day,False,7.434,4193,"[Anne Hathaway, Jim Sturgess, Tom Mison]",Lone Scherfig,"[based on novel or book, comedian, love, autho...","[Drama, Romance]"
8998,False,/7Wev9JMo6R5XAfz2KDvXb7oPMmy.jpg,"[9648, 53]",77,en,Memento,Leonard Shelby is tracking down the man who ra...,40.657,/nWtySDlffTfwAa0rSfq61o33ZXV.jpg,2000-10-11,Memento,False,8.182,14634,"[Guy Pearce, Carrie-Anne Moss, Joe Pantoliano]",Christopher Nolan,"[drug dealer, amnesia, insulin, tattoo, waitre...","[Mystery, Thriller]"


In [31]:
df.columns

Index(['adult', 'backdrop_path', 'genre_ids', 'id', 'original_language',
       'original_title', 'overview', 'popularity', 'poster_path',
       'release_date', 'title', 'video', 'vote_average', 'vote_count', 'cast',
       'director', 'keywords', 'genre_names'],
      dtype='object')

In [32]:
df.iloc[0]

adult                                                            False
backdrop_path                         /yDHYTfA3R0jFYba16jBB1ef8oIt.jpg
genre_ids                                                [28, 35, 878]
id                                                              533535
original_language                                                   en
original_title                                    Deadpool & Wolverine
overview             A listless Wade Wilson toils away in civilian ...
popularity                                                    5464.692
poster_path                           /8cdWjvZQUExUUTzyp4t6EDMubfO.jpg
release_date                                                2024-07-24
title                                             Deadpool & Wolverine
video                                                            False
vote_average                                                       7.8
vote_count                                                        2414
cast  

In [33]:
df.to_csv('movies.csv', index=False)