In [None]:
from dotenv import load_dotenv
import os

In [None]:
# Load the .env file so that settings stored there (such as TMDB_API_KEY,
# read in the next cell) can be picked up via os.getenv.
load_dotenv()

In [None]:
# TMDB credential, sent as a Bearer token by every request in this notebook.
# The value is None when TMDB_API_KEY is not set in the environment.
tmdb_api_key = os.environ.get('TMDB_API_KEY')

# Do not print the key: cell outputs are saved together with the notebook.

Rated Movies
GET
https://api.themoviedb.org/3/account/{account_id}/rated/movies

In [65]:
import requests

# Page 1 of the TMDB "discover" listing: movies sorted by popularity with an
# average vote of at least 8 and at least 1000 votes.
url = "https://api.themoviedb.org/3/discover/movie?include_adult=false&include_video=false&language=en-US&page=1&sort_by=popularity.desc&vote_average.gte=8&vote_count.gte=1000"

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {tmdb_api_key}"
}

# The timeout stops the cell from hanging on a stalled connection, and
# raise_for_status() reports a rejected request (bad key, rate limit) here
# instead of as a KeyError on 'results' in a later cell.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
movie_response = response.json()

In [66]:
# Page 2 of the same discover query.
url = "https://api.themoviedb.org/3/discover/movie?include_adult=false&include_video=false&language=en-US&page=2&sort_by=popularity.desc&vote_average.gte=8&vote_count.gte=1000"

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {tmdb_api_key}"
}

# Fail here on a stalled or rejected request rather than on a missing
# 'results' key below.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
movie_response2 = response.json()

# Merge this page into the accumulated results. Movies whose id is already
# present are skipped, so re-running this cell does not append the page twice.
known_ids = {movie['id'] for movie in movie_response['results']}
movie_response['results'] += [
    movie for movie in movie_response2['results'] if movie['id'] not in known_ids
]

In [67]:
# Page 3 of the same discover query.
url = "https://api.themoviedb.org/3/discover/movie?include_adult=false&include_video=false&language=en-US&page=3&sort_by=popularity.desc&vote_average.gte=8&vote_count.gte=1000"

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {tmdb_api_key}"
}

# Fail here on a stalled or rejected request rather than on a missing
# 'results' key below.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
movie_response3 = response.json()

# Merge this page into the accumulated results. Movies whose id is already
# present are skipped, so re-running this cell does not append the page twice.
known_ids = {movie['id'] for movie in movie_response['results']}
movie_response['results'] += [
    movie for movie in movie_response3['results'] if movie['id'] not in known_ids
]

In [None]:
# print(movie_response.keys())
# print(movie_response['results'][0].keys())

In [78]:
# Image URLs in this notebook are assembled as <base url><size><file path>;
# the two *_base_url values below already include the chosen size segment.
secure_base_url = "https://image.tmdb.org/t/p/"

# Size options for each image kind.
poster_sizes = ["w92", "w154", "w185", "w342", "w500", "w780", "original"]
backdrop_sizes = ["w300", "w780", "w1280", "original"]

# Selected sizes: "w185" for posters, "w780" for backdrops.
use_poster_size = poster_sizes[2]
use_backdrop_size = backdrop_sizes[1]

poster_base_url = f"{secure_base_url}{use_poster_size}"
backdrop_base_url = f"{secure_base_url}{use_backdrop_size}"

In [46]:
# Language list used to translate each movie's 'original_language' code into
# an English language name.
url = "https://api.themoviedb.org/3/configuration/languages"

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {tmdb_api_key}"
}

# The next cell iterates over this response entry by entry, so stop here with
# a clear HTTP error if the request is rejected or stalls.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
language_response = response.json()

In [None]:
# Lookup table mapping each entry's 'iso_639_1' code to its 'english_name'.
languages = {
    entry['iso_639_1']: entry['english_name']
    for entry in language_response
}

In [70]:
movie_lst = []

# The same headers are sent with every per-movie request, so build them once.
headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {tmdb_api_key}"
}

for movie in movie_response['results']:
    # TMDB can return null for poster_path / backdrop_path; concatenating None
    # onto the base URL would raise a TypeError, so store '' in that case.
    poster_path = movie.get('poster_path')
    backdrop_path = movie.get('backdrop_path')
    language_code = movie['original_language']

    new_row = {
        'title': movie['title'],
        'plot_summary': movie['overview'],
        'release_date': movie['release_date'],
        'poster_url': poster_base_url + poster_path if poster_path else '',
        'backdrop_url': backdrop_base_url + backdrop_path if backdrop_path else '',
        # Fall back to the raw language code when it is not in the lookup table.
        'language': languages.get(language_code, language_code),
        'tmdb_id': movie['id'],
    }

    # The runtime is read from the per-movie detail endpoint.
    url = f"https://api.themoviedb.org/3/movie/{movie['id']}?language=en-US"
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    specific_movie_response = response.json()
    new_row['duration_in_mins'] = specific_movie_response.get('runtime')

    movie_lst.append(new_row)


In [38]:
import pandas as pd

In [71]:
# Columns exported to movie.csv, in output order. The 'tmdb_id' key carried by
# each row of movie_lst is not listed, so it is left out of the file.
movie_columns = [
    'title',
    'plot_summary',
    'release_date',
    'poster_url',
    'backdrop_url',
    'language',
    'duration_in_mins',
]

df = pd.DataFrame(movie_lst, columns=movie_columns)
# Save without the DataFrame index column.
df.to_csv('movie.csv', index=False)

Get MoviePerson details

Take the top 8 cast members from each movie (its directors are collected as well)

In [75]:
cast_limit = 8              # keep only cast entries whose 'order' is below this
people_ids = set()          # ids of every person referenced by any movie
movie_to_cast = dict()      # movie tmdb_id -> list of cast person ids
movie_to_director = dict()  # movie tmdb_id -> list of director person ids

# The same headers are sent with every credits request, so build them once.
headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {tmdb_api_key}"
}

for movie in movie_lst:
    url = f"https://api.themoviedb.org/3/movie/{movie['tmdb_id']}/credits"

    # Fail with a clear HTTP error instead of a KeyError on 'cast' below.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    casts_response = response.json()

    movie_to_cast[movie['tmdb_id']] = [cast['id'] for cast in casts_response['cast'] if cast['order'] < cast_limit]
    movie_to_director[movie['tmdb_id']] = [crew['id'] for crew in casts_response['crew'] if crew['job'] == 'Director']

    # One update covers both lists; a separate second pass over the cast would
    # only re-add ids that are already in the set.
    people_ids.update(movie_to_cast[movie['tmdb_id']], movie_to_director[movie['tmdb_id']])

print(len(people_ids))

474


In [80]:
people_lst = []

# The same headers are sent with every person request, so build them once.
headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {tmdb_api_key}"
}

for people_id in people_ids:
    url = f"https://api.themoviedb.org/3/person/{people_id}"

    # Best effort: a response lacking the expected fields still produces a row
    # (filled with defaults). The timeout only guards against a hung connection.
    people_response = requests.get(url, headers=headers, timeout=30).json()

    # Without a profile image, store '' rather than the bare base URL, which
    # would not point at any image.
    profile_path = people_response.get('profile_path')

    new_row = {
        'name': people_response.get('name', ''),
        # NOTE(review): 'death_of_birth' looks like a typo for 'date_of_birth';
        # the key is kept because the CSV export selects the column by this name.
        'death_of_birth': people_response.get('birthday', ''),
        'biography': people_response.get('biography', ''),
        'image_url': poster_base_url + profile_path if profile_path else '',
        'date_of_death': people_response.get('deathday', ''),
        'tmdb_id': people_response.get('id', ''),
        'place_of_birth': people_response.get('place_of_birth', '')
    }

    people_lst.append(new_row)

print(len(people_lst))

474


In [81]:
# Columns exported to movie_person.csv, in output order. 'name' used to be
# listed twice, which repeated the name column in the file; it now appears once.
# NOTE(review): 'death_of_birth' holds the birthday and looks like a typo for
# 'date_of_birth'. The header is left unchanged here because consumers of the
# CSV are not visible from this notebook; confirm before renaming.
# NOTE(review): the rows also carry 'tmdb_id'; it is not exported, matching
# movie.csv. Add it here if downstream joins need it.
person_columns = [
    'name',
    'death_of_birth',
    'biography',
    'image_url',
    'date_of_death',
    'place_of_birth',
]

df = pd.DataFrame(people_lst, columns=person_columns)
df.to_csv('movie_person.csv', index=False)

Get all genres and create the genre table

In [83]:
# Movie genre list; the next cell reads its 'genres' entries.
url = "https://api.themoviedb.org/3/genre/movie/list?language=en"

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {tmdb_api_key}"
}

# Fail with a clear HTTP error instead of a KeyError on 'genres' in the next
# cell, and do not hang on a stalled connection.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
genres_response = response.json()

In [85]:
# genre_dict maps each genre id to its name; genre_lst holds one
# {'name': ...} record per genre, in response order.
genre_dict = {entry['id']: entry['name'] for entry in genres_response['genres']}
genre_lst = [{'name': entry['name']} for entry in genres_response['genres']]

print(len(genre_lst))

19


In [None]:
# genre.csv has a single 'name' column, one row per genre.
genre_columns = ['name']
df = pd.DataFrame(genre_lst, columns=genre_columns)
df.to_csv('genre.csv', index=False)