## Description

#### Purpose: To obtain external IDs and credits, using TMDB IDs, for all movies on TMDB with theatrical release dates between 2010 and 2023.

#### Input: `2.1.2_TMDB_IDs_Theatrical.csv`

#### Outputs: `2.1.4_TMDB_External_IDs.csv`, `2.1.4_TMDB_Movie_Credits.csv`

This notebook takes the TMDB IDs of movies with theatrical releases and retrieves each movie's external IDs (i.e., IMDb and Wikidata IDs) as well as its credits.

In [None]:
import csv
import os
import random

import pandas as pd
from tmdbv3api import Movie
from tmdbv3api import TMDb
from tmdbv3api.exceptions import TMDbException
# Configure the TMDB client used by every cell below.
# The API key is read from the TMDB_API_KEY environment variable so the secret
# never has to be written into the notebook; when the variable is not set, the
# original blank placeholder is used.
tmdb=TMDb()
tmdb.api_key=os.environ.get('TMDB_API_KEY', ' ')
tmdb.language='en'
movie=Movie()

In [None]:
# Look up the external IDs (IMDb, Wikidata) of every TMDB movie ID listed in
# the 2.1.2 output and save them to a CSV file.
num_movies=0

# File path for the CSV from 2.1.2
csv_file_path='../2.1.2 Filter Out Non-Theatrical Releases/Outputs/2.1.2_TMDB_IDs_Theatrical.csv'

# Parallel lists: entry k of each list describes the same movie
movie_data=[]
imdb_ids = []
wikidata_ids = []

# newline='' is the csv module's recommended way to open a file for csv.reader
with open(csv_file_path,'r',newline='') as csv_file:
    csv_reader=csv.reader(csv_file)
    next(csv_reader, None) # Skip the header row (tolerates an empty file)
    for row in csv_reader:
        # Reset per row so the error message never reports a previous row's ID
        movie_id=None
        try:
            # The TMDB ID is read from the second column of each row
            movie_id=int(row[1])

            # Query the movie's external IDs by TMDB ID
            ext_ids=movie.external_ids(movie_id)

            if ext_ids:
                movie_data.append(movie_id)
                imdb_ids.append(ext_ids.get('imdb_id'))
                wikidata_ids.append(ext_ids.get('wikidata_id'))
                num_movies += 1
                # Progress indicator: one line per 1000 movies stored
                if num_movies % 1000 == 0:
                    print(num_movies)

        except Exception as e:
            # Best effort: report the failure and continue with the next row
            if movie_id is None:
                print(f"Error processing row {row}:{e}")
            else:
                print(f"Error processing movie ID {movie_id}:{e}")

ext_ids_df = pd.DataFrame({'ID':movie_data, 'imdb_id': imdb_ids, 'wikidata_id': wikidata_ids})

# Save
# NOTE(review): the notebook description lists `2.1.4_TMDB_External_IDs.csv` as
# the output; confirm whether this file is renamed afterwards.
file_name='movie_external_ids.csv'
ext_ids_df.to_csv(file_name,index=False)
print("Operation Complete")

The code block below gets movie credits by:
1. going through each movie and getting its credits
2. getting the cast IDs, cast order, and director IDs and saving them to `movie_credits_df` *for that movie*
3. adding the person's ID, name, popularity, etc. to the overall `people_info` dataframe if that person (cast member or director) is not already there.

In [None]:
# Fetch the credits of every movie ID in the input CSV. Produces:
#   movie_credits_df - one row per movie (cast IDs, cast order, director IDs)
#   people_info      - one row per distinct cast member / director
num_movies=0

# NOTE(review): this path differs from the 2.1.2 input named in the notebook
# description and used by the external-ID cell; confirm it is the intended file.
csv_file_path='movie_release_date_revenue_nonzero_ids.csv'

people_columns = ['id', 'name', 'known_for_dept', 'popularity']
credits_columns = ['movie_id', 'cast_ids', 'order', 'director_ids']

# Rows are accumulated in plain Python containers and converted to DataFrames
# once at the end. Growing a DataFrame with pd.concat inside the loop copies
# the whole frame on every row.
people_records = {}   # person id -> row dict; the first appearance wins
credit_records = []   # one row dict per movie


def _remember_person(person):
    """Record a person's details the first time their id is seen; return the id."""
    person_id = person.get('id')
    if person_id not in people_records:
        people_records[person_id] = {
            'id': person_id,
            'name': person.get('name'),
            'known_for_dept': person.get('known_for_department'),
            'popularity': person.get('popularity'),
        }
    return person_id


# newline='' is the csv module's recommended way to open a file for csv.reader
with open(csv_file_path,'r',newline='') as csv_file:
    csv_reader=csv.reader(csv_file)
    next(csv_reader, None)  # Skip the header row (tolerates an empty file)
    for row in csv_reader:
        # Reset per row so the error message never reports a previous row's ID
        movie_id=None
        try:
            # The TMDB ID is read from the second column of each row
            movie_id=int(row[1])
            movie_credits=movie.credits(movie_id)
            if movie_credits:
                cast_ids = []
                order = []
                for person in movie_credits.get('cast'):
                    cast_ids.append(_remember_person(person))
                    order.append(person.get('order'))
                # Only crew members whose job is 'Director' are kept
                director_ids = [
                    _remember_person(person)
                    for person in movie_credits.get('crew')
                    if person.get('job') == 'Director'
                ]
                credit_records.append({
                    'movie_id': movie_id,
                    'cast_ids': cast_ids,
                    'order': order,
                    'director_ids': director_ids,
                })
            # Counts every movie processed without error, even if no credits came back
            num_movies += 1
            if num_movies % 100 == 0:
                print(num_movies)
        except Exception as e:
            # Best effort: report the failure and continue with the next row
            if movie_id is None:
                print(f"Error processing row {row}:{e}")
            else:
                print(f"Error processing movie ID {movie_id}:{e}")

people_info = pd.DataFrame(list(people_records.values()), columns=people_columns)
movie_credits_df = pd.DataFrame(credit_records, columns=credits_columns)

file_name='movie_credits.csv'
movie_credits_df.to_csv(file_name,index=False)

print("Operation Complete")

In [None]:
# Write the table of distinct cast members and directors to disk, without the index column.
people_info_path = 'people_info.csv'
people_info.to_csv(people_info_path, index=False)