## Description
#### Purpose: To filter out movies on TMDB without a theatrical release.
#### Input: `2.1.1_TMDB_IDs_2010_2023.csv`
#### Output: `2.1.2_TMDB_IDs_Theatrical.csv`
TMDB houses metadata on movies of all forms, including those that were never released in theaters. Since this project is concerned only with movies that premiered in theaters, this notebook keeps the movies that have at least one theatrical release and filters out the rest.

In [None]:
from tmdbv3api import TMDb
from tmdbv3api import Movie
from tmdbv3api.exceptions import TMDbException
import random
import pandas as pd
import csv
# Set up the TMDB client object and its settings.
tmdb = TMDb()
tmdb.api_key = ' '  # API key redacted
tmdb.language = 'en'

# Movie endpoint wrapper used by the cell below to look up details by ID.
movie = Movie()

In [None]:
# Keep only the TMDB IDs that have at least one theatrical release.
#
# Reads the ID list produced by notebook 2.1.1, queries each ID through the
# `movie` client created in the previous cell, and writes the IDs that have a
# release entry of type 3 to ./Outputs/2.1.2_TMDB_IDs_Theatrical.csv with a
# 1-based "Index" column.
import os

# Release-date "type" value this filter treats as a theatrical release.
THEATRICAL_RELEASE_TYPE = 3


def _has_theatrical_release(movie_info):
    """Return True if any release-date entry in *movie_info* has type 3.

    Walks ``movie_info['release_dates']['results'][*]['release_dates'][*]``
    and stops at the first entry whose ``type`` equals
    ``THEATRICAL_RELEASE_TYPE``. Missing keys are treated as empty.
    """
    release_dates = movie_info.get('release_dates', {})
    results = release_dates.get('results', [])
    return any(
        release_date.get('type') == THEATRICAL_RELEASE_TYPE
        for result in results
        for release_date in result.get('release_dates', [])
    )


num_movies = 0
i = 1  # Not read in this cell; kept in case another cell relies on it.

# File Path for csv from 2.1.1
csv_file_path = '../2.1.1 Get TMDB IDs 2010-2023/Outputs/2.1.1_TMDB_IDs_2010_2023.csv'

# Theatrical movie IDs, in the order they are first encountered
movie_data = []

# IDs already written to movie_data, used to drop duplicate input rows
written_movie_ids = set()

# newline='' lets the csv module do its own newline handling.
with open(csv_file_path, 'r', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    next(csv_reader, None)  # Skip the header row (tolerates an empty file)

    # Loop until the end of the CSV. next() stays inside the try block so a
    # malformed line is reported and skipped instead of aborting the run.
    while True:
        # Reset per iteration so the error handler never reports the ID of a
        # previous row (or hits an unbound name on the very first row).
        row = movie_id = None
        try:
            row = next(csv_reader)
            movie_id = int(row[1])  # second column holds the TMDB ID

            # Already kept: skip the redundant API request.
            if movie_id in written_movie_ids:
                continue

            # Query movie details by ID
            movie_info = movie.details(movie_id)
            if movie_info and _has_theatrical_release(movie_info):
                movie_data.append(movie_id)
                written_movie_ids.add(movie_id)
                num_movies += 1
                # num_movies counts kept (theatrical) IDs, so this fires once
                # per 1000 movies kept, not per 1000 rows read.
                if num_movies % 1000 == 0:
                    print(f"Processed {num_movies} movies")
        except StopIteration:
            break
        except Exception as e:
            # Best effort: report the failure and move on to the next row.
            if movie_id is not None:
                print(f"Error processing movie ID {movie_id}:{e}")
            else:
                print(f"Error processing row {row!r}:{e}")

# Create a DataFrame from the movie_data list
df = pd.DataFrame({'ID': movie_data})

# Add a 1-based index column in front of the IDs
df.insert(0, 'Index', range(1, len(df) + 1))

# Save. Create the output folder first so a missing directory cannot throw
# away the results of the whole API pass.
file_name = './Outputs/2.1.2_TMDB_IDs_Theatrical.csv'
os.makedirs(os.path.dirname(file_name), exist_ok=True)
df.to_csv(file_name, index=False)
print("Operation Complete")