In [19]:
import requests
import pandas as pd

def get_movies_on_page(page, genre_id):
    url = f'https://api.themoviedb.org/3/discover/movie?api_key={api_key}&page={page}&with_genres={genre_id}'
    response = requests.get(url)
    return response.json()['results']

def get_movie_credits(movie_id):
    url = f'https://api.themoviedb.org/3/movie/{movie_id}/credits?api_key={api_key}'
    response = requests.get(url)
    return response.json()

def get_movie_details(movie_id):
    url = f'https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}'
    response = requests.get(url)
    return response.json()

def get_genres():
    url = f'https://api.themoviedb.org/3/genre/movie/list?api_key={api_key}'
    response = requests.get(url)
    return response.json()['genres']

api_key = "api_key"
movie_data_list = []

# Get the list of all genres
genres = get_genres()

# For each genre, get a number of movies
for genre in genres:
    genre_id = genre['id']
    page = 1
    total_pages = 90 

    while page <= total_pages:
        movies = get_movies_on_page(page, genre_id)
        
        for movie in movies:
            movie_id = movie['id']
            details = get_movie_details(movie_id)
            credits = get_movie_credits(movie_id)
            release_date = details['release_date']

            # Check if release_date is not an empty string before processing
            if release_date:
                release_year = int(release_date.split('-')[0])

                if release_year < 2000:
                    continue
            else:
                continue  # If there's no release_date, skip this movie

            directors = [member['name'] for member in credits['crew'] if member['job'] == 'Director']
            actors = [member['name'] for member in credits['cast'][:10]]

            movie_data = {
                'imdb_id': details['imdb_id'],
                'title': details['title'],
                'overview': details['overview'],
                'release_date': release_date,
                'runtime': details['runtime'],
                'status': details['status'],
                'tagline': details['tagline'],
                'adult': details['adult'],
                'vote_average': details['vote_average'],
                'vote_count': details['vote_count'],  # Added vote count
                'genres': [genre['name'] for genre in details['genres']],
                'production_companies': [company['name'] for company in details['production_companies']],
                'directors': directors,
                'actors': actors,
            }

            movie_data_list.append(movie_data)

        page += 1

movie_df = pd.DataFrame(movie_data_list)
print(movie_df)
print(movie_df.info())


          imdb_id                              title  \
0      tt10366206               John Wick: Chapter 4   
1       tt5433140                             Fast X   
2      tt14846026                               Sisu   
3      tt10954600  Ant-Man and the Wasp: Quantumania   
4       tt6791350     Guardians of the Galaxy Vol. 3   
...           ...                                ...   
24074   tt1219336                   The Donner Party   
24075   tt4029234                Day of the Stranger   
24076   tt1671749                          Toro Loco   
24077   tt3558104                           Bordello   
24078   tt7802198                Motorpsycho Maniacs   

                                                overview release_date  \
0      With the price on his head ever increasing, Jo...   2023-03-22   
1      Over many missions and against impossible odds...   2023-05-17   
2      Deep in the wilderness of Lapland, Aatami Korp...   2022-10-12   
3      Super-Hero partners Scott La

In [20]:
# After all data collection and creating the DataFrame:

# Round the ratings to the nearest integer
movie_df['rating_rounded'] = movie_df['vote_average'].apply(round)

print(movie_df)


          imdb_id                              title  \
0      tt10366206               John Wick: Chapter 4   
1       tt5433140                             Fast X   
2      tt14846026                               Sisu   
3      tt10954600  Ant-Man and the Wasp: Quantumania   
4       tt6791350     Guardians of the Galaxy Vol. 3   
...           ...                                ...   
24074   tt1219336                   The Donner Party   
24075   tt4029234                Day of the Stranger   
24076   tt1671749                          Toro Loco   
24077   tt3558104                           Bordello   
24078   tt7802198                Motorpsycho Maniacs   

                                                overview release_date  \
0      With the price on his head ever increasing, Jo...   2023-03-22   
1      Over many missions and against impossible odds...   2023-05-17   
2      Deep in the wilderness of Lapland, Aatami Korp...   2022-10-12   
3      Super-Hero partners Scott La

In [21]:
print(movie_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24079 entries, 0 to 24078
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   imdb_id               23569 non-null  object 
 1   title                 24079 non-null  object 
 2   overview              24079 non-null  object 
 3   release_date          24079 non-null  object 
 4   runtime               24079 non-null  int64  
 5   status                24079 non-null  object 
 6   tagline               24079 non-null  object 
 7   adult                 24079 non-null  bool   
 8   vote_average          24079 non-null  float64
 9   vote_count            24079 non-null  int64  
 10  genres                24079 non-null  object 
 11  production_companies  24079 non-null  object 
 12  directors             24079 non-null  object 
 13  actors                24079 non-null  object 
 14  rating_rounded        24079 non-null  int64  
dtypes: bool(1), float64

In [22]:
# Get the count of rows for each unique rating
rating_counts = movie_df['rating_rounded'].value_counts()

print(rating_counts)


7     9288
6     8101
8     2991
5     2131
0      662
4      540
9      116
2       96
3       94
10      37
1       23
Name: rating_rounded, dtype: int64


In [15]:

# Convert DataFrame to CSV
movie_df.to_csv(r'C:\Users\andre\Desktop\New folder\moviess.csv', index=False)
