1- **API EXERCISE:** 


Browse the TMDB (The Movie Database) API at https://developers.themoviedb.org/3/getting-started/introduction and find the top 5 trending movies of the last week, i.e. the 5 with the highest `vote_average` (iterate through all the pages).

For each movie, create a dictionary with its name, release date and average vote, put the dictionaries in a list and display it.

Store the list in a collection called Movies in the ADS database on MongoDB Cloud.

In [1]:
import requests
import pymongo
from typing import Tuple

In [2]:
def read_api_key(path: str) -> str:
    '''
    Load an API key from a text file.

    The whole file is read and any leading or trailing whitespace
    (including the final newline) is removed from its contents.

    Parameters
    ----------
        path: str
            Location of the file that holds the API key.

    Returns
    -------
        api_key: str
            Contents of the file without surrounding whitespace.
    '''
    with open(path) as key_file:
        raw_contents = key_file.read()

    return raw_contents.strip()

def read_credentials(path: str) -> Tuple[str, str, str]:
    '''
    Function used to read the MongoDB Cloud credentials stored in a file.

    Reads the given file and extracts the MongoDB Cloud credentials stored in it.
    The following credentials must be stored in the file, each in one line: user name,
    password and the cluster's URL. Blank lines and whitespace surrounding each
    value are ignored.

    Parameters
    ----------
        path: str
            Path to the file containing the MongoDB Cloud credentials.

    Returns
    -------
        name: str
            Name of the user that has access to the database.

        password: str
            Password of the user that has access to the database.

        url: str
            URL used to access the cluster.

    Raises
    ------
        ValueError
            If the file does not contain exactly three non-blank lines.
    '''
    with open(path) as f:
        # Strip every line and drop the blank ones so that a trailing empty
        # line or stray whitespace does not break the unpacking below
        stripped = (line.strip() for line in f.read().splitlines())
        lines = [line for line in stripped if line]

    if len(lines) != 3:
        # Report only the count: the contents are secrets and must not leak
        raise ValueError(
            f'Expected 3 non-blank lines (user name, password, URL) in '
            f'{path!r}, found {len(lines)}'
        )

    name, password, url = lines

    return name, password, url

In [3]:
# Set up initial data
# Endpoint that lists the movies trending during the last week
url_tmdb = 'https://api.themoviedb.org/3/trending/movie/week'

api_key = read_api_key('api_key')
# Pagination starts at the first page
page = 1

# Query-string parameters sent with every request; 'page' is updated
# while iterating through the result pages
parameters = {'page': page, 'api_key': api_key}

# No request is issued here: the pagination loop fetches every page itself
# (including the first one), so a request at this point would only be a
# wasted round trip whose response is never read.

# Running list with the best movies seen so far
top_5_movies = []

In [4]:
# Request page after page. The walrus operator (:=) stores each response
# and uses it as the loop condition at the same time; a requests.Response
# is falsy when its status code is 4xx/5xx, which is what ends the loop.
# NOTE(review): this presumes the API answers with an error status once the
# page number is out of range -- confirm against the TMDB documentation.
while response := requests.get(url=url_tmdb, params=parameters):
    # Decode the JSON body and pick the movies of the current page
    payload = response.json()
    page_movies = payload['results']

    # Discard the movies lacking any of the fields needed later on
    complete_movies = [
        movie for movie in page_movies
        if all(field in movie for field in ('vote_average', 'release_date', 'title'))
    ]

    # Merge the current top 5 with the new candidates and order them in
    # descending order by average vote; ties are broken by release date, so
    # among equally rated movies the most recent ones come first
    candidates = top_5_movies + complete_movies
    candidates.sort(
        key=lambda movie: (movie['vote_average'], movie['release_date']),
        reverse=True,
    )
    top_5_movies = candidates[:5]

    # Point the request parameters to the following page
    page = page + 1
    parameters.update(page=page)

In [5]:
# Keep only the fields of interest for each of the top 5 movies
keys = ('title', 'release_date', 'vote_average')

top_5_movies = [
    {field: movie[field] for field in keys}
    for movie in top_5_movies
]
print(top_5_movies)

[{'title': 'Dark Tales From Channel X', 'release_date': '2021-10-02', 'vote_average': 10.0}, {'title': 'The Survivalist', 'release_date': '2021-10-01', 'vote_average': 10.0}, {'title': 'Witch Hunt', 'release_date': '2021-10-01', 'vote_average': 10.0}, {'title': 'My Struggle', 'release_date': '2021-09-24', 'vote_average': 10.0}, {'title': 'The Zone', 'release_date': '2021-09-20', 'vote_average': 10.0}]


In [6]:
# Load the MongoDB Cloud user name, password and cluster URL from the
# local credentials file
(name, password, url) = read_credentials(path='credentials.txt')

In [7]:
# Connect to MongoDB Cloud and close existing connection if the user
# is already connected (e.g. when this cell is executed more than once)
try:
    if 'client' in globals():
        print('Closing existing MongoDB Cloud connection...')
        client.close()
        print('Successfully closed previous MongoDB Cloud connection!')

    print('Connecting to MongoDB Cloud...')
    # NOTE(review): the user name and password are inserted verbatim, so
    # any reserved URI character they contain (':', '/', '@', '%', ...)
    # must already be percent-escaped in the credentials file.
    client = pymongo.MongoClient(f'mongodb+srv://{name}:{password}@{url}')

    # MongoClient connects lazily: the constructor returns without waiting
    # for the server. A cheap 'ping' forces a round trip, so the success
    # message is only printed when the cluster is reachable and
    # ConnectionFailure can be raised here. Other errors such as a failed
    # authentication (OperationFailure) are not caught and still propagate.
    client.admin.command('ping')
    print('Successfully connected to MongoDB Cloud!')
except pymongo.errors.ConnectionFailure as e:
    print(f'Could not connect to MongoDB Cloud: {e}')

Connecting to MongoDB Cloud...
Successfully connected to MongoDB Cloud!


In [8]:
# Select the ADS database and its Movies collection (attribute-style
# access is equivalent to the dictionary-style one)
db = client.ADS
collection = db.Movies

# Start from a clean collection when it already holds documents (these
# lines can be commented out to keep the previous contents)
if collection.count_documents({}):
    collection.drop()

# Persist the top 5 movies
collection.insert_many(top_5_movies)

<pymongo.results.InsertManyResult at 0x7fa2144f3280>

In [9]:
# Read back every document in the collection to verify the insertion
for stored_movie in collection.find({}):
    print(stored_movie)

{'_id': ObjectId('615e3602ec9ebda3350eb4b5'), 'title': 'Dark Tales From Channel X', 'release_date': '2021-10-02', 'vote_average': 10.0}
{'_id': ObjectId('615e3602ec9ebda3350eb4b6'), 'title': 'The Survivalist', 'release_date': '2021-10-01', 'vote_average': 10.0}
{'_id': ObjectId('615e3602ec9ebda3350eb4b7'), 'title': 'Witch Hunt', 'release_date': '2021-10-01', 'vote_average': 10.0}
{'_id': ObjectId('615e3602ec9ebda3350eb4b8'), 'title': 'My Struggle', 'release_date': '2021-09-24', 'vote_average': 10.0}
{'_id': ObjectId('615e3602ec9ebda3350eb4b9'), 'title': 'The Zone', 'release_date': '2021-09-20', 'vote_average': 10.0}


In [10]:
# Close the connection to MongoDB Cloud once all the work is done
client.close()