In [25]:
from pymongo import MongoClient

In [26]:
# Connection URL
connection_url = "mongodb://localhost:27017/"
client = MongoClient(connection_url)

In [28]:
db = client['tmdb_db']
collection = db['movies']

Notar que nuestra colección de películas guarda 3 tipos de objeto:
* Película
* Género
* Actor

In [29]:
collection.distinct('type')

['actor', 'genre']

In [36]:
pipeline = [
    {"$match": {"type": {"$exists": False}}},  # Assuming 'type' does not exist in movie documents
    
    {"$lookup": {
        "from": "movies",  # Assuming genres are in the same collection
        "localField": "genre_ids",
        "foreignField": "id",
        "as": "genre_details"
    }},
    
    {"$unwind": "$genre_details"},

    {"$match": {"genre_details.type": "genre"}},  # Ensure only genre documents are used

    {"$unwind": "$genre_ids"},
    
    {"$match": {"$expr": {"$eq": ["$genre_ids", "$genre_details.id"]}}},

    {"$group": {
        "_id": "$genre_details.name",
        "AverageRating": {"$avg": "$vote_average"}
    }},
    
    {"$sort": {"AverageRating": -1}}
]

average_ratings_by_genre = collection.aggregate(pipeline)
print("Ratings promedio por género:\n")
for genre in average_ratings_by_genre:
    print(f"Genre: {genre['_id']} - Average Rating: {genre['AverageRating']:.2f}")



Ratings promedio por género:

Genre: History - Average Rating: 7.81
Genre: Documentary - Average Rating: 7.41
Genre: Crime - Average Rating: 7.33
Genre: War - Average Rating: 7.23
Genre: Drama - Average Rating: 7.21
Genre: Science Fiction - Average Rating: 7.13
Genre: Adventure - Average Rating: 7.07
Genre: Mystery - Average Rating: 7.06
Genre: Comedy - Average Rating: 7.01
Genre: Action - Average Rating: 6.90
Genre: Romance - Average Rating: 6.67
Genre: Thriller - Average Rating: 6.58
Genre: Fantasy - Average Rating: 6.27
Genre: Family - Average Rating: 6.15
Genre: Horror - Average Rating: 6.10
Genre: Animation - Average Rating: 6.04
Genre: TV Movie - Average Rating: 5.78


In [35]:
pipeline = [
    # Match all movie documents
    {"$match": {
        "type": {"$exists": False}  # Ensure we're looking at movie documents
    }},
    
    # Group by movie title and take the first document of each group
    {"$group": {
        "_id": "$title",
        "document": {"$first": "$$ROOT"},  # Retain the first document encountered
        "maxPopularity": {"$max": "$popularity"}  # Get max popularity for the title
    }},
    
    # Sort by max popularity in descending order
    {"$sort": {"maxPopularity": -1}},
    
    # Limit the results to the top 10 movies
    {"$limit": 5},
    
    # Replace the root to output the whole document
    {"$replaceRoot": {"newRoot": "$document"}},
    
    # Lookup genres
    {"$lookup": {
        "from": "movies",  # Assuming all data is in the same collection
        "localField": "genre_ids",
        "foreignField": "id",
        "as": "genre_details"
    }},
    
    # Lookup actors
    {"$lookup": {
        "from": "movies",
        "localField": "actor_ids",
        "foreignField": "id",
        "as": "actor_details"
    }},
    
    # Project the required fields
    {"$project": {
        "title": 1,
        "popularity": 1,
        "genres": "$genre_details.name",  # Extracting only the genre names
        "actors": "$actor_details.name"  # Extracting only the actor names
    }},
    
    # Add distinct genres and actors fields
    {"$addFields": {
        "genres": {"$setUnion": ["$genres", []]},  # Ensure genres are distinct
        "actors": {"$setUnion": ["$actors", []]}  # Ensure actors are distinct
    }}
]

# Execute the aggregation pipeline
top_movies = collection.aggregate(pipeline)

# Print the results
print("Las 5 películas más populares con sus actores y géneros:\n")
for movie in top_movies:
    print(f"Title: {movie['title']}, Popularity: {movie['popularity']}")
    print(f"Genres: {', '.join(movie['genres'])}")
    print(f"Actors: {', '.join(movie['actors'])}")
    print("-" * 60)

Las 5 películas más populares con sus actores y géneros:

Title: Kingdom of the Planet of the Apes, Popularity: 3629.389
Genres: Action, Adventure, Science Fiction
Actors: Anastasia Miller, Andy McPhee, Benjamin Scott, Dichen Lachman, Dmitriy Miller, Eka Darville, Frances Berry, Freya Allan, Kaden Hartcher, Karin Konoval, Kevin Durand, Lydia Peckham, Markus Hamilton, Michael Spudic, Neil Sandilands, Nina Gallas, Nirish Bhat Surambadka, Olga Miller, Owen Teague, Peter Hayes, Peter Macon, Ras-Samuel Welda'abzgi, Samuel Falé, Sara Wiseman, Sheree da Costa, Souleymane Diasse, Travis Jeffery, Virginie Laverdure, William H. Macy
------------------------------------------------------------
Title: Godzilla x Kong: The New Empire, Popularity: 3148.883
Genres: Action, Adventure, Science Fiction
Actors: Alex Ferns, Anthony Brandon Wong, Brian Tyree Henry, Cassie Riley, Chantelle Jamieson, Chika Ikogwe, Dan Stevens, Fala Chen, Greg Hatton, Jamaliah Othman, Jordy Campbell, Kaylee Hottle, Kevin Cope

In [15]:
pipeline = [
    {"$unwind": "$production_companies"},
    {"$group": {
        "_id": "$production_companies.name",
        "average_revenue": {"$avg": "$revenue"}
    }},
    {"$sort": {"average_revenue": -1}},
    {"$limit": 10},
    {"$project": {
        "production_company": "$_id",
        "average_revenue": 1
    }}
]

top_10_companies_by_revenue = db.movies.aggregate(pipeline)

for company in top_10_companies_by_revenue:
    print(f"Production Company: {company['production_company']}, Average Revenue: {company['average_revenue']}")


Production Company: Mattel, Average Revenue: 1445638421.0
Production Company: LuckyChap Entertainment, Average Revenue: 1445638421.0
Production Company: NB/GG Pictures, Average Revenue: 1445638421.0
Production Company: Heyday Films, Average Revenue: 1084415563.0
Production Company: Joint Effort, Average Revenue: 1078958629.0
Production Company: Bron Studios, Average Revenue: 1078958629.0
Production Company: Danjaq, Average Revenue: 880674609.0
Production Company: B24, Average Revenue: 880674609.0
Production Company: EON Productions, Average Revenue: 880674609.0
Production Company: Atlas Entertainment, Average Revenue: 872874784.0666667


In [39]:
pipeline = [
    {"$unwind": "$genres"},
    {"$group": {
        "_id": "$genres.name",
        "average_budget": {"$avg": "$budget"}
    }},
    {"$sort": {"average_budget": -1}}
]
average_budget_by_genre = db.movies.aggregate(pipeline)
for genre in average_budget_by_genre:
    print(f"Genre: {genre['_id']}, Average Budget: {genre['average_budget']}")


Genre: Adventure, Average Budget: 124910833.33333333
Genre: Science Fiction, Average Budget: 103486378.73754153
Genre: Action, Average Budget: 95736386.76844783
Genre: Comedy, Average Budget: 76793814.43298969
Genre: Crime, Average Budget: 75821621.62162162
Genre: Fantasy, Average Budget: 73901041.66666667
Genre: Family, Average Budget: 57166666.666666664
Genre: History, Average Budget: 53954545.45454545
Genre: Thriller, Average Budget: 53038495.57522124
Genre: Animation, Average Budget: 49000000.0
Genre: War, Average Budget: 45000000.0
Genre: Drama, Average Budget: 44871410.88700565
Genre: Mystery, Average Budget: 33890376.29113924
Genre: Horror, Average Budget: 16478350.515463918
Genre: Romance, Average Budget: 15714285.714285715
Genre: Documentary, Average Budget: 0.0
Genre: TV Movie, Average Budget: 0.0


In [17]:
# Close the connection to MongoDB
client.close()