In [None]:
from pymongo import MongoClient

def get_top_movies_by_genre(genre, min_votes=1000, limit=5):
    """Return the best-rated non-adult movies for one genre.

    Reads the ``title.basics_cleaned`` collection of the ``3675Local``
    database on the local MongoDB server, joins every matching title to
    ``title.ratings_cleaned`` on ``tconst``, and ranks the joined titles by
    average rating.

    Args:
        genre: Value to match against the ``genres`` field. MongoDB equality
            matches a scalar equal to this value or an array containing it.
        min_votes: Minimum ``numVotes`` a title must have to be ranked.
            Defaults to 1000.
        limit: Maximum number of titles to return. Defaults to 5 and must be
            at least 1.

    Returns:
        A list of result documents (dicts) carrying the projected fields
        ``title``, ``rating``, ``votes`` and ``year``, best rating first.
        Ties on rating are broken by vote count (highest first) and then by
        ``tconst`` so repeated calls return the same order.

    Raises:
        ValueError: If ``limit`` is smaller than 1.
    """
    if limit < 1:
        raise ValueError("limit must be a positive integer")

    pipeline = [
        # Filter on the basics fields first so the join only sees candidates.
        {
            "$match": {
                "titleType": "movie",
                "isAdult": 0,
                "genres": genre
            }
        },
        # Join ratings data from the ratings collection.
        {
            "$lookup": {
                "from": "title.ratings_cleaned",
                "localField": "tconst",
                "foreignField": "tconst",
                "as": "rating_info"
            }
        },
        # Flatten rating_info; titles without a rating document drop out here.
        { "$unwind": "$rating_info" },
        # Keep only titles with enough votes to make the rating meaningful.
        {
            "$match": {
                "rating_info.numVotes": { "$gte": min_votes }
            }
        },
        # Highest rating first. The extra keys make the order deterministic
        # when several titles share the same average rating; without them the
        # subsequent $limit could return a different subset on each run.
        {
            "$sort": {
                "rating_info.averageRating": -1,
                "rating_info.numVotes": -1,
                "tconst": 1
            }
        },
        {
            "$limit": limit
        },
        # Project only the fields the caller needs.
        {
            "$project": {
                "_id": 0,
                "title": "$primaryTitle",
                "rating": "$rating_info.averageRating",
                "votes": "$rating_info.numVotes",
                "year": "$startYear"
            }
        }
    ]

    # The context manager closes the client (and its connection pool) even if
    # the aggregation raises; the cursor is fully consumed before that happens.
    with MongoClient("mongodb://localhost:27017") as client:
        db = client['3675Local']
        return list(db["title.basics_cleaned"].aggregate(pipeline))




In [None]:
# Print the top-rated Comedy movies as one list of result documents.
print(get_top_movies_by_genre(genre="Comedy"))

In [15]:
from pymongo import MongoClient

def get_top_movies_by_genre(genre, min_votes=1000, limit=5):
    """Return the best-rated non-adult movies for one genre.

    Reads the ``title.basics_cleaned`` collection of the ``3675Local``
    database on the local MongoDB server, joins every matching title to
    ``title.ratings_cleaned`` on ``tconst``, and ranks the joined titles by
    average rating.

    The filter on the basics fields runs before the ``$lookup`` so that only
    candidate titles are joined; joining the whole collection first yields
    the same result set but does far more work.

    Args:
        genre: Value to match against the ``genres`` field. MongoDB equality
            matches a scalar equal to this value or an array containing it.
        min_votes: Minimum ``numVotes`` a title must have to be ranked.
            Defaults to 1000.
        limit: Maximum number of titles to return. Defaults to 5 and must be
            at least 1.

    Returns:
        A list of result documents (dicts) carrying the projected fields
        ``title``, ``rating``, ``votes`` and ``year``, best rating first.
        Ties on rating are broken by vote count (highest first) and then by
        ``tconst`` so repeated calls return the same order.

    Raises:
        ValueError: If ``limit`` is smaller than 1.
    """
    if limit < 1:
        raise ValueError("limit must be a positive integer")

    # Conditions that only involve the basics document can be checked before
    # the join; the vote threshold needs the joined rating and comes after it.
    basics_filter = {
        "titleType": "movie",
        "isAdult": 0,
        "genres": genre
    }
    ratings_filter = {
        "rating_info.numVotes": { "$gte": min_votes }
    }

    pipeline = [
        { "$match": basics_filter },
        # Join ratings from the ratings collection into the basics documents.
        {
            "$lookup": {
                "from": "title.ratings_cleaned",
                "localField": "tconst",
                "foreignField": "tconst",
                "as": "rating_info"
            }
        },
        # Unwind rating_info; titles without a rating document drop out here.
        { "$unwind": "$rating_info" },
        { "$match": ratings_filter },
        # Highest rating first. The extra keys make the order deterministic
        # when several titles share the same average rating; without them the
        # subsequent $limit could return a different subset on each run.
        {
            "$sort": {
                "rating_info.averageRating": -1,
                "rating_info.numVotes": -1,
                "tconst": 1
            }
        },
        { "$limit": limit },
        # Project only the fields needed in the result.
        {
            "$project": {
                "_id": 0,
                "title": "$primaryTitle",
                "rating": "$rating_info.averageRating",
                "votes": "$rating_info.numVotes",
                "year": "$startYear"
            }
        }
    ]

    # The context manager closes the client (and its connection pool) even if
    # the aggregation raises; the cursor is fully consumed before that happens.
    with MongoClient("mongodb://localhost:27017") as client:
        db = client['3675Local']
        return list(db["title.basics_cleaned"].aggregate(pipeline))


In [None]:

# Fetch the top-rated Comedy movies, then print one result document per line.
top_comedy_movies = get_top_movies_by_genre(genre="Comedy")
for movie in top_comedy_movies:
    print(movie)

KeyboardInterrupt: 