In [1]:
# SECURE CONNECTION: No hardcoded passwords
import getpass
import pymongo
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

# Pre-fill known values, only prompt for password securely
username = "alwynquek07"  # Your known username
cluster = "cluster0.ioejg3g.mongodb.net"  # Your known cluster

# Only ask for password securely (won't show on screen)
password = getpass.getpass(f"Enter password for {username}: ")

# Build URI using secure input
uri = f"mongodb+srv://{username}:{password}@{cluster}/?retryWrites=true&w=majority&appName=Cluster0"

# Create a new client and connect to the server
client = MongoClient(uri, server_api=ServerApi('1'))

# Send a ping to confirm a successful connection
try:
    client.admin.command('ping')
    print("✅ Successfully connected to MongoDB!")
    print("🔒 Password was entered securely (not stored in code)")
except Exception as e:
    print(f"❌ Connection failed: {e}")

✅ Successfully connected to MongoDB!
🔒 Password was entered securely (not stored in code)


In [2]:
client.list_database_names()
db = client.sample_mflix
movies = db.movies

Question: From the `movies` collection, return the documents with the `plot` that starts with `"war"` in acending order of released date, print only title, plot and released fields. Limit the result to 5.

In [3]:
db = client.sample_mflix
movies = db.movies
pipeline = [
    {"$match": {"plot": {"$regex": "^war", "$options": "i"}}},
    {"$project": {"title": 1, "plot": 1, "released": 1}},
    {"$sort": {"released": 1}},
    {"$limit": 5}
]
result = movies.aggregate(pipeline)
for movie in result:
    print(movie)

{'_id': ObjectId('573a1398f29313caabce9508'), 'plot': 'Warrior/pacifist Princess Nausicaè desperately struggles to prevent two warring nations from destroying themselves and their dying planet.', 'title': 'Nausicaè of the Valley of the Wind', 'released': datetime.datetime(1984, 3, 11, 0, 0)}
{'_id': ObjectId('573a1398f29313caabce91ec'), 'plot': 'Warrior/pacifist Princess Nausicaè desperately struggles to prevent two warring nations from destroying themselves and their dying planet.', 'title': 'Nausicaè of the Valley of the Wind', 'released': datetime.datetime(1984, 3, 11, 0, 0)}
{'_id': ObjectId('573a1398f29313caabcebfc6'), 'plot': 'Warlords Kagetora and Takeda each wish to prevent the other from gaining hegemony in feudal Japan. The two samurai leaders pursue one another across the countryside, engaging in massive ...', 'title': 'Heaven and Earth', 'released': datetime.datetime(1991, 2, 8, 0, 0)}
{'_id': ObjectId('573a13b7f29313caabd49fe5'), 'plot': 'Warring alien and predator races d

In [8]:
for movie in movies.find({"plot": {"$regex": "^war", "$options": "i"}}) \
                   .sort("released", pymongo.ASCENDING) \
                   .limit(5):
    print({
        "title": movie.get("title"),
        "plot": movie.get("plot"),
        "released": movie.get("released")
    })

{'title': 'Nausicaè of the Valley of the Wind', 'plot': 'Warrior/pacifist Princess Nausicaè desperately struggles to prevent two warring nations from destroying themselves and their dying planet.', 'released': datetime.datetime(1984, 3, 11, 0, 0)}
{'title': 'Nausicaè of the Valley of the Wind', 'plot': 'Warrior/pacifist Princess Nausicaè desperately struggles to prevent two warring nations from destroying themselves and their dying planet.', 'released': datetime.datetime(1984, 3, 11, 0, 0)}
{'title': 'Heaven and Earth', 'plot': 'Warlords Kagetora and Takeda each wish to prevent the other from gaining hegemony in feudal Japan. The two samurai leaders pursue one another across the countryside, engaging in massive ...', 'released': datetime.datetime(1991, 2, 8, 0, 0)}
{'title': 'Aliens vs. Predator: Requiem', 'plot': 'Warring alien and predator races descend on a small town, where unsuspecting residents must band together for any chance of survival.', 'released': datetime.datetime(2007, 1

Question: Group by `rated` and count the number of movies in each.


In [9]:
#Group by `rated` and count the number of movies in each
pipeline = [
    {
        "$group": {
            "_id": "$rated",
            "count": {"$sum": 1}
        }
    }
]

# Execute the aggregation pipeline
results = movies.aggregate(pipeline)

# Print the results
for result in results:
    print(f"Rating: {result['_id']}, Count: {result['count']}")

Rating: GP, Count: 44
Rating: Not Rated, Count: 1
Rating: TV-PG, Count: 76
Rating: G, Count: 477
Rating: OPEN, Count: 1
Rating: R, Count: 5537
Rating: APPROVED, Count: 709
Rating: PG-13, Count: 2321
Rating: PG, Count: 1852
Rating: TV-Y7, Count: 3
Rating: TV-G, Count: 59
Rating: AO, Count: 3
Rating: Approved, Count: 5
Rating: M, Count: 37
Rating: TV-14, Count: 89
Rating: PASSED, Count: 181
Rating: TV-MA, Count: 60
Rating: None, Count: 9894


Question: Count the number of movies with 3 comments or more.


In [13]:
# Count the number of movies with 3 comments or more (optimized pipeline)
# Ensure index exists for fast lookup
index_name = db.comments.create_index("movie_id")

pipeline = [
    {"$lookup": {
        "from": "comments",
        "let": {"movie_id": "$_id"},
        "pipeline": [
            { "$match": { "$expr": { "$eq": ["$movie_id", "$$movie_id"] } } }
        ],
        "as": "comments"
    }},
    {"$match": {
        "$expr": {
            "$gte": [{ "$size": "$comments" }, 3]
        }
    }},
    {"$count": "num_movies"}
]

# Execute the aggregation pipeline
results = movies.aggregate(pipeline)

# Print the results
for result in results:
    print(f"Number of movies with 3 comments or more: {result['num_movies']}")

Number of movies with 3 comments or more: 385
