## **Setup**


In [3]:
!pip install --upgrade pymongo certifi

# Import necessary libraries
from pymongo import MongoClient
import pprint

# Replace with your MongoDB Atlas connection string
connection_string = "mongodb+srv://wfy8cn:9PO9iKmPAqtjTMZh@cluster0.gdfcm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0"

# Connect to MongoDB Atlas
client = MongoClient(connection_string)

# Access the sample_mflix database and the movies collection
db = client['sample_mflix']
collection = db['movies']




## **Exercise 1: Basic Searching**

In [2]:
action_movie = collection.find_one({"genres": "Action"})
movies_after_2000 = collection.find({"year": {"$gt": 2000}}).limit(5)
high_rated_movies = collection.find({"imdb.rating": {"$gt": 8.5}}).limit(5)
action_adventure_movies = collection.find({"genres": {"$all": ["Action",
"Adventure"]}}).limit(5)

pprint.pprint(action_movie)
for movie in movies_after_2000:
  pprint.pprint(movie)
for movie in high_rated_movies:
  pprint.pprint(movie)
for movie in action_adventure_movies:
  pprint.pprint(movie)


{'_id': ObjectId('573a1393f29313caabcdcb42'),
 'awards': {'nominations': 4,
            'text': 'Nominated for 1 Oscar. Another 1 win & 4 nominations.',
            'wins': 2},
 'cast': ['Meg Ryan', 'Hugh Jackman', 'Liev Schreiber', 'Breckin Meyer'],
 'countries': ['USA'],
 'directors': ['James Mangold'],
 'fullplot': 'Kate and her actor brother live in N.Y. in the 21st Century. Her '
             'ex-boyfriend, Stuart, lives above her apartment. Stuart finds a '
             'space near the Brooklyn Bridge where there is a gap in time. He '
             'goes back to the 19th Century and takes pictures of the place. '
             "Leopold -- a man living in the 1870s -- is puzzled by Stuart's "
             'tiny camera, follows him back through the gap, and they both '
             'ended up in the present day. Leopold is clueless about his new '
             'surroundings. He gets help and insight from Charlie who thinks '
             'that Leopold is an actor who is always in cha

## **Exercise 2: Sorting Results**

In [5]:
sorted_comedy_movies = collection.find({"genres": "Comedy"}).sort("imdb.rating", -
1).limit(5)
sorted_drama_movies = collection.find({"genres": "Drama"}).sort("year", 1).limit(5)

for movie in sorted_comedy_movies:
  pprint.pprint(movie)
for movie in sorted_drama_movies:
  pprint.pprint(movie)

{'_id': ObjectId('573a13faf29313caabdeca97'),
 'awards': {'nominations': 1, 'text': '1 win & 1 nomination.', 'wins': 1},
 'cast': ['Vasiliy Butkevich',
          'Pavel Chinaryov',
          'Fyodor Lavrov',
          'Elena Nesterova'],
 'countries': ['Russia'],
 'directors': ['Mikhail Mestetskiy'],
 'genres': ['Comedy', 'Drama'],
 'imdb': {'id': 4767340, 'rating': '', 'votes': ''},
 'languages': ['Russian'],
 'lastupdated': '2015-06-18 17:54:37.890000000',
 'num_mflix_comments': 0,
 'released': datetime.datetime(2015, 6, 10, 0, 0),
 'runtime': 97,
 'title': 'Tryapichnyy soyuz',
 'type': 'movie',
 'writers': ['Mikhail Mestetskiy'],
 'year': 2015}
{'_id': ObjectId('573a13faf29313caabdeca48'),
 'awards': {'nominations': 1, 'text': '1 nomination.', 'wins': 0},
 'cast': ['Aleksandr Pal',
          'Aleksandr Ilin',
          'Kristina Kazinskaya',
          'Nodar Dzhanelidze'],
 'countries': ['Russia'],
 'directors': ['Anton Chizhikov', 'Ilya Chizhikov'],
 'genres': ['Comedy', 'Mystery',

## **Exercise 3: Aggregation Pipeline**

In [6]:
avg_rating_by_genre = collection.aggregate([
{"$unwind": "$genres"},
{"$group": {"_id": "$genres", "avg_rating": {"$avg": "$imdb.rating"}}},
{"$sort": {"avg_rating": -1}},
{"$limit": 5}
])
top_directors = collection.aggregate([
{"$group": {"_id": "$directors", "avg_rating": {"$avg": "$imdb.rating"}}},
{"$sort": {"avg_rating": -1}},
{"$limit": 5}
])
movies_per_year = collection.aggregate([
{"$group": {"_id": "$year", "total_movies": {"$sum": 1}}},
{"$sort": {"_id": 1}}
])

for genre in avg_rating_by_genre:
  pprint.pprint(genre)
for director in top_directors:
  pprint.pprint(director)
for year in movies_per_year:
  pprint.pprint(year)

{'_id': 'Film-Noir', 'avg_rating': 7.396774193548388}
{'_id': 'Short', 'avg_rating': 7.390625}
{'_id': 'Documentary', 'avg_rating': 7.365130483064964}
{'_id': 'News', 'avg_rating': 7.252272727272728}
{'_id': 'History', 'avg_rating': 7.171942446043165}
{'_id': ['Sara Hirsh Bordo'], 'avg_rating': 9.4}
{'_id': ['Kevin Derek'], 'avg_rating': 9.3}
{'_id': ['Michael Benson'], 'avg_rating': 9.0}
{'_id': ['Slobodan Sijan'], 'avg_rating': 8.95}
{'_id': ['Jeethu Joseph'], 'avg_rating': 8.9}
{'_id': 1950, 'total_movies': 55}
{'_id': 1951, 'total_movies': 54}
{'_id': 1952, 'total_movies': 45}
{'_id': 1953, 'total_movies': 65}
{'_id': 1954, 'total_movies': 47}
{'_id': 1955, 'total_movies': 67}
{'_id': 1956, 'total_movies': 67}
{'_id': 1957, 'total_movies': 71}
{'_id': 1958, 'total_movies': 75}
{'_id': 1959, 'total_movies': 71}
{'_id': 1960, 'total_movies': 73}
{'_id': 1961, 'total_movies': 68}
{'_id': 1962, 'total_movies': 70}
{'_id': 1963, 'total_movies': 69}
{'_id': 1964, 'total_movies': 86}
{'_i

## **Exercise 4: Updating and Deleting Documents**


In [None]:
collection.update_one({"title": "The Godfather"}, {"$set": {"imdb.rating": 9.5}})
collection.update_many({"genres": "Horror", "imdb.rating": {"$exists": False}}, {"$set":
{"imdb.rating": 6.0}})
collection.delete_many({"year": {"$lt": 1950}})

DeleteResult({'n': 0, 'electionId': ObjectId('7fffffff0000000000000121'), 'opTime': {'ts': Timestamp(1728271494, 19), 't': 289}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1728271494, 19), 'signature': {'hash': b'\xee|\xe9\xb1b\xe43s\x97\x10CJ\xd6W\xfb\xf5wC_\xf6', 'keyId': 7363326094432272385}}, 'operationTime': Timestamp(1728271494, 19)}, acknowledged=True)

## **Exercise 5: Text Search**

In [7]:
# Create a text index on the title field
#collection.create_index([("title", "text")])
# Find movies with 'love' in the title
love_movies = collection.find({"$text": {"$search": "love"}})
# Text search across title and plot, sorted by IMDb rating
#collection.create_index([("title", "text"), ("plot", "text")])
war_movies = collection.find({"$text": {"$search": "war"}}).sort("imdb.rating", -
1).limit(5)
for movie in love_movies:
  pprint.pprint(movie)
for movie in war_movies:
  pprint.pprint(movie)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
             "the locals, including Jasper (Josh Hartnett). As Jasper's "
             'willful girlfriend Samantha (Leelee Sobieski) looks on, Kelley '
             'and Jasper engage in a dangerous car race that turns disastrous, '
             "leaving a popular diner owned by Samantha's mother in ruins. "
             'Kelley and Jasper are sentenced to a creatively ironic but '
             'fitting punishment: They must help rebuild the diner. Not only '
             "does this ruin Kelley's summer plans, he also must board at the "
             'home of his rival, Jasper. Kelley begins to fulfill his '
             '"sentence," but refuses to have anything to do with Jasper and '
             'his parents. When Kelley again encounters Samantha, the '
             'attraction is immediate and strong. First love blossoms in the '
             'Berkshire woods ("a little bit of heaven, here on earth," '
             '

## **Exercise 6: Combining Queries**

In [None]:
action_high_rated_movies = collection.find({"genres": "Action", "imdb.rating": {"$gt":
8}}).sort("year", -1)
nolan_movies = collection.find({"directors": "Christopher Nolan", "imdb.rating": {"$gt":
8}}).sort("imdb.rating", -1).limit(3)