In [5]:
import json

# Demonstration of how to load a file that contains secrets without accidentally
# leaking those secrets.
# JSON text is UTF-8, so the encoding is stated explicitly instead of relying on
# the platform's locale default (which is not UTF-8 on every system).
with open('credentials.json', encoding='utf-8') as f:
    data = json.load(f)

# The file handle is only needed while parsing, so the lookup below happens
# after the `with` block has already closed the file.
#
# If you want your data to be secure, don't print this variable out!
# Jupyter saves cell output inside the notebook file, so anything printed can be
# accidentally committed to version control along with the notebook.
secret_key = data['mongodb']

# We can safely print the length of the secret key. That won't leak any sensitive information.
print(f"My secret key is {len(secret_key)} characters in length.")

My secret key is 68 characters in length.


In [4]:
# Use the %pip magic (not !pip) so the packages are installed into the
# environment of the running kernel rather than whatever `pip` is first on PATH.
# pymongo is pinned to the release this notebook was executed against; certifi
# is installed as well because a later cell imports it for the TLS CA bundle.
%pip install pymongo==4.7.1 certifi

Collecting pymongo
  Downloading pymongo-4.7.1-cp310-cp310-macosx_10_9_x86_64.whl.metadata (22 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.6.1-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.7.1-cp310-cp310-macosx_10_9_x86_64.whl (486 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.1/486.1 kB[0m [31m697.0 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading dnspython-2.6.1-py3-none-any.whl (307 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.7/307.7 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h[33mDEPRECATION: pytorch-lightning 1.8.3.post0 has a non-standard dependency specifier torch>=1.9.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pytorch-lightning or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/

In [7]:
import pymongo
import certifi

# Open the connection, validating the server's TLS certificate against the
# CA bundle that certifi ships (known good certificates)
ca_bundle_path = certifi.where()
client = pymongo.MongoClient(secret_key, tlsCAFile=ca_bundle_path)
print(f"Using MongoDB version {client.server_info()['version']}.")

# Ask the server which databases it hosts
all_databases = client.list_database_names()
print(f"This MongoDB server has the databases {all_databases}")

# Once the right database name is known, get a handle to it by name:
data320 = client.get_database('data320')

# Enumerate the collections stored inside that database
all_collections = data320.list_collection_names()
print(f"This database has the collections {all_collections}")

Using MongoDB version 7.0.8.
This MongoDB server has the databases ['data320', 'sample_mflix', 'admin', 'local']
This database has the collections ['imdb', 'movies']


In [10]:
import pandas as pd
import re

# Query the "movies" collection for documents whose release_date matches the
# pattern "2002" - a broad pattern can still return a large amount of data!
release_year_pattern = re.compile("2002")
movies_collection = data320["movies"]
cursor = movies_collection.find({"release_date": release_year_pattern})

# Materialise the query results as a Pandas dataframe
metacritic = pd.DataFrame(cursor)

# Peek at the first few rows to confirm the information was read correctly
metacritic.head()


Unnamed: 0,_id,movie_id,title,release_date,description,score,thumbnail
0,66286fe7c5e83e4c458bf92f,11983,Spirited Away,"September 20, 2002","A young girl, Chihiro, becomes trapped in a st...",96,https://static.metacritic.com/images/products/...
1,66286fe7c5e83e4c458bf930,11984,Atanarjuat: The Fast Runner,"June 7, 2002",Evil in the form of an unknown shaman divides ...,91,https://static.metacritic.com/images/products/...
2,66286fe7c5e83e4c458bf931,11985,Bloody Sunday,"October 4, 2002",This controversial and critically acclaimed fi...,90,https://static.metacritic.com/images/products/...
3,66286fe7c5e83e4c458bf932,11986,Y Tu Mamá También,"March 15, 2002","Abandoned by their girlfriends for the summer,...",88,https://static.metacritic.com/images/products/...
4,66286fe7c5e83e4c458bf933,11987,Time Out,"March 29, 2002","After losing his job, Vincent (Recoing) can't ...",88,https://static.metacritic.com/images/products/...
