## Description
This notebook walks through a simple way to connect to Amazon DocumentDB from an Amazon SageMaker notebook.
Ensure that the SageMaker Notebook instance is in a security group that can access Amazon DocumentDB.

This notebook performs the following tasks:
* import the needed packages
* gather the connection details and credentials for the Amazon DocumentDB cluster and make the connection
* issue a basic command to verify a successful connection
* issue some insert statements
* issue some read statements

In [None]:
# Install or upgrade pymongo by running pip through the interpreter at
# sys.executable, i.e. the same Python that is executing this notebook.
import sys
!{sys.executable} -m pip install --upgrade pymongo

In [None]:
import os

# Path where the CA certificate bundle is expected on the notebook instance.
pem_location = "/home/ec2-user/SageMaker/rds-combined-ca-bundle.pem"

# Report whether the certificate file is present; nothing is raised if it is
# missing, the reader is only told to upload it.
cert_found = os.path.isfile(pem_location)
print(
    "CERT exists!"
    if cert_found
    else "CERT doesn't exist. Please upload in the home screen right next to this notebook"
)

In [None]:
import getpass
import json
from urllib.parse import quote_plus

import pymongo

### Provide connection information for DocumentDB cluster

In [None]:
# Prompt the user for the Amazon DocumentDB connection details and credentials.
# getpass is used for the password so it is not echoed into the notebook output.
docdb_host = input("Amazon DocumentDB cluster endpoint:")
docdb_port = input("Amazon DocumentDB port:")
username = input("Username:")
password = getpass.getpass("Password:")

# Make a client connection to Amazon DocumentDB with the supplied connection details and credentials.
# The username and password are percent-escaped before being placed in the URI:
# characters such as '@', ':', '/' or '%' in a raw credential would otherwise be
# read as URI delimiters and the connection string would be rejected.
uri_str = "mongodb://{username}:{password}@{docdb_host}:{docdb_port}/?ssl=true&replicaSet=rs0&readPreference=secondaryPreferred&retryWrites=false"
uri = uri_str.format(
    username=quote_plus(username),
    password=quote_plus(password),
    docdb_host=docdb_host,
    docdb_port=docdb_port,
)
# The CA bundle is passed as a client option rather than inside the URI.
client = pymongo.MongoClient(uri, tlsCAFile=pem_location)

# Run a simple command to verify a connection
#  This command will show cluster details for the Amazon DocumentDB cluster
client["admin"].command("ismaster")

### 

### Insert test data

In [None]:
# Insert a few documents
db_name = "ecommerce"  # database name
coll_name = "person"  # collection name

# Get objects for the database and the collection
db = client[db_name]
coll = db[coll_name]
coll.drop()  # drop collection to clear out any existing data, so re-running this cell starts clean

# Load the test documents. The context manager closes the file even when
# json.load raises on malformed input.
with open("docDBData.json", encoding="utf-8") as handle:
    data = json.load(handle)

if data:
    # Show the first document as a sample of what is being loaded.
    print(json.dumps(data[0], indent=2))
    coll.insert_many(data)
else:
    # insert_many rejects an empty list, so skip the insert instead of failing.
    print("docDBData.json contains no documents; nothing to insert")

data = []  # release the in-memory copy of the loaded documents

print(f'Inserted {coll.estimated_document_count():,} record(s)')

### Get the first 5 records where `personStatus` has the value _Inactive_

In [None]:
# Simple query: equality match on a top-level field, capped at five results.
inactive_filter = {"personStatus": "Inactive"}
inactive_cursor = coll.find(inactive_filter).limit(5)
for doc in inactive_cursor:
    print(doc["personName"])

### Get the first 5 records where at least one `paymentMethodType` property within the `paymentMethod` array has the value _Bank Account_

In [None]:
# Query with hierarchies: dotted path into the `paymentMethod` field.
bank_account_filter = {"paymentMethod.paymentMethodType": "Bank Account"}
for doc in coll.find(bank_account_filter).limit(5):
    # Collect every payment method type on the record for display.
    method_types = [d["paymentMethodType"] for d in doc["paymentMethod"]]
    print(f'Name: {doc["personName"]:<16} Payment Methods: {method_types}')

### Get the first 5 records where at least one `docVersion` property within the `docHistory` array contained in the `metadata` section has a value greater than _1.2_

In [None]:
# Range query on a dotted path: metadata -> docHistory -> docVersion.
version_filter = {"metadata.docHistory.docVersion": {"$gt": 1.2}}
for doc in coll.find(version_filter).limit(5):
    # Highest docVersion found across this record's history entries.
    newest_version = max(d["docVersion"] for d in doc["metadata"]["docHistory"])
    print(f'Name: {doc["personName"]:<15} Newest Version: {newest_version}')