In [None]:
"""Pymongo is driver for MongoDB related persistence"""
from pymongo import MongoClient # For cluster connections, also requires dnspython package
from urllib.parse import urlparse
import pandas as pd
import os

In [None]:
"""Set environment variable for connection string"""
%env MONGODB_CONNECTION=

In [None]:
class MongoDBConnect:
    """Context manager wrapping a PyMongo client for a single database.

    The database name is taken from the path component of the connection
    string, i.e. ``mongodb://host:port/<database>``.

    Args:
        host: MongoDB connection string to connect to
    """

    def __init__(self, host):
        self.host = host
        self.connection = None  # set by __enter__

    def __enter__(self):
        self.connection = MongoClient(self.host)
        print('Mongo connection created: {0}'.format(self.connection))
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing to close if the instance was never entered.
        if self.connection is not None:
            self.connection.close()

    def _get_collection(self, collection):
        """Return the named collection of the database given in the host URI.
        Args:
            collection: name of the collection to look up
        """
        # urlparse(...).path is "/<database>"; strip the leading slash.
        database = urlparse(self.host).path[1:]
        return self.connection[database][collection]

    def insert_bulk(self, collection, items):
        """MongoDB bulk insert
        Args:
            collection: name of the collection to insert to
            items: list of documents to insert; a single dict is also
                accepted and inserted as one document
        Raises:
            Exception: any driver error is printed and re-raised unchanged
        """
        try:
            target = self._get_collection(collection)
            # Collection.insert() was removed in PyMongo 4; dispatch to the
            # supported calls while keeping the old "dict or list" behaviour.
            if isinstance(items, dict):
                target.insert_one(items)
            else:
                target.insert_many(items)
            print('Successfully inserted items: {0}'.format(str(items)))
        except Exception as e:
            print('PyMongo database error: {0}'.format(str(e)))
            raise

    def get_all(self, collection, limit, order=1):
        """
        MongoDB get all items
        Args:
            collection: name of the collection to get from
            limit: integer of limit of items to retrieve, ie, 1000, 2000, etc.
            order: sort direction on 'processed_dttm': asc 1, desc -1
        Returns:
            The result of find().sort().limit(), returned as-is without being
            iterated here; consume it while the connection is still open.
        Raises:
            Exception: any driver error is printed and re-raised unchanged
        """
        try:
            source = self._get_collection(collection)
            items = source.find().sort('processed_dttm', order).limit(limit)  # oldest default
            print('Successfully found items based on limit: {0}'.format(str(limit)))
            return items
        except Exception as e:
            print('PyMongo database error: {0}'.format(str(e)))
            raise


In [None]:
# Read the MongoDB connection string from the environment.
conn = os.getenv("MONGODB_CONNECTION")
if not conn:
    # Fail fast with a clear message instead of passing an empty or missing
    # connection string on to the driver.
    raise ValueError("MONGODB_CONNECTION environment variable is not set")
with MongoDBConnect(conn) as db:
    items = db.get_all(collection="tweets", limit=100)
    # Materialise the results into a list while the connection is still open;
    # DataFrame.from_dict is documented for dict input, not a cursor.
    df = pd.DataFrame(list(items))
    print(df.head())
    # Change dtypes as needed (default as object)