### Current state: 
* Automatically loads data from the Detroit Open Data portal into a cloud MongoDB Atlas cluster

In [11]:
# Third-party and typing imports used by the helper functions defined in this notebook.
import pandas as pd
from typing import Union
import pymongo
# Bare expression that is not the last line of the cell, so its value is not displayed.
pymongo.version

# Project-local logger factory; the resulting `logger` is used by db_connector and get_items.
from src.logger import prepare_logger
logger = prepare_logger()

# Progress bar used by insert_per_one_item.
from tqdm import tqdm

In [12]:
def read_credentials(file_path : str = './data/credentials.txt') -> str:
    '''
    Build the MongoDB connection string from a JSON credentials file.

    The file is parsed as JSON into a dict. The value stored under the 'mongo_string' key is
    treated as a template and its `{placeholder}` fields are filled from that same dict with
    `str.format_map`, so every placeholder must have a matching key in the file.
    # Mongo docs
    # Connect to Your Cluster
    # https://docs.atlas.mongodb.com/tutorial/connect-to-your-cluster/
    # Insert and View Data in Your Cluster ->
    # https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/

    :Parameters:

    input
        - :file_path: path to the UTF-8 JSON file with the credentials to the mongo CLUSTER
    return
        - :connect_mongo_string: mongo's string to CLUSTER connection via application
    raises
        - KeyError when 'mongo_string' or a key referenced by its placeholders is missing
    '''
    import json

    with open(file_path, mode='r', encoding='utf-8') as credentials_file:
        credentials = json.load(credentials_file)

    # The template and the values that fill it live in the same JSON object.
    template = credentials['mongo_string']
    return template.format_map(credentials)

In [13]:
def db_connector(connect_mongo_string : str) -> Union[pymongo.mongo_client.MongoClient, None]:
    '''
    Connect to the cloud database endpoint (CLUSTER) and return the client.

    The connection is checked by running the `replSetGetStatus` admin command. When the server
    rejects that command (pymongo.errors.OperationFailure) the failure is printed and logged,
    the client is closed and None is returned, so callers must check the result before use.
    # Mongo docs
    # Insert and View Data in Your Cluster ->
    # https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/

    :Parameters:

    input
        - :connect_mongo_string: mongo's string to CLUSTER connection via application
    return
        - :client: pymongo.mongo_client.MongoClient on success, None when
          `connect_mongo_string` is None or the status command fails with OperationFailure
    '''
    if connect_mongo_string is None:
        return None

    client = pymongo.MongoClient(connect_mongo_string)
    try:
        if client.admin.command('replSetGetStatus')['ok']:
            logger.info('Сonnection to cloud: True')
    except pymongo.errors.OperationFailure:
        # Release the sockets/monitor threads of the unusable client instead of leaking them.
        client.close()
        # NOTE(review): OperationFailure is reported as an auth failure here; it may also cover
        # other server-side rejections of the command - confirm.
        print(' bad auth Authentication failed.')
        logger.error(' bad auth Authentication failed.')
        return None
    return client

In [14]:
def insert_per_one_item(collection : pymongo.collection.Collection, container : Union[pd.DataFrame, dict], one : bool = True):
    '''
    Insert documents into the cloud database one at a time, with a progress bar.

    Gets DATABASE.COLLECTION and calls `insert_one` for every document of the container.
    # Insert and View Data in Your Cluster ->
    # https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/
    # Bulk alternative: https://pymongo.readthedocs.io/en/stable/tutorial.html#bulk-inserts
    # API index: https://pymongo.readthedocs.io/en/stable/api/index.html
    # PANDAS case https://sricharanphp.blogspot.com/2020/01/insert-pandas-dataframe-into-mongodb.html

    :Parameters:

    input
        - :collection: mongo's DATABASE.COLLECTION
        - :container: DataFrame (one document per row), a single dict (one document),
          or any iterable of dict documents
        - :one: not used by this function; kept so existing calls keep working
    return
        - :inserted_ids: list with the `_id` of every inserted document, in insertion order
    '''
    if isinstance(container, pd.DataFrame):
        # Iterating a DataFrame yields column labels, not rows; convert rows to dicts first.
        documents = container.to_dict('records')
    elif isinstance(container, dict):
        # Iterating a dict yields its keys; treat a bare dict as a single document.
        documents = [container]
    else:
        documents = container

    # Theoretically, we may use update(..., upsert=True) logic or just load the whole base.
    inserted_ids = []
    for item in tqdm(documents):
        inserted_ids.append(collection.insert_one(item).inserted_id)
    return inserted_ids

def insert_many_items(collection : pymongo.collection.Collection, container : Union[pd.DataFrame, dict], one : bool = True):
    '''
    Insert all documents of the container into the cloud database with a single bulk call.

    Gets DATABASE.COLLECTION and passes the documents to `insert_many`.
    # Insert and View Data in Your Cluster ->
    # https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/
    # https://docs.mongodb.com/manual/reference/method/db.collection.insertMany/

    :Parameters:

    input
        - :collection: mongo's DATABASE.COLLECTION
        - :container: DataFrame (one document per row), a single dict (one document),
          or any iterable of dict documents
        - :one: not used by this function; kept so existing calls keep working
    return
        - :inserted_ids: list with the `_id` of every inserted document; empty list when an
          empty DataFrame/list/tuple is given (nothing is sent to the server in that case)
    '''
    if isinstance(container, pd.DataFrame):
        # insert_many needs an iterable of documents; turn every row into a dict.
        documents = container.to_dict('records')
    elif isinstance(container, dict):
        # A mapping is rejected by insert_many; treat a bare dict as a single document.
        documents = [container]
    else:
        documents = container

    # insert_many refuses an empty sequence, so an empty load is handled as a no-op.
    if isinstance(documents, (list, tuple)) and len(documents) == 0:
        return []

    return collection.insert_many(documents).inserted_ids

In [15]:
def get_items(collection : pymongo.collection.Collection):
    '''
    Lazily yield every document of a cloud database collection.

    Gets DATABASE.COLLECTION, runs `find({})` and yields the documents one by one from the
    cursor. This is a generator function: calling it fetches nothing until the result is
    iterated (e.g. `for item in get_items(collection)` or `list(get_items(collection))`).
    The log message is written only after the cursor has been fully consumed.
    # Mongo docs
    # Insert and View Data in Your Cluster ->
    # https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/

    :Parameters:

    input
        - :collection: mongo's DATABASE.COLLECTION
    return
        - :items: generator over all items from the cloud database collection
    '''
    # The context manager closes the cursor even when the consumer stops iterating early.
    with collection.find({}) as db_cursor:
        yield from db_cursor
    logger.info(f'Database got. {collection.database.name}/{collection.name}')

> Connect to the sample storage instance (a MongoDB Atlas collection) in the cloud

In [16]:
# Build the connection string from the default credentials file ('./data/credentials.txt').
mongo_string = read_credentials()

In [17]:
# Open the cluster client. db_connector returns None when the status command fails with
# pymongo.errors.OperationFailure, in which case the next cell cannot index into `client`.
client = db_connector(mongo_string) 

In [18]:
# Select the 'crimes' database and its 'detroit_example' collection from the client.
db_database = client['crimes']
collection = db_database['detroit_example']

> Load sample data

In [19]:
import datetime

# Sample document (nested dict, datetimes, list, large int) used to try out an insert.
personDocument = {
    "name": {
        "first": "Alan",
        "last": "Turing II, Jr.",
    },
    "birth": datetime.datetime(year=1912, month=6, day=23),
    "death": datetime.datetime(year=1954, month=6, day=7),
    "contribs": [
        "Turing machine",
        "Turing test",
        "Turingery",
    ],
    "views": 125000012111,
}

In [20]:
# Insert the sample document; it is wrapped in a list because the function iterates over
# `container` and inserts each element as one document.
insert_per_one_item(collection=collection, container=[personDocument])

100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 17.67it/s]


> Get sample data

In [21]:
# NOTE: get_items is a generator function, so this call only creates the generator (see the
# `<generator object ...>` output) and reads nothing from the database yet. Iterate over it,
# e.g. `list(get_items(collection))`, to actually fetch the documents.
get_items(collection)

<generator object get_items at 0x000000E9F8A5FA20>

***