Current state: 
* Automatically loads to cloud mongoDB.Atlas cluster data manually downloaded from detroit.opendata portal

In [1]:
import pandas as pd
from typing import Union
import pymongo
pymongo.version

from src.logger import prepare_logger
logger = prepare_logger()

In [2]:
def read_credentials(file_path : str = './data/credentials.txt') -> str:
    '''
    read password and dbname from external file to prepare connector string to pymongo.mongo_client.MongoClient
    # Connect to Your Cluster
    # https://docs.atlas.mongodb.com/tutorial/connect-to-your-cluster/
    # Insert and View Data in Your Cluster -> 
    # https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/
    
    :Parameters:
    
    input
        - :file_path: file path to credentials to mongo CLUSTER
    return
        - :connect_mongo_string: mongo's string to CLUSTER connection via application
    '''
    import json
    with open(file_path, encoding='utf-8', mode='r') as f:
        json_credentials_dict = json.load(f)
        connect_mongo_string = json_credentials_dict['mongo_string'].format_map(json_credentials_dict)
        return connect_mongo_string

In [3]:
def db_connector(connect_mongo_string : str) -> pymongo.mongo_client.MongoClient:
    '''
    connects to cloud database endpoint. connects to CLUSTER and return pymongo.mongo_client.MongoClient or error text
    # Insert and View Data in Your Cluster -> 
    # https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/
    
    :Parameters:
    
    input
        - :connect_mongo_string: mongo's string to CLUSTER connection via application
    return
        - :client: pymongo.mongo_client.MongoClient
    '''
    if connect_mongo_string is not None: 
        try:
            client = pymongo.MongoClient(connect_mongo_string)
            if client.admin.command('replSetGetStatus')['ok']: logger.info(f'Сonnection to cloud: True')     
            return client
        except pymongo.errors.OperationFailure:
            print(' bad auth Authentication failed.')
            logger.info(f' bad auth Authentication failed.')

In [4]:
def insert_items(collection : pymongo.collection.Collection, container : Union[pd.DataFrame, dict], one : bool = True):
    '''
    insert elements from container to cloud database. get DATABASE.COLLECTION and insert data iterating over container
    # Insert and View Data in Your Cluster -> 
    # https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/
    
    :Parameters:
    
    input
        - :collection: mongo's DATABASE.COLLECTION
        - :container: DataFrame or dict 
    return
        - :items: container to cloud database
        - :inserted_ids: container of unique ids
    '''
    for item in container:
        # theoretically, wi may use update(..., upsert=True) logic or just load the whole base
        # https://pymongo.readthedocs.io/en/stable/tutorial.html#bulk-inserts
        # https://pymongo.readthedocs.io/en/stable/api/index.html
        inserted_item_object = collection.insert_one(item)
        print(inserted_item_object.inserted_id)
    logger.info(f'Items inserted.')

In [5]:
def get_items(collection : pymongo.collection.Collection):
    '''
    find all elements from cloud database. get DATABASE.COLLECTION and iterate with cursor
    # Insert and View Data in Your Cluster -> 
    # https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/
    
    :Parameters:
    
    input
        - :collection: mongo's DATABASE.COLLECTION
    return
        - :items: container with all items from cloud database
    '''
    db_cursor = collection.find({})
    for item in db_cursor:
        print(item)
    logger.info(f'Database got. {collection.database.name}/{collection.name}')

In [6]:
mongo_string = read_credentials()

In [7]:
client = db_connector(mongo_string) 

In [8]:
db_database = client['crimes']
collection = db_database['detroit']

> Load sample data

In [9]:
import datetime
personDocument = {
  "name": { "first": "Alan", "last": "Turing II, Jr." },
  "birth": datetime.datetime(1912, 6, 23),
  "death": datetime.datetime(1954, 6, 7),
  "contribs": [ "Turing machine", "Turing test", "Turingery" ],
  "views": 125000012111
}

In [10]:
insert_items(collection=collection, container=[personDocument])

5ef13e808726e315ec991493


In [11]:
get_items(collection)

{'_id': ObjectId('5ef13bb38726e31c14bde94d'), 'X': -83.04574412799997, 'Y': 42.337435095000046, 'crime_id': 3057974, 'report_number': 1705020116, 'address': 'Brush St & Madison St', 'offense_description': 'INTIMIDATION / STALKING', 'offense_category': 'ASSAULT', 'state_offense_code': 1303, 'arrest_charge': '13003', 'charge_description': 'INTIMIDATION / STALKING', 'incident_timestamp': '2017-05-02T18:00:00.000Z', 'incident_time': '14:00', 'day_of_week': 2, 'hour_of_day': 14, 'year': 2017, 'scout_car_area': '0312', 'precinct': '03', 'block_id': 261635172002033, 'neighborhood': 'Downtown', 'council_district': 5, 'zip_code': 48226, 'longitude': -83.0457441283857, 'latitude': 42.337435095232294, 'oid': 26994990, 'Crime Against': 'Person', 'incident_timestamp_dt': '2017-05-02 14:00:00-04:00', 'incident_timestamp_dt_month': 5, 'incident_timestamp_dt_hour': 14, 'incident_timestamp_dt_day_of_week': 1, 'incident_timestamp_dt_day_of_month': 2}
{'_id': ObjectId('5ef13bb38726e31c14bde94e'), 'X': -8

***

In [12]:
df = pd.read_csv('data/RMS_Crime_Incidents2016_modified.csv', index_col=0)

  interactivity=interactivity, compiler=compiler, result=result)


In [13]:
memory_usege = df.memory_usage().sum() / (1024 * 1024)
print('App. file size if: {0} MB; shape is {1}, rows*cols;'.format(memory_usege.round(2), df.shape))

App. file size if: 63.01 MB; shape is (266420, 30), rows*cols;


In [14]:
df.head()

Unnamed: 0,X,Y,crime_id,report_number,address,offense_description,offense_category,state_offense_code,arrest_charge,charge_description,...,zip_code,longitude,latitude,oid,Crime Against,incident_timestamp_dt,incident_timestamp_dt_month,incident_timestamp_dt_hour,incident_timestamp_dt_day_of_week,incident_timestamp_dt_day_of_month
0,-83.045744,42.337435,3057974,1705020116,Brush St & Madison St,INTIMIDATION / STALKING,ASSAULT,1303,13003,INTIMIDATION / STALKING,...,48226,-83.045744,42.337435,26994990,Person,2017-05-02 14:00:00-04:00,5,14,1,2
1,-83.1522,42.441119,3018981,1701280248,Chippewa St & Greenlawn St,ARSON,BURGLARY,2201,22001,BURGLARY - FORCED ENTRY,...,48221,-83.1522,42.441119,26994991,Property,2017-01-26 03:30:00-05:00,1,3,3,26
2,-83.14347,42.334414,3251044,1808150108,McGraw St & Lumley St,LARCENY - OTHER,LARCENY,2307,23007,LARCENY - OTHER,...,48210,-83.14347,42.334414,26994992,Property,2018-08-13 00:30:00-04:00,8,0,0,13
3,-83.013835,42.346181,3393605,1908050159,E Lafayette St & Leib St,LARCENY - OTHER,LARCENY,2307,23007,LARCENY - OTHER,...,48207,-83.013835,42.346181,26994993,Property,2019-08-03 16:00:00-04:00,8,16,5,3
4,-83.156023,42.445779,3004324,1612200136,8 Mile Rd & Cherrylawn St,FRAUD BY WIRE,FRAUD,2605,26005,FRAUD BY WIRE,...,48221,-83.156023,42.445779,26994994,Property,2016-11-21 14:20:00-05:00,11,14,0,21


> Load modified sample data from detroit.opendata portal

In [15]:
print(db_database.name, collection.name)
for row in df.head(3).iterrows():
    #print(row[1].to_dict())
    insert_items(collection=collection, container=[row[1].to_dict()])

crimes detroit
5ef13e878726e315ec991494
5ef13e878726e315ec991495
5ef13e878726e315ec991496
