In [192]:
import pandas as pd
from typing import Union
import pymongo
# from pymongo import MongoClient
pymongo.version

'3.6.1'

In [196]:
def read_credentials(file_path : str = './data/credentials.txt') -> str:
    '''
    build the connection string for pymongo.mongo_client.MongoClient from a JSON credentials file.
    the file must hold one JSON object with a 'mongo_string' template; every '{placeholder}' of the
    template is replaced by the value stored under the same key of that object (e.g. password, dbname)
    # Insert and View Data in Your Cluster -> 
    # https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/
    
    :Parameters:
    
    input
        - :file_path: file path to credentials to mongo CLUSTER
    return
        - :connect_mongo_string: mongo's string to CLUSTER connection via application
    raise
        - :FileNotFoundError: file_path does not exist
        - :json.JSONDecodeError: the file content is not valid JSON
        - :KeyError: 'mongo_string' or a placeholder used by the template is missing in the file
    '''
    import json
    with open(file_path, encoding='utf-8', mode='r') as f:
        json_credentials_dict = json.load(f)
    # format_map looks every placeholder up in the same dict the template came from
    connect_mongo_string = json_credentials_dict['mongo_string'].format_map(json_credentials_dict)
    return connect_mongo_string

In [184]:
def db_connector(connect_mongo_string : str) -> pymongo.mongo_client.MongoClient:
    '''
    connects to cloud database endpoint. connects to CLUSTER and return pymongo.mongo_client.MongoClient,
    or prints an error text and returns None when the server rejects the check command
    # Insert and View Data in Your Cluster -> 
    # https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/
    
    :Parameters:
    
    input
        - :connect_mongo_string: mongo's string to CLUSTER connection via application
    return
        - :client: pymongo.mongo_client.MongoClient, or None when connect_mongo_string is None
          or when the check command fails with pymongo.errors.OperationFailure
    '''
    if connect_mongo_string is None:
        return None

    client = None
    try:
        # use the argument, not the notebook-level `mongo_string` global
        client = pymongo.MongoClient(connect_mongo_string)
        # run a command so that a rejected login shows up here rather than on first use
        # NOTE(review): OperationFailure is also raised when the user simply lacks the privilege
        # for replSetGetStatus, so the message below may blame authentication wrongly - confirm
        client.admin.command('replSetGetStatus')
        return client
    except pymongo.errors.OperationFailure:
        print(' bad auth Authentication failed.')
        # do not leave an unusable client (and its connections) behind
        if client is not None:
            client.close()
        return None

In [188]:
def insert_items(collection : pymongo.collection.Collection, container : Union[pd.DataFrame, dict], one : bool = True):
    '''
    insert elements from container to cloud database. get DATABASE.COLLECTION and insert data iterating over container
    # Insert and View Data in Your Cluster -> 
    # https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/
    
    :Parameters:
    
    input
        - :collection: mongo's DATABASE.COLLECTION
        - :container: DataFrame (every row becomes one document), dict (taken as one single document)
          or any other iterable of documents
        - :one: True - one insert_one call per document; False - one insert_many call for all documents
    return
        - :inserted_ids: list of unique ids of the inserted documents, in insertion order
    '''
    if isinstance(container, pd.DataFrame):
        # iterating a DataFrame yields column names, so turn the rows into {column: value} dicts
        # NOTE(review): depending on the pandas version the values may still be numpy scalars,
        # which the BSON encoder may reject - confirm with the installed pandas
        documents = container.to_dict(orient='records')
    elif isinstance(container, dict):
        # iterating a dict yields its keys; a bare dict is one document
        documents = [container]
    else:
        documents = container

    inserted_ids = []
    if one:
        for item in documents:
            inserted_item_object = collection.insert_one(item)
            print(inserted_item_object.inserted_id)
            inserted_ids.append(inserted_item_object.inserted_id)
    else:
        documents = list(documents)
        # insert_many refuses an empty list, so skip the call when there is nothing to send
        if documents:
            inserted_ids = list(collection.insert_many(documents).inserted_ids)
            for inserted_id in inserted_ids:
                print(inserted_id)
    return inserted_ids

In [190]:
def get_items(collection : pymongo.collection.Collection):
    '''
    find all elements from cloud database. get DATABASE.COLLECTION and iterate with cursor,
    printing every document and collecting it
    # Insert and View Data in Your Cluster -> 
    # https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/
    
    :Parameters:
    
    input
        - :collection: mongo's DATABASE.COLLECTION
    return
        - :items: list with all items from cloud database, in cursor order
    '''
    items = []
    # an empty filter matches every document of the collection
    db_cursor = collection.find({})
    for item in db_cursor:
        print(item)
        items.append(item)
    return items

In [None]:
# build the cluster connection string from the local credentials file
mongo_string = read_credentials(file_path='./data/credentials.txt')

In [185]:
# db_connector gives back None when its check fails; the next cell would then raise AttributeError
client = db_connector(connect_mongo_string=mongo_string)

In [186]:
# handles to the 'crimes' database and its 'detroit' collection
db_database = client['crimes']
detroit_collection = db_database['detroit']

In [187]:
import datetime

# sample document used to try out insert_items / get_items
personDocument = {
    "name": {
        "first": "Alan",
        "last": "Turing II, Jr.",
    },
    "birth": datetime.datetime(1912, 6, 23),
    "death": datetime.datetime(1954, 6, 7),
    "contribs": [
        "Turing machine",
        "Turing test",
        "Turingery",
    ],
    "views": 125000012111,
}

In [189]:
# the sample document is wrapped in a list because insert_items iterates over its container
insert_items(detroit_collection, [personDocument])

5eefd4f98726e30a40d7174e


In [191]:
# print every document currently stored in the collection
get_items(collection=detroit_collection)

{'_id': ObjectId('5eefd4f98726e30a40d7174e'), 'name': {'first': 'Alan', 'last': 'Turing II, Jr.'}, 'birth': datetime.datetime(1912, 6, 23, 0, 0), 'death': datetime.datetime(1954, 6, 7, 0, 0), 'contribs': ['Turing machine', 'Turing test', 'Turingery'], 'views': 125000012111}


***

In [135]:
# load the crime incidents; the first CSV column becomes the row index
df = pd.read_csv(
    filepath_or_buffer='data/RMS_Crime_Incidents2016_modified.csv',
    index_col=0,
)

  interactivity=interactivity, compiler=compiler, result=result)


In [136]:
# in-memory size of the frame in MiB (sum of per-column bytes as reported by memory_usage());
# the variable name is kept as spelled because the following cell reads it
memory_usege = df.memory_usage().sum() / 1024 ** 2

In [137]:
# report the size computed above (DataFrame memory, not the CSV size on disk) and the frame shape
print(
    'App. file size if: {0} MB; shape is {1}, rows*cols;'.format(
        memory_usege.round(2),
        df.shape,
    )
)

App. file size if: 63.01 MB; shape is (266420, 30), rows*cols;


In [20]:
# preview the first five rows
df.head(n=5)

Unnamed: 0,X,Y,crime_id,report_number,address,offense_description,offense_category,state_offense_code,arrest_charge,charge_description,...,zip_code,longitude,latitude,oid,Crime Against,incident_timestamp_dt,incident_timestamp_dt_month,incident_timestamp_dt_hour,incident_timestamp_dt_day_of_week,incident_timestamp_dt_day_of_month
0,-83.045744,42.337435,3057974,1705020116,Brush St & Madison St,INTIMIDATION / STALKING,ASSAULT,1303,13003,INTIMIDATION / STALKING,...,48226,-83.045744,42.337435,26994990,Person,2017-05-02 14:00:00-04:00,5,14,1,2
1,-83.1522,42.441119,3018981,1701280248,Chippewa St & Greenlawn St,ARSON,BURGLARY,2201,22001,BURGLARY - FORCED ENTRY,...,48221,-83.1522,42.441119,26994991,Property,2017-01-26 03:30:00-05:00,1,3,3,26
2,-83.14347,42.334414,3251044,1808150108,McGraw St & Lumley St,LARCENY - OTHER,LARCENY,2307,23007,LARCENY - OTHER,...,48210,-83.14347,42.334414,26994992,Property,2018-08-13 00:30:00-04:00,8,0,0,13
3,-83.013835,42.346181,3393605,1908050159,E Lafayette St & Leib St,LARCENY - OTHER,LARCENY,2307,23007,LARCENY - OTHER,...,48207,-83.013835,42.346181,26994993,Property,2019-08-03 16:00:00-04:00,8,16,5,3
4,-83.156023,42.445779,3004324,1612200136,8 Mile Rd & Cherrylawn St,FRAUD BY WIRE,FRAUD,2605,26005,FRAUD BY WIRE,...,48221,-83.156023,42.445779,26994994,Property,2016-11-21 14:20:00-05:00,11,14,0,21


In [None]:
{"0": {"X": -83.04574412799997,
  "Y": 42.337435095000046,
  "crime_id": 3057974,
  "report_number": 1705020116,
  "address": "Brush St & Madison St",
  "offense_description": "INTIMIDATION / STALKING",
  "offense_category": "ASSAULT",
  "state_offense_code": 1303,
  "arrest_charge": "13003",
  "charge_description": "INTIMIDATION / STALKING",
  "incident_timestamp": "2017-05-02T18:00:00.000Z",
  "incident_time": "14:00",
  "day_of_week": 2,
  "hour_of_day": 14,
  "year": 2017,
  "scout_car_area": "0312",
  "precinct": "03",
  "block_id": 261635172002033,
  "neighborhood": "Downtown",
  "council_district": 5,
  "zip_code": 48226,
  "longitude": -83.0457441283857,
  "latitude": 42.337435095232294,
  "oid": 26994990,
  "Crime Against": "Person",
  "incident_timestamp_dt": "2017-05-02 14:00:00-04:00",
  "incident_timestamp_dt_month": 5,
  "incident_timestamp_dt_hour": 14,
  "incident_timestamp_dt_day_of_week": 1,
  "incident_timestamp_dt_day_of_month": 2},
 "1": {"X": -83.15220004799994,
  "Y": 42.44111934500006,
  "crime_id": 3018981,
  "report_number": 1701280248,
  "address": "Chippewa St & Greenlawn St",
  "offense_description": "ARSON",
  "offense_category": "BURGLARY",
  "state_offense_code": 2201,
  "arrest_charge": "22001",
  "charge_description": "BURGLARY - FORCED ENTRY",
  "incident_timestamp": "2017-01-26T08:30:00.000Z",
  "incident_time": "03:30",
  "day_of_week": 4,
  "hour_of_day": 3,
  "year": 2017,
  "scout_car_area": "1203",
  "precinct": "12",
  "block_id": 261635390002005,
  "neighborhood": "Garden Homes",
  "council_district": 2,
  "zip_code": 48221,
  "longitude": -83.15220004758409,
  "latitude": 42.441119345347204,
  "oid": 26994991,
  "Crime Against": "Property",
  "incident_timestamp_dt": "2017-01-26 03:30:00-05:00",
  "incident_timestamp_dt_month": 1,
  "incident_timestamp_dt_hour": 3,
  "incident_timestamp_dt_day_of_week": 3,
  "incident_timestamp_dt_day_of_month": 26}}

In [25]:
# first two rows as {row_index: {column: value}}
df.head(n=2).transpose().to_dict()

{0: {'X': -83.04574412799997,
  'Y': 42.337435095000046,
  'crime_id': 3057974,
  'report_number': 1705020116,
  'address': 'Brush St & Madison St',
  'offense_description': 'INTIMIDATION / STALKING',
  'offense_category': 'ASSAULT',
  'state_offense_code': 1303,
  'arrest_charge': '13003',
  'charge_description': 'INTIMIDATION / STALKING',
  'incident_timestamp': '2017-05-02T18:00:00.000Z',
  'incident_time': '14:00',
  'day_of_week': 2,
  'hour_of_day': 14,
  'year': 2017,
  'scout_car_area': '0312',
  'precinct': '03',
  'block_id': 261635172002033,
  'neighborhood': 'Downtown',
  'council_district': 5,
  'zip_code': 48226,
  'longitude': -83.0457441283857,
  'latitude': 42.337435095232294,
  'oid': 26994990,
  'Crime Against': 'Person',
  'incident_timestamp_dt': '2017-05-02 14:00:00-04:00',
  'incident_timestamp_dt_month': 5,
  'incident_timestamp_dt_hour': 14,
  'incident_timestamp_dt_day_of_week': 1,
  'incident_timestamp_dt_day_of_month': 2},
 1: {'X': -83.15220004799994,
  'Y'

In [21]:
# transposes the whole frame first, so head() keeps the first five *columns* of df:
# the result holds one {column: value} entry for every row of df
df.transpose().head(n=5).to_dict()

{0: {'X': -83.04574412799997,
  'Y': 42.337435095000046,
  'crime_id': 3057974,
  'report_number': 1705020116,
  'address': 'Brush St & Madison St'},
 1: {'X': -83.15220004799994,
  'Y': 42.44111934500006,
  'crime_id': 3018981,
  'report_number': 1701280248,
  'address': 'Chippewa St & Greenlawn St'},
 2: {'X': -83.14346986099997,
  'Y': 42.33441377200006,
  'crime_id': 3251044,
  'report_number': 1808150108,
  'address': 'McGraw St & Lumley St'},
 3: {'X': -83.01383484199994,
  'Y': 42.346180926000045,
  'crime_id': 3393605,
  'report_number': 1908050159,
  'address': 'E Lafayette St & Leib St'},
 4: {'X': -83.15602290699997,
  'Y': 42.44577881700008,
  'crime_id': 3004324,
  'report_number': 1612200136,
  'address': '8 Mile Rd & Cherrylawn St'},
 5: {'X': -83.16660061,
  'Y': 42.36103858200005,
  'crime_id': 3076195,
  'report_number': 1706110226,
  'address': 'Ellis St & Manor'},
 6: {'X': -83.04931988099996,
  'Y': 42.43835011700002,
  'crime_id': 3084544,
  'report_number': 17063