In [18]:
import pymongo
from pymongo import MongoClient
import pandas as pd
import glob
from bson.json_util import loads
print ("pymongo version:", pymongo.version)

pymongo version: 3.10.1


### Check status mongodb
* sudo service mongodb status
* sudo service mongodb start

In [2]:
mongo_client = MongoClient('localhost', 27017)
host_info = mongo_client['HOST']
print ("\nhost:", host_info)


host: Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'HOST')


In [16]:
print(mongo_client.list_database_names())

['admin', 'local']


### Create database with collection

In [57]:
legalthings_db = mongo_client["legalthings"]

In [58]:
incorporation_processes_collection = legalthings_db["incorporation_processes"]

### Insert processes into collection

In [59]:
file_directory: list = ['../data/preprocess/incorporation_processes/']

In [60]:
file_directory

['../data/preprocess/incorporation_processes/']

In [61]:
def list_files(
    directory: str = 'data/preprocess/incorporation_processes/',
    file_type: str = 'json'
) -> list:
    """Returns list of files in local folder."""
    return glob.glob("{}*.{}".format(directory, file_type))


def keep_only_file_name(
    file_name: str = 'data/preprocess/incorporation_processes/processes_0.json'
):
    """Returns only the file name without the extension."""
    return file_name.split('/')[-1].split('.')[0]

In [62]:
all_files = list_files(
    '../data/preprocess/incorporation_processes/',
    'json'
)

In [63]:
keep_only_file_name(all_files[0])

'processes_0'

In [64]:
incorporation_processes = []

for line in open('../data/preprocess/incorporation_processes/processes_0.json', 'r'):
    incorporation_processes.append(loads(line))

In [65]:
incorporation_processes_collection.insert_many(incorporation_processes)
# print(x.inserted_ids)

<pymongo.results.InsertManyResult at 0x7f89456d6960>

In [70]:
legalthings_db.list_collection_names()

[]

In [69]:
legalthings_db.make.drop()

### Drop database

In [15]:
mongo_client.drop_database('legalthings')

### Explore collection

In [7]:
incorporation_processes_collection.find_one()

{'_id': ObjectId('5b1ee64ac85432eb54988419'),
 'title': 'BV Oprichten',
 'name': 'BasicBlack Holding B.V',
 'scenario': {'name': 'BV Oprichten (funnel)',
  'image': None,
  'description': None,
  'alias': [{'key': 'spaar-bv', 'value': 'Spaar BV Oprichten'},
   {'key': 'normal-bv', 'value': 'BV Oprichten'},
   {'key': 'holding', 'value': 'Holding Oprichten'},
   {'key': 'inbreng', 'value': 'BV Oprichten met Inbreng'},
   {'key': 'fusie', 'value': 'Aandelen-/Bedrijfsfusie'},
   {'key': 'zorg', 'value': 'Zorg BV Oprichten'}],
  'title': 'BV Oprichten',
  'actors': [{'title': 'Cliënt',
    'requirement': None,
    'id': None,
    'organization': None,
    'name': None,
    'absent': False,
    'key': 'user'},
   {'title': 'Notaris',
    'requirement': None,
    'id': {'<ref>': 'global.config.legalflow.workflows.general.notary.id'},
    'organization': {'id': None,
     'name': None,
     'type': None,
     '<ref>': 'global.config.legalflow.workflows.general.notary.organization'},
    'abse

#### Filter out columns

In [13]:
for data in incorporation_processes_collection.find({},{ "_id": 0, "name": 1, 'alias': [{'key': 1}]}):
    data
    
print(data)

{'name': 'VIA'}


#### Filter on value in column

In [15]:
filter_query = { "title": "BV Oprichten" }

filtered_col = incorporation_processes_collection.find(filter_query, { "title": 1, "name": 1})

for data in filtered_col:
    data

print(data)

{'_id': ObjectId('5b8ef8ba0e3aecb72c01f94c'), 'title': 'BV Oprichten', 'name': 'VIA'}


#### Iterate through documents

In [16]:
docs = incorporation_processes_collection.find()

In [25]:
docs.next(), docs.rewind()

({'_id': ObjectId('5b1fbca14a3c53460253a075'),
  'title': 'BV Oprichten',
  'name': 'Zip & Zo GrandCafe',
  'scenario': {'name': 'BV Oprichten (funnel)',
   'image': None,
   'description': None,
   'alias': [{'key': 'spaar-bv', 'value': 'Spaar BV Oprichten'},
    {'key': 'normal-bv', 'value': 'BV Oprichten'},
    {'key': 'holding', 'value': 'Holding Oprichten'},
    {'key': 'inbreng', 'value': 'BV Oprichten met Inbreng'},
    {'key': 'fusie', 'value': 'Aandelen-/Bedrijfsfusie'},
    {'key': 'zorg', 'value': 'Zorg BV Oprichten'}],
   'title': 'BV Oprichten',
   'actors': [{'title': 'Cliënt',
     'requirement': None,
     'id': None,
     'organization': None,
     'name': None,
     'absent': False,
     'key': 'user'},
    {'title': 'Notaris',
     'requirement': None,
     'id': {'<ref>': 'global.config.legalflow.workflows.general.notary.id'},
     'organization': {'id': None,
      'name': None,
      'type': None,
      '<ref>': 'global.config.legalflow.workflows.general.notary.or

#### Document deepdive

In [39]:
for key, value in docs.next().items():
    print(key, value)

_id 5b22837a4a3c53b71cadd771
title BV Oprichten
name Novumator
scenario {'name': 'BV Oprichten (funnel)', 'image': None, 'description': None, 'alias': [{'key': 'spaar-bv', 'value': 'Spaar BV Oprichten'}, {'key': 'normal-bv', 'value': 'BV Oprichten'}, {'key': 'holding', 'value': 'Holding Oprichten'}, {'key': 'inbreng', 'value': 'BV Oprichten met Inbreng'}, {'key': 'fusie', 'value': 'Aandelen-/Bedrijfsfusie'}, {'key': 'zorg', 'value': 'Zorg BV Oprichten'}], 'title': 'BV Oprichten', 'actors': [{'title': 'Cliënt', 'requirement': None, 'id': None, 'organization': None, 'name': None, 'absent': False, 'key': 'user'}, {'title': 'Notaris', 'requirement': None, 'id': {'<ref>': 'global.config.legalflow.workflows.general.notary.id'}, 'organization': {'id': None, 'name': None, 'type': None, '<ref>': 'global.config.legalflow.workflows.general.notary.organization'}, 'absent': False, 'key': 'notary'}, {'title': {'<ref>': 'global.config.legalflow.workflows.general.company.name'}, 'requirement': None, '

In [41]:
for key, value in docs.next().items():
    if key == 'scenario':
        scenario = value

In [47]:
scenario['alias']

[{'key': 'spaar-bv', 'value': 'Spaar BV Oprichten'},
 {'key': 'normal-bv', 'value': 'BV Oprichten'},
 {'key': 'holding', 'value': 'Holding Oprichten'},
 {'key': 'inbreng', 'value': 'BV Oprichten met Inbreng'},
 {'key': 'fusie', 'value': 'Aandelen-/Bedrijfsfusie'},
 {'key': 'zorg', 'value': 'Zorg BV Oprichten'}]

In [53]:
for key, value in scenario.items():
    print(key)

name
image
description
alias
title
actors
actions
allow_actions
assets
categories
start
published
personal_organization
notify_assets
notify_comments
legalform
contract
version
permissions
debug
locked
sync
_id
