# Notebook with all DB creation operations and CRUD operations

# Part 1: Migrate data from MySQL database to Mongo database

## Imports

In [89]:
from tqdm.notebook import tqdm
from pprint import pprint
from decimal import Decimal
from bson.decimal128 import Decimal128
from datetime import date

## Connection

In [15]:
# Connect to mySQL server
import mysql.connector

def connect_mysql():

    mydb = mysql.connector.connect(host='rhea.isegi.unl.pt', 
                                   user='wwi-read-only-user', 
                                   database='WWI',
                                   password='jGp2GCqrss6nfTEu5ZawhW3mksLsQYQb',
                                   port=3306
                                  )
    mycursor = mydb.cursor()
    
    return mydb, mycursor

    # Getting the table names
    #mycursor.execute('SHOW TABLES;')
    #print(f"Tables: {mycursor.fetchall()}")

    # Getting a tables column descriptios
    #mycursor.execute('DESCRIBE Purchasing_PurchaseOrderLines;')
    #print(f"Purchasing_PurchaseOrderLines schema: {mycursor.fetchall()}")
    
mydb, mycursor = connect_mysql()

In [16]:
# Connect to Mongo server

from pymongo import MongoClient

host="rhea.isegi.unl.pt"
port="27049"
user="GROUP_32"
password="bRG2XjRZhrRA9IfpmENyXxMlWQDUJdzL"
protocol="mongodb"
client = MongoClient(f"{protocol}://{user}:{password}@{host}:{port}")

# Connect to mongo db
db = client.#database name here

In [17]:
# Get mysql tables
mycursor.execute('SHOW TABLES;')
tables = mycursor.fetchall()

## For deleting everything and flushing the cursor

In [204]:
db.list_collection_names()

['Sales_InvoiceLines',
 'Sales_Orders',
 'Sales_CustomerTransactions',
 'Purchasing_SupplierTransactions',
 'Sales_OrderLines',
 'Sales_Customers',
 'Purchasing_PurchaseOrders',
 'Purchasing_PurchaseOrderLines',
 'Purchasing_SupplierCategories',
 'Sales_CustomerCategories',
 'Sales_Invoices',
 'Purchasing_Suppliers']

In [205]:
for i in range(len(tables)):
    
    table_name = tables[i][0]
    
    db[table_name].drop()

In [206]:
db.list_collection_names()

[]

In [10]:
mycursor.fetchall()

[]

## Get row counts of each table

In [7]:
for i in range(len(tables)):
    
    # Get MySQL table name
    table_name = tables[i][0]
    
    # Define our query
    query = ( "SELECT * " 
             "FROM " + table_name )
    
    # Execute query
    mycursor.execute( query )
    
    # Print row counts
    print(table_name)
    print(len(mycursor.fetchall()))

Purchasing_PurchaseOrderLines
8367
Purchasing_PurchaseOrders
2074
Purchasing_SupplierCategories
9
Purchasing_SupplierTransactions
2438
Purchasing_Suppliers
13
Sales_CustomerCategories
8
Sales_CustomerTransactions
52000
Sales_Customers
663
Sales_InvoiceLines
30000
Sales_Invoices
70510
Sales_OrderLines
76000
Sales_Orders
73595


## Migrate data from MySQL database to mongo database

In [207]:
# Adapted from:
# https://nicksardo.wordpress.com/2015/11/24/transferring-data-between-mysql-and-mongodb/

def migrate_table(table_index):
    
    # Get MySQL table name
    table_name = tables[table_index][0]
    
    # Print table name
    print('Migrating table', table_name)
    
    # Get names of the columns of this table
    describe = 'DESCRIBE ' + table_name + ';'
    mycursor.execute(describe)
    describe_out = mycursor.fetchall()

    cols = []
    for col_index in range(len(describe_out)):
        col = describe_out[col_index][0]
        cols.append(col)
    
    # Create mongodb collection
    collection = db[table_name]
    
    # Define our query
    query = ( "SELECT * " 
             "FROM " + table_name )
    
    # Get row count and print it
    mycursor.execute( query )
    print('Rows in this table:', str(len(mycursor.fetchall())))
    
    # Execute query again
    mycursor.execute( query )
    
    #mongo client specifically requires python dict
    cus = dict()

    #custom record id rather than mongodb default hash id                                          
    cid = 0                                                 

    #cycle through each mySQL row
    for ( row ) in tqdm(mycursor):
        cid        += 1   #increment id
        cus['_id'] = cid                                    

        #check if current row is null
        for i in range( 0, len( row ) ):
            
            if row[i] == None:
                #if the record is null, skip it                  
                continue
            
            else:
                row_title      = "".join( cols[i] )  
                
                field          = row[i]
                
                if isinstance(field, Decimal):
                    field = float(str(field))
                    
                if isinstance(field, date):
                    field = str(field)
                
                #add current record's field's title and value             
                cus[row_title] = field
                
        #we've completed processing this row, insert it into mongoldb      
        collection.insert_one( cus )

In [208]:
def migrate_table_10k_entries(table_index, ten_k_step):
    
    from_entry = (ten_k_step * 10000) + 1
    till_entry = ((ten_k_step + 1) * 10000) + 1
    print('Migrate entries larger or equal to', str(from_entry), 'and smaller than', str(till_entry))
    
    # Get MySQL table name
    table_name = tables[table_index][0]
    
    # Print table name
    print('Migrating table', table_name)
    
    # Get names of the columns of this table
    describe = 'DESCRIBE ' + table_name + ';'
    mycursor.execute(describe)
    describe_out = mycursor.fetchall()

    cols = []
    for col_index in range(len(describe_out)):
        col = describe_out[col_index][0]
        cols.append(col)
    
    # Create mongodb collection
    collection = db[table_name]
    
    collection.delete_many({'_id' : { "$gte": from_entry}})
    
    # Define our query
    query = ( "SELECT * " 
             "FROM " + table_name )
    
    # Get row count and print it
    mycursor.execute( query )
    print('Rows in this table:', str(len(mycursor.fetchall())))
    
    # Execute query again
    mycursor.execute( query )
    
    #mongo client specifically requires python dict
    cus = dict()

    #custom record id rather than mongodb default hash id                                          
    cid = 0                                                 

    #cycle through each mySQL row
    for ( row ) in tqdm(mycursor):
        cid        += 1   #increment id
        cus['_id'] = cid   
        
        if cid >= from_entry and cid < till_entry:
            
            #check if current row is null
            for i in range( 0, len( row ) ):

                if row[i] == None:
                    #if the record is null, skip it                  
                    continue

                else:
                    row_title      = "".join( cols[i] )  

                    field          = row[i]

                    if isinstance(field, Decimal):
                        field = float(str(field))

                    if isinstance(field, date):
                        field = str(field)

                    #add current record's field's title and value             
                    cus[row_title] = field

            #we've completed processing this row, insert it into mongoldb      
            collection.insert_one( cus )

In [262]:
i = 11
i

11

In [263]:
tables[i][0]

'Sales_Orders'

In [211]:
db[tables[i][0]].drop()
migrate_table(i)

Migrating table Purchasing_PurchaseOrderLines
Rows in this table: 8367


0it [00:00, ?it/s]

In [212]:
db[tables[i][0]].count_documents({})

8367

In [214]:
db[tables[i][0]].drop()
migrate_table(i)

Migrating table Purchasing_PurchaseOrders
Rows in this table: 2074


0it [00:00, ?it/s]

In [215]:
db[tables[i][0]].count_documents({})

2074

In [217]:
db[tables[i][0]].drop()
migrate_table(i)

Migrating table Purchasing_SupplierCategories
Rows in this table: 9


0it [00:00, ?it/s]

In [218]:
db[tables[i][0]].count_documents({})

9

In [220]:
db[tables[i][0]].drop()
migrate_table(i)

Migrating table Purchasing_SupplierTransactions
Rows in this table: 2438


0it [00:00, ?it/s]

In [221]:
db[tables[i][0]].count_documents({})

2438

In [223]:
db[tables[i][0]].drop()
migrate_table(i)

Migrating table Purchasing_Suppliers
Rows in this table: 13


0it [00:00, ?it/s]

In [224]:
db[tables[i][0]].count_documents({})

13

In [226]:
db[tables[i][0]].drop()
migrate_table(i)

Migrating table Sales_CustomerCategories
Rows in this table: 8


0it [00:00, ?it/s]

In [227]:
db[tables[i][0]].count_documents({})

8

In [229]:
db[tables[i][0]].drop()
migrate_table(i)

Migrating table Sales_CustomerTransactions
Rows in this table: 52000


0it [00:00, ?it/s]

In [230]:
db[tables[i][0]].count_documents({})

52000

In [232]:
db[tables[i][0]].drop()
migrate_table(i)

Migrating table Sales_Customers
Rows in this table: 663


0it [00:00, ?it/s]

In [233]:
db[tables[i][0]].count_documents({})

663

In [235]:
db[tables[i][0]].drop()
migrate_table(i)

Migrating table Sales_InvoiceLines
Rows in this table: 30000


0it [00:00, ?it/s]

In [236]:
db[tables[i][0]].count_documents({})

30000

In [238]:
db[tables[i][0]].drop()
migrate_table(i)

Migrating table Sales_Invoices
Rows in this table: 70510


0it [00:00, ?it/s]

In [239]:
db[tables[i][0]].count_documents({})

70510

In [241]:
db[tables[i][0]].drop()
migrate_table(i)

Migrating table Sales_OrderLines
Rows in this table: 76000


0it [00:00, ?it/s]

OperationalError: 2013 (HY000): Lost connection to MySQL server during query

In [242]:
db[tables[i][0]].count_documents({})

32187

In [245]:
migrate_table_10k_entries(i, 3)

Migrate entries larger or equal to 30001 and smaller than 40001
Migrating table Sales_OrderLines
Rows in this table: 76000


0it [00:00, ?it/s]

OperationalError: 2013 (HY000): Lost connection to MySQL server during query

In [249]:
db[tables[i][0]].count_documents({})

40000

In [250]:
migrate_table_10k_entries(i, 4)

Migrate entries larger or equal to 40001 and smaller than 50001
Migrating table Sales_OrderLines
Rows in this table: 76000


0it [00:00, ?it/s]

OperationalError: 2013 (HY000): Lost connection to MySQL server during query

In [254]:
db[tables[i][0]].count_documents({})

50000

In [255]:
migrate_table_10k_entries(i, 5)

Migrate entries larger or equal to 50001 and smaller than 60001
Migrating table Sales_OrderLines
Rows in this table: 76000


0it [00:00, ?it/s]

In [256]:
db[tables[i][0]].count_documents({})

60000

In [257]:
migrate_table_10k_entries(i, 6)

Migrate entries larger or equal to 60001 and smaller than 70001
Migrating table Sales_OrderLines
Rows in this table: 76000


0it [00:00, ?it/s]

In [258]:
db[tables[i][0]].count_documents({})

70000

In [259]:
migrate_table_10k_entries(i, 7)

Migrate entries larger or equal to 70001 and smaller than 80001
Migrating table Sales_OrderLines
Rows in this table: 76000


0it [00:00, ?it/s]

In [260]:
db[tables[i][0]].count_documents({})

76000

In [264]:
db[tables[i][0]].drop()
migrate_table(i)

Migrating table Sales_Orders
Rows in this table: 73595


0it [00:00, ?it/s]

OperationalError: 2013 (HY000): Lost connection to MySQL server during query

In [268]:
db[tables[i][0]].count_documents({})

40017

In [269]:
migrate_table_10k_entries(i, 4)

Migrate entries larger or equal to 40001 and smaller than 50001
Migrating table Sales_Orders
Rows in this table: 73595


0it [00:00, ?it/s]

OperationalError: 2013 (HY000): Lost connection to MySQL server during query

In [273]:
db[tables[i][0]].count_documents({})

50000

In [274]:
migrate_table_10k_entries(i, 5)

Migrate entries larger or equal to 50001 and smaller than 60001
Migrating table Sales_Orders
Rows in this table: 73595


0it [00:00, ?it/s]

In [275]:
db[tables[i][0]].count_documents({})

60000

In [276]:
migrate_table_10k_entries(i, 6)

Migrate entries larger or equal to 60001 and smaller than 70001
Migrating table Sales_Orders
Rows in this table: 73595


0it [00:00, ?it/s]

In [277]:
db[tables[i][0]].count_documents({})

70000

In [278]:
migrate_table_10k_entries(i, 7)

Migrate entries larger or equal to 70001 and smaller than 80001
Migrating table Sales_Orders
Rows in this table: 73595


0it [00:00, ?it/s]

In [279]:
db[tables[i][0]].count_documents({})

73595

In [271]:
mycursor.fetchall()

[]

In [272]:
mydb, mycursor = connect_mysql()

## Check newly created mongo database

In [280]:
db.list_collection_names()

['Sales_CustomerTransactions',
 'Sales_Invoices',
 'Warehouse_StockItemTransactions',
 'Sales_CustomerCategories',
 'Purchasing_SupplierTransactions',
 'Purchasing_PurchaseOrders',
 'Warehouse_StockItemStockGroups',
 'Warehouse_Colors',
 'Sales_OrderLines',
 'Purchasing_Suppliers',
 'Warehouse_StockGroups',
 'Warehouse_StockItems',
 'Sales_InvoiceLines',
 'Sales_Customers',
 'Warehouse_PackageTypes',
 'Purchasing_PurchaseOrderLines',
 'Purchasing_SupplierCategories',
 'Sales_Orders']

In [281]:
len(db.list_collection_names())

18

In [282]:
for collection in db.list_collection_names():
    print(collection)
    pprint(db[collection].find_one())
    print()

Sales_CustomerTransactions
{'AmountExcludingTax': 2300.0,
 'CustomerID': 832,
 'CustomerTransactionID': 2,
 'FinalizationDate': '2013-01-02',
 'InvoiceID': 1,
 'IsFinalized': 1,
 'OutstandingBalance': 0.0,
 'TaxAmount': 345.0,
 'TransactionAmount': 2645.0,
 'TransactionDate': '2013-01-01',
 'TransactionTypeID': 1,
 '_id': 1}

Sales_Invoices
{'AccountsPersonID': 3032,
 'BillToCustomerID': 832,
 'ConfirmedDeliveryTime': '2013-01-02 07:05:00',
 'ConfirmedReceivedBy': 'Aakriti Byrraju',
 'ContactPersonID': 3032,
 'CustomerID': 832,
 'CustomerPurchaseOrderNumber': '12126',
 'DeliveryInstructions': 'Suite 24, 1345 Jun Avenue',
 'DeliveryMethodID': 3,
 'DeliveryRun': '',
 'InvoiceDate': '2013-01-01',
 'InvoiceID': 1,
 'IsCreditNote': 0,
 'OrderID': 1,
 'PackedByPersonID': 14,
 'ReturnedDeliveryData': '{"Events": [{ "Event":"Ready for '
                         'collection","EventTime":"2013-01-01T12:00:00","ConNote":"EAN-125-1051"},{ '
                         '"Event":"DeliveryAttempt","Even

In [283]:
# Compare sql table sizes to mongo collection sizes
for i in range(len(tables)):
    
    # Get MySQL table name
    table_name = tables[i][0]
    
    # Define our query
    query = ( "SELECT * " 
             "FROM " + table_name )
    
    # Execute query
    mycursor.execute( query )
    
    # Get row count for mongo db
    mongo_count = db[table_name].count_documents({})
    
    # Print row counts
    print(table_name)
    print('SQL db count:')
    print(len(mycursor.fetchall()))
    print('Mongo db count:')
    print(mongo_count)

Purchasing_PurchaseOrderLines
SQL db count:
8367
Mongo db count:
8367
Purchasing_PurchaseOrders
SQL db count:
2074
Mongo db count:
2074
Purchasing_SupplierCategories
SQL db count:
9
Mongo db count:
9
Purchasing_SupplierTransactions
SQL db count:
2438
Mongo db count:
2438
Purchasing_Suppliers
SQL db count:
13
Mongo db count:
13
Sales_CustomerCategories
SQL db count:
8
Mongo db count:
8
Sales_CustomerTransactions
SQL db count:
52000
Mongo db count:
52000
Sales_Customers
SQL db count:
663
Mongo db count:
663
Sales_InvoiceLines
SQL db count:
30000
Mongo db count:
30000
Sales_Invoices
SQL db count:
70510
Mongo db count:
70510
Sales_OrderLines
SQL db count:
76000
Mongo db count:
76000
Sales_Orders
SQL db count:
73595
Mongo db count:
73595


# Part 2: Add CSV data

## Imports

In [71]:
import pandas as pd
from tqdm.notebook import tqdm
from pprint import pprint
import numpy as np

## Connect to mongo database

In [5]:
# Connect to Mongo server

from pymongo import MongoClient

host="rhea.isegi.unl.pt"
port="27049"
user="GROUP_32"
password="bRG2XjRZhrRA9IfpmENyXxMlWQDUJdzL"
protocol="mongodb"
client = MongoClient(f"{protocol}://{user}:{password}@{host}:{port}")

In [6]:
# Connect to mongo db
db = client.#database name here

## Add csv data to mongo database

In [11]:
csv_dir_first_part = '/Users/philippmetzger/Documents/GitHub/BDMM/Homework 3/BDMM - 3rd Homework-20210520/csv/Project_WWI_'
table_names = ['Warehouse_Colors', 'Warehouse_PackageTypes', 'Warehouse_StockGroups', 
               'Warehouse_StockItems', 'Warehouse_StockItemStockGroups', 'Warehouse_StockItemTransactions']
warehouse_data = []
for name in table_names:
    path = csv_dir_first_part + name + '.csv'
    data = pd.read_csv(path)
    warehouse_data.append(data)

In [58]:
i = 0
warehouse_data[i]

Unnamed: 0,ColorID,ColorName
0,1,Azure
1,2,Beige
2,3,Black
3,4,Blue
4,5,Charcoal
5,6,Chartreuse
6,7,Cyan
7,8,Dark Brown
8,9,Dark Green
9,10,Fuchsia


In [50]:
for row_id in tqdm(range(warehouse_data[i].shape[0])):

    for col_id in range(warehouse_data[i].shape[1]):
        
        print(warehouse_data[i].iloc[row_id,col_id])
        
    print()

  0%|          | 0/10 [00:00<?, ?it/s]

1
Novelty Items

2
Clothing

3
Mugs

4
T-Shirts

5
Airline Novelties

6
Computing Novelties

7
USB Novelties

8
Furry Footwear

9
Toys

10
Packaging Materials



In [100]:
# Adapted from:
# https://nicksardo.wordpress.com/2015/11/24/transferring-data-between-mysql-and-mongodb/

def add_csv(table_index):
    
    table_name = table_names[i]
    
    # Print table name
    print('Migrating table', table_name)
    
    # Get names of the columns of this table
    cols = list(warehouse_data[i].columns)
    
    # Create mongodb collection
    collection = db[table_name]
    
    # Get row count and print it
    row_count = warehouse_data[i].shape[0]
    print('Rows in this table:', str(row_count))
    
    #mongo client specifically requires python dict
    cus = dict()

    #custom record id rather than mongodb default hash id                                          
    cid = 0                                                 

    #cycle through each mySQL row
    
    #for ( row ) in tqdm(mycursor):
    for row_id in tqdm(range(warehouse_data[i].shape[0])):
        
        cid        += 1   #increment id
        cus['_id'] = cid                                    

        #check if current row is null
        for col_id in range(warehouse_data[i].shape[1]):
            
            field = warehouse_data[i].iloc[row_id,col_id]
            
            if field == None:
                #if the record is null, skip it                  
                continue
            
            else:
                row_title      = "".join( cols[col_id] )  
                
                #if isinstance(field, Decimal):
                #    field = float(str(field))
                    
                #if isinstance(field, date):
                #    field = str(field)
                
                if isinstance(field, np.int64):
                    field = int(field)
                    
                if isinstance(field, np.bool_):
                    field = bool(field)
                
                
                
                
                #add current record's field's title and value             
                cus[row_title] = field
         
        try:
            #we've completed processing this row, insert it into mongoldb      
            collection.insert_one( cus )
        except Exception as e: 
            print(e)
            print(cus)
            print()

In [97]:
i = 5
i

5

In [77]:
add_csv(i)

Migrating table Warehouse_Colors
Rows in this table: 36


  0%|          | 0/36 [00:00<?, ?it/s]

In [81]:
db[table_names[i]].count_documents({})

36

In [79]:
add_csv(i)

Migrating table Warehouse_PackageTypes
Rows in this table: 14


  0%|          | 0/14 [00:00<?, ?it/s]

In [84]:
db[table_names[i]].count_documents({})

14

In [86]:
add_csv(i)

Migrating table Warehouse_StockGroups
Rows in this table: 10


  0%|          | 0/10 [00:00<?, ?it/s]

In [87]:
db[table_names[i]].count_documents({})

10

In [92]:
add_csv(i)

Migrating table Warehouse_StockItems
Rows in this table: 227


  0%|          | 0/227 [00:00<?, ?it/s]

In [93]:
db[table_names[i]].count_documents({})

227

In [95]:
add_csv(i)

Migrating table Warehouse_StockItemStockGroups
Rows in this table: 442


  0%|          | 0/442 [00:00<?, ?it/s]

In [96]:
db[table_names[i]].count_documents({})

442

In [98]:
add_csv(i)

Migrating table Warehouse_StockItemTransactions
Rows in this table: 236667


  0%|          | 0/236667 [00:00<?, ?it/s]

In [101]:
db[table_names[i]].count_documents({})

236667

In [102]:
# Compare sql table sizes to mongo collection sizes
for i in range(len(table_names)):
    
    # Get table name
    table_name = table_names[i]
    
    # Get row count for csv
    csv_count = warehouse_data[i].shape[0]
    
    # Get row count for mongo db
    mongo_count = db[table_name].count_documents({})
    
    # Print row counts
    print(table_name)
    print('Csv row count:')
    print(csv_count)
    print('Mongo db document count:')
    print(mongo_count)

Warehouse_Colors
Csv row count:
36
Mongo db document count:
36
Warehouse_PackageTypes
Csv row count:
14
Mongo db document count:
14
Warehouse_StockGroups
Csv row count:
10
Mongo db document count:
10
Warehouse_StockItems
Csv row count:
227
Mongo db document count:
227
Warehouse_StockItemStockGroups
Csv row count:
442
Mongo db document count:
442
Warehouse_StockItemTransactions
Csv row count:
236667
Mongo db document count:
236667


# Part 3: Add JSON data

## Imports

In [20]:
import json
from tqdm.notebook import tqdm

## Connect to mongo database

In [7]:
# Connect to Mongo server

from pymongo import MongoClient

host="rhea.isegi.unl.pt"
port="27049"
user="GROUP_32"
password="bRG2XjRZhrRA9IfpmENyXxMlWQDUJdzL"
protocol="mongodb"
client = MongoClient(f"{protocol}://{user}:{password}@{host}:{port}")

In [8]:
# Connect to mongo db
db = client.#database name here

## Add json data to mongo database

In [9]:
json_dir_first_part = '/Users/philippmetzger/Documents/GitHub/BDMM/Homework 3/BDMM - 3rd Homework-20210520/json/Project_WWI_'
json_names = ['Application_Cities', 'Application_Countries', 'Application_DeliveryMethods', 
              'Application_PaymentMethods', 'Application_People', 'Application_StateProvinces',
              'Application_TransactionTypes']
application_data = []

In [21]:
# Adapted from:
# https://www.geeksforgeeks.org/how-to-import-json-file-in-mongodb-using-python/

def add_json(file_index):
    
    path = json_dir_first_part + json_names[file_index] + '.json'
    
    collection = db[json_names[file_index]]
    
    with open(path) as file:
        file_data = json.load(file)
        
    if isinstance(file_data, list):
        collection.insert_many(file_data)  
    else:
        collection.insert_one(file_data)

In [22]:
for i in tqdm(range(7)):
    add_json(i)

  0%|          | 0/7 [00:00<?, ?it/s]

# Part 4: Denormalise the database

## Imports

In [1]:
from pymongo import MongoClient
from tqdm.notebook import tqdm
from pprint import pprint
import numpy as np
import math

## Connect to mongo db

In [2]:
# Connect to Mongo server
host="rhea.isegi.unl.pt"
port="27049"
user="GROUP_32"
password="bRG2XjRZhrRA9IfpmENyXxMlWQDUJdzL"
protocol="mongodb"
client = MongoClient(f"{protocol}://{user}:{password}@{host}:{port}")

# Connect to mongo db
db = client.#database name here

## Define an embed() function

In [3]:
def embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection):
    
    # Get the distinct values in the parent collection that point to the child collection's entries of interest
    linking_attribute_values_in_parent_collection = db[parent_collection].find( {}, {linking_attribute_parent:1, '_id':0}).distinct(linking_attribute_parent)

    # Iterate over them
    for value in tqdm(linking_attribute_values_in_parent_collection):

        # If there is a nan, skip it
        if not math.isnan(value):
            
            # Find the respective documents in the child collection (that will be embedded)
            to_embed = list(db[child_collection].find( {linking_attribute_child:value}, {'_id':0} ))

            # If only one document will be embedded, remove the list around it
            # If multiple documents will be embedded, they will be interted as a list
            if len(to_embed) == 1:
                to_embed = to_embed[0]

            # Embed them in the parent collection's respective entries
            db[parent_collection].update_many( {linking_attribute_parent:value}, {'$set': {new_attribute:to_embed} } )
            
            # Commented out because descreases usability in certain situations:
            # Remove ID field from parent collection
            #db[parent_collection].update_many( {linking_attribute_parent:value}, {'$unset': { linking_attribute_parent:'' } } )

## Embed Application_Countries in Application_StateProvinces

In [4]:
linking_attribute_parent = 'CountryID'
linking_attribute_child = 'CountryID'
new_attribute = 'Country'
parent_collection = 'Application_StateProvinces'
child_collection = 'Application_Countries'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/1 [00:00<?, ?it/s]

[{'Country': {'Continent': 'North America',
              'CountryID': 230,
              'CountryName': 'United States',
              'CountryType': 'UN Member State',
              'FormalName': 'United States of America',
              'IsoAlpha3Code': 'USA',
              'IsoNumericCode': 840,
              'LatestRecordedPopulation': 313973000,
              'Region': 'Americas',
              'Subregion': 'Northern America'},
  'CountryID': 230,
  'LatestRecordedPopulation': 5437278,
  'SalesTerritory': 'Southeast',
  'StateProvinceCode': 'AL',
  'StateProvinceID': 1,
  'StateProvinceName': 'Alabama',
  '_id': ObjectId('60a9208b4b95d601fc64696e')}]


## Embed Application_StateProvinces in Application_Cities

In [5]:
linking_attribute_parent = 'StateProvinceID'
linking_attribute_child = 'StateProvinceID'
new_attribute = 'StateProvince'
parent_collection = 'Application_Cities'
child_collection = 'Application_StateProvinces'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/53 [00:00<?, ?it/s]

[{'CityID': 1,
  'CityName': 'Aaronsburg',
  'LatestRecordedPopulation': 613,
  'Location': '0xE6100000010C07E11B542C73444087C09140035D53C0',
  'StateProvince': {'Country': {'Continent': 'North America',
                                'CountryID': 230,
                                'CountryName': 'United States',
                                'CountryType': 'UN Member State',
                                'FormalName': 'United States of America',
                                'IsoAlpha3Code': 'USA',
                                'IsoNumericCode': 840,
                                'LatestRecordedPopulation': 313973000,
                                'Region': 'Americas',
                                'Subregion': 'Northern America'},
                    'CountryID': 230,
                    'LatestRecordedPopulation': 13284753,
                    'SalesTerritory': 'Mideast',
                    'StateProvinceCode': 'PA',
                    'StateProvinceID': 39,
     

## Embed Application_Cities in Purchasing_Suppliers (both DeliveryCity and PostalCity)

In [6]:
linking_attribute_parent = 'DeliveryCityID'
linking_attribute_child = 'CityID'
new_attribute = 'DeliveryCity'
parent_collection = 'Purchasing_Suppliers'
child_collection = 'Application_Cities'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

linking_attribute_parent = 'PostalCityID'
linking_attribute_child = 'CityID'
new_attribute = 'PostalCity'
parent_collection = 'Purchasing_Suppliers'
child_collection = 'Application_Cities'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

[{'AlternateContactPersonID': 22,
  'BankAccountBranch': 'Woodgrove Bank Zionsville',
  'BankAccountCode': '356981',
  'BankAccountName': 'A Datum Corporation',
  'BankAccountNumber': '8575824136',
  'BankInternationalCode': '25986',
  'DeliveryAddressLine1': 'Suite 10',
  'DeliveryAddressLine2': '183838 Southwest Boulevard',
  'DeliveryCity': {'CityID': 38171,
                   'CityName': 'Zionsville',
                   'LatestRecordedPopulation': 14160,
                   'Location': '0xE6100000010CDE115F37B6F9434031276893C39055C0',
                   'StateProvince': {'Country': {'Continent': 'North America',
                                                 'CountryID': 230,
                                                 'CountryName': 'United States',
                                                 'CountryType': 'UN Member '
                                                                'State',
                                                 'FormalName': 'United States '

## Embed Application_Cities in Sales_Customers (both DeliveryCity and PostalCity)

In [7]:
linking_attribute_parent = 'DeliveryCityID'
linking_attribute_child = 'CityID'
new_attribute = 'DeliveryCity'
parent_collection = 'Sales_Customers'
child_collection = 'Application_Cities'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

linking_attribute_parent = 'PostalCityID'
linking_attribute_child = 'CityID'
new_attribute = 'PostalCity'
parent_collection = 'Sales_Customers'
child_collection = 'Application_Cities'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/655 [00:00<?, ?it/s]

  0%|          | 0/655 [00:00<?, ?it/s]

[{'AccountOpenedDate': '2013-01-01',
  'AlternateContactPersonID': 1002,
  'BillToCustomerID': 1,
  'CustomerCategoryID': 3,
  'CustomerID': 1,
  'CustomerName': 'Tailspin Toys (Head Office)',
  'DeliveryAddressLine1': 'Shop 38',
  'DeliveryAddressLine2': '1877 Mittal Road',
  'DeliveryCity': {'CityID': 19586,
                   'CityName': 'Lisco',
                   'LatestRecordedPopulation': None,
                   'Location': '0xE6100000010CE73F5A52A4BF444010638852B1A759C0',
                   'StateProvince': {'Country': {'Continent': 'North America',
                                                 'CountryID': 230,
                                                 'CountryName': 'United States',
                                                 'CountryType': 'UN Member '
                                                                'State',
                                                 'FormalName': 'United States '
                                                         

## Embed Application_DeliveryMethods in Purchasing_PurchaseOrders

In [8]:
linking_attribute_parent = 'DeliveryMethodID'
linking_attribute_child = 'DeliveryMethodID'
new_attribute = 'DeliveryMethod'
parent_collection = 'Purchasing_PurchaseOrders'
child_collection = 'Application_DeliveryMethods'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/5 [00:00<?, ?it/s]

[{'ContactPersonID': 2,
  'DeliveryMethod': {'DeliveryMethodID': 2, 'DeliveryMethodName': 'Courier'},
  'DeliveryMethodID': 2,
  'ExpectedDeliveryDate': '2014-09-18',
  'IsOrderFinalized': 1,
  'OrderDate': '2014-08-29',
  'PurchaseOrderID': 1001,
  'SupplierID': 7,
  'SupplierReference': 'BC0280982',
  '_id': 1001}]


## Embed Application_DeliveryMethods in Sales_Invoices

In [9]:
linking_attribute_parent = 'DeliveryMethodID'
linking_attribute_child = 'DeliveryMethodID'
new_attribute = 'DeliveryMethod'
parent_collection = 'Sales_Invoices'
child_collection = 'Application_DeliveryMethods'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/1 [00:00<?, ?it/s]

[{'AccountsPersonID': 2001,
  'BillToCustomerID': 401,
  'ConfirmedDeliveryTime': '2013-02-10 08:40:00',
  'ConfirmedReceivedBy': 'Klara Rakus',
  'ContactPersonID': 2165,
  'CustomerID': 483,
  'CustomerPurchaseOrderNumber': '18172',
  'DeliveryInstructions': 'Suite 275, 593 Huq Avenue',
  'DeliveryMethod': {'DeliveryMethodID': 3,
                     'DeliveryMethodName': 'Delivery Van'},
  'DeliveryMethodID': 3,
  'DeliveryRun': '',
  'InvoiceDate': '2013-02-09',
  'InvoiceID': 1989,
  'IsCreditNote': 0,
  'OrderID': 2041,
  'PackedByPersonID': 7,
  'ReturnedDeliveryData': '{"Events": [{ "Event":"Ready for '
                          'collection","EventTime":"2013-02-09T12:00:00","ConNote":"EAN-125-3039"},{ '
                          '"Event":"DeliveryAttempt","EventTime":"2013-02-10T08:40:00","ConNote":"EAN-125-3039","DriverID":11,"Latitude":33.8487376,"Longitude":-82.2598460,"Status":"Delivered"}],"DeliveredWhen":"2013-02-10T08:40:00","ReceivedBy":"Klara '
                       

## Embed Application_DeliveryMethods in Purchasing_Suppliers

In [10]:
linking_attribute_parent = 'DeliveryMethodID'
linking_attribute_child = 'DeliveryMethodID'
new_attribute = 'DeliveryMethod'
parent_collection = 'Purchasing_Suppliers'
child_collection = 'Application_DeliveryMethods'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/5 [00:00<?, ?it/s]

[{'AlternateContactPersonID': 22,
  'BankAccountBranch': 'Woodgrove Bank Zionsville',
  'BankAccountCode': '356981',
  'BankAccountName': 'A Datum Corporation',
  'BankAccountNumber': '8575824136',
  'BankInternationalCode': '25986',
  'DeliveryAddressLine1': 'Suite 10',
  'DeliveryAddressLine2': '183838 Southwest Boulevard',
  'DeliveryCity': {'CityID': 38171,
                   'CityName': 'Zionsville',
                   'LatestRecordedPopulation': 14160,
                   'Location': '0xE6100000010CDE115F37B6F9434031276893C39055C0',
                   'StateProvince': {'Country': {'Continent': 'North America',
                                                 'CountryID': 230,
                                                 'CountryName': 'United States',
                                                 'CountryType': 'UN Member '
                                                                'State',
                                                 'FormalName': 'United States '

## Embed Application_DeliveryMethods in Sales_Customers

In [11]:
linking_attribute_parent = 'DeliveryMethodID'
linking_attribute_child = 'DeliveryMethodID'
new_attribute = 'DeliveryMethod'
parent_collection = 'Sales_Customers'
child_collection = 'Application_DeliveryMethods'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/1 [00:00<?, ?it/s]

[{'AccountOpenedDate': '2013-01-01',
  'AlternateContactPersonID': 1002,
  'BillToCustomerID': 1,
  'CustomerCategoryID': 3,
  'CustomerID': 1,
  'CustomerName': 'Tailspin Toys (Head Office)',
  'DeliveryAddressLine1': 'Shop 38',
  'DeliveryAddressLine2': '1877 Mittal Road',
  'DeliveryCity': {'CityID': 19586,
                   'CityName': 'Lisco',
                   'LatestRecordedPopulation': None,
                   'Location': '0xE6100000010CE73F5A52A4BF444010638852B1A759C0',
                   'StateProvince': {'Country': {'Continent': 'North America',
                                                 'CountryID': 230,
                                                 'CountryName': 'United States',
                                                 'CountryType': 'UN Member '
                                                                'State',
                                                 'FormalName': 'United States '
                                                         

## Embed Sales_CustomerCategories in Sales_Customers

In [12]:
linking_attribute_parent = 'CustomerCategoryID'
linking_attribute_child = 'CustomerCategoryID'
new_attribute = 'CustomerCategory'
parent_collection = 'Sales_Customers'
child_collection = 'Sales_CustomerCategories'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/5 [00:00<?, ?it/s]

[{'AccountOpenedDate': '2013-01-01',
  'AlternateContactPersonID': 1002,
  'BillToCustomerID': 1,
  'CustomerCategory': {'CustomerCategoryID': 3,
                       'CustomerCategoryName': 'Novelty Shop'},
  'CustomerCategoryID': 3,
  'CustomerID': 1,
  'CustomerName': 'Tailspin Toys (Head Office)',
  'DeliveryAddressLine1': 'Shop 38',
  'DeliveryAddressLine2': '1877 Mittal Road',
  'DeliveryCity': {'CityID': 19586,
                   'CityName': 'Lisco',
                   'LatestRecordedPopulation': None,
                   'Location': '0xE6100000010CE73F5A52A4BF444010638852B1A759C0',
                   'StateProvince': {'Country': {'Continent': 'North America',
                                                 'CountryID': 230,
                                                 'CountryName': 'United States',
                                                 'CountryType': 'UN Member '
                                                                'State',
                         

## Embed Purchasing_SupplierCategories in Purchasing_Suppliers

In [13]:
linking_attribute_parent = 'SupplierCategoryID'
linking_attribute_child = 'SupplierCategoryID'
new_attribute = 'SupplierCategory'
parent_collection = 'Purchasing_Suppliers'
child_collection = 'Purchasing_SupplierCategories'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/8 [00:00<?, ?it/s]

[{'AlternateContactPersonID': 22,
  'BankAccountBranch': 'Woodgrove Bank Zionsville',
  'BankAccountCode': '356981',
  'BankAccountName': 'A Datum Corporation',
  'BankAccountNumber': '8575824136',
  'BankInternationalCode': '25986',
  'DeliveryAddressLine1': 'Suite 10',
  'DeliveryAddressLine2': '183838 Southwest Boulevard',
  'DeliveryCity': {'CityID': 38171,
                   'CityName': 'Zionsville',
                   'LatestRecordedPopulation': 14160,
                   'Location': '0xE6100000010CDE115F37B6F9434031276893C39055C0',
                   'StateProvince': {'Country': {'Continent': 'North America',
                                                 'CountryID': 230,
                                                 'CountryName': 'United States',
                                                 'CountryType': 'UN Member '
                                                                'State',
                                                 'FormalName': 'United States '

## Embed Warehouse_StockGroups in Warehouse_StockItemStockGroups

In [14]:
linking_attribute_parent = 'StockGroupID'
linking_attribute_child = 'StockGroupID'
new_attribute = 'StockItemStockGroup'
parent_collection = 'Warehouse_StockItemStockGroups'
child_collection = 'Warehouse_StockGroups'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/9 [00:00<?, ?it/s]

[{'StockGroupID': 6,
  'StockItemID': 1,
  'StockItemStockGroup': {'StockGroupID': 6,
                          'StockGroupName': 'Computing Novelties'},
  'StockItemStockGroupID': 1,
  '_id': 1}]


## Embed Warehouse_StockItemStockGroups in Warehouse_StockItems

In [15]:
linking_attribute_parent = 'StockItemID'
linking_attribute_child = 'StockItemID'
new_attribute = 'StockGroup'
parent_collection = 'Warehouse_StockItems'
child_collection = 'Warehouse_StockItemStockGroups'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(db[parent_collection].find_one())

  0%|          | 0/227 [00:00<?, ?it/s]

{'Barcode': nan,
 'Brand': nan,
 'ColorID': nan,
 'InternalComments': nan,
 'IsChillerStock': False,
 'LeadTimeDays': 14,
 'MarketingComments': 'Complete with 12 projectiles',
 'OuterPackageID': 7,
 'Photo': nan,
 'QuantityPerOuter': 1,
 'RecommendedRetailPrice': 37.38,
 'SearchDetails': 'USB missile launcher (Green) Complete with 12 projectiles',
 'Size': nan,
 'StockGroup': [{'StockGroupID': 6,
                 'StockItemID': 1,
                 'StockItemStockGroup': {'StockGroupID': 6,
                                         'StockGroupName': 'Computing '
                                                           'Novelties'},
                 'StockItemStockGroupID': 1},
                {'StockGroupID': 1,
                 'StockItemID': 1,
                 'StockItemStockGroup': {'StockGroupID': 1,
                                         'StockGroupName': 'Novelty Items'},
                 'StockItemStockGroupID': 2},
                {'StockGroupID': 7,
                 'StockI

## Embed Application_PaymentMethods in Purchasing_SupplierTransactions

In [16]:
linking_attribute_parent = 'PaymentMethodID'
linking_attribute_child = 'PaymentMethodID'
new_attribute = 'PaymentMethod'
parent_collection = 'Purchasing_SupplierTransactions'
child_collection = 'Application_PaymentMethods'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/1 [00:00<?, ?it/s]

[{'AmountExcludingTax': 194225.7,
  'FinalizationDate': '2014-06-02',
  'IsFinalized': 1,
  'OutstandingBalance': 0.0,
  'PaymentMethod': {'PaymentMethodID': 4, 'PaymentMethodName': 'EFT'},
  'PaymentMethodID': 4,
  'PurchaseOrderID': 847,
  'SupplierID': 7,
  'SupplierInvoiceNumber': '3687',
  'SupplierTransactionID': 127482,
  'TaxAmount': 29133.86,
  'TransactionAmount': 223359.56,
  'TransactionDate': '2014-05-28',
  'TransactionTypeID': 5,
  '_id': 1001}]


## Embed Application_PaymentMethods in Sales_CustomerTransactions

In [17]:
linking_attribute_parent = 'PaymentMethodID'
linking_attribute_child = 'PaymentMethodID'
new_attribute = 'PaymentMethod'
parent_collection = 'Sales_CustomerTransactions'
child_collection = 'Application_PaymentMethods'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/1 [00:00<?, ?it/s]

[{'AmountExcludingTax': 0.0,
  'CustomerID': 1,
  'CustomerTransactionID': 231,
  'FinalizationDate': '2013-01-02',
  'InvoiceID': 41,
  'IsFinalized': 1,
  'OutstandingBalance': 0.0,
  'PaymentMethod': {'PaymentMethodID': 4, 'PaymentMethodName': 'EFT'},
  'PaymentMethodID': 4,
  'TaxAmount': 0.0,
  'TransactionAmount': -9513.49,
  'TransactionDate': '2013-01-02',
  'TransactionTypeID': 3,
  '_id': 42}]


## Embed Application_TransactionTypes in Purchasing_SupplierTransactions

In [18]:
linking_attribute_parent = 'TransactionTypeID'
linking_attribute_child = 'TransactionTypeID'
new_attribute = 'TransactionType'
parent_collection = 'Purchasing_SupplierTransactions'
child_collection = 'Application_TransactionTypes'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/2 [00:00<?, ?it/s]

[{'AmountExcludingTax': 194225.7,
  'FinalizationDate': '2014-06-02',
  'IsFinalized': 1,
  'OutstandingBalance': 0.0,
  'PaymentMethod': {'PaymentMethodID': 4, 'PaymentMethodName': 'EFT'},
  'PaymentMethodID': 4,
  'PurchaseOrderID': 847,
  'SupplierID': 7,
  'SupplierInvoiceNumber': '3687',
  'SupplierTransactionID': 127482,
  'TaxAmount': 29133.86,
  'TransactionAmount': 223359.56,
  'TransactionDate': '2014-05-28',
  'TransactionType': {'TransactionTypeID': 5,
                      'TransactionTypeName': 'Supplier Invoice'},
  'TransactionTypeID': 5,
  '_id': 1001}]


## Embed Application_TransactionTypes in Sales_CustomerTransactions

In [19]:
linking_attribute_parent = 'TransactionTypeID'
linking_attribute_child = 'TransactionTypeID'
new_attribute = 'TransactionType'
parent_collection = 'Sales_CustomerTransactions'
child_collection = 'Application_TransactionTypes'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/2 [00:00<?, ?it/s]

[{'AmountExcludingTax': 2300.0,
  'CustomerID': 832,
  'CustomerTransactionID': 2,
  'FinalizationDate': '2013-01-02',
  'InvoiceID': 1,
  'IsFinalized': 1,
  'OutstandingBalance': 0.0,
  'TaxAmount': 345.0,
  'TransactionAmount': 2645.0,
  'TransactionDate': '2013-01-01',
  'TransactionType': {'TransactionTypeID': 1,
                      'TransactionTypeName': 'Customer Invoice'},
  'TransactionTypeID': 1,
  '_id': 1}]


## Embed Application_TransactionTypes in Warehouse_StockItemTransactions

In [20]:
linking_attribute_parent = 'TransactionTypeID'
linking_attribute_child = 'TransactionTypeID'
new_attribute = 'TransactionType'
parent_collection = 'Warehouse_StockItemTransactions'
child_collection = 'Application_TransactionTypes'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/3 [00:00<?, ?it/s]

[{'CustomerID': 832.0,
  'InvoiceID': 1.0,
  'PurchaseOrderID': nan,
  'Quantity': -10.0,
  'StockItemID': 67,
  'StockItemTransactionID': 1,
  'SupplierID': nan,
  'TransactionOccurredWhen': '2013-01-01 12:00:00.0000000',
  'TransactionType': {'TransactionTypeID': 10,
                      'TransactionTypeName': 'Stock Issue'},
  'TransactionTypeID': 10,
  '_id': 1}]


## Embed Warehouse_Colors in Warehouse_StockItems

In [21]:
linking_attribute_parent = 'ColorID'
linking_attribute_child = 'ColorID'
new_attribute = 'Color'
parent_collection = 'Warehouse_StockItems'
child_collection = 'Warehouse_Colors'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/8 [00:00<?, ?it/s]

[{'Barcode': nan,
  'Brand': nan,
  'Color': {'ColorID': 12, 'ColorName': 'Steel Gray'},
  'ColorID': 12.0,
  'InternalComments': nan,
  'IsChillerStock': False,
  'LeadTimeDays': 14,
  'MarketingComments': 'Complete with 12 projectiles',
  'OuterPackageID': 7,
  'Photo': nan,
  'QuantityPerOuter': 1,
  'RecommendedRetailPrice': 37.38,
  'SearchDetails': 'USB rocket launcher (Gray) Complete with 12 projectiles',
  'Size': nan,
  'StockGroup': [{'StockGroupID': 6,
                  'StockItemID': 2,
                  'StockItemStockGroup': {'StockGroupID': 6,
                                          'StockGroupName': 'Computing '
                                                            'Novelties'},
                  'StockItemStockGroupID': 4},
                 {'StockGroupID': 1,
                  'StockItemID': 2,
                  'StockItemStockGroup': {'StockGroupID': 1,
                                          'StockGroupName': 'Novelty Items'},
                  'StockItemS

## Embed Warehouse_PackageTypes in Warehouse_StockItems (both UnitPackage and OuterPackage)

In [22]:
linking_attribute_parent = 'UnitPackageID'
linking_attribute_child = 'PackageTypeID'
new_attribute = 'UnitPackageType'
parent_collection = 'Warehouse_StockItems'
child_collection = 'Warehouse_PackageTypes'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

linking_attribute_parent = 'OuterPackageID'
linking_attribute_child = 'PackageTypeID'
new_attribute = 'OuterPackageType'
parent_collection = 'Warehouse_StockItems'
child_collection = 'Warehouse_PackageTypes'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

[{'Barcode': nan,
  'Brand': nan,
  'ColorID': nan,
  'InternalComments': nan,
  'IsChillerStock': False,
  'LeadTimeDays': 14,
  'MarketingComments': 'Complete with 12 projectiles',
  'OuterPackageID': 7,
  'OuterPackageType': {'PackageTypeID': 7, 'PackageTypeName': 'Each'},
  'Photo': nan,
  'QuantityPerOuter': 1,
  'RecommendedRetailPrice': 37.38,
  'SearchDetails': 'USB missile launcher (Green) Complete with 12 projectiles',
  'Size': nan,
  'StockGroup': [{'StockGroupID': 6,
                  'StockItemID': 1,
                  'StockItemStockGroup': {'StockGroupID': 6,
                                          'StockGroupName': 'Computing '
                                                            'Novelties'},
                  'StockItemStockGroupID': 1},
                 {'StockGroupID': 1,
                  'StockItemID': 1,
                  'StockItemStockGroup': {'StockGroupID': 1,
                                          'StockGroupName': 'Novelty Items'},
            

## Embed Warehouse_PackageTypes in Purchasing_PurchaseOrderLines

In [23]:
linking_attribute_parent = 'PackageTypeID'
linking_attribute_child = 'PackageTypeID'
new_attribute = 'PackageType'
parent_collection = 'Purchasing_PurchaseOrderLines'
child_collection = 'Warehouse_PackageTypes'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/3 [00:00<?, ?it/s]

[{'Description': '"The Gu" red shirt XML tag t-shirt (White) XXS',
  'ExpectedUnitPricePerOuter': 84.0,
  'IsOrderLineFinalized': 1,
  'OrderedOuters': 1073,
  'PackageType': {'PackageTypeID': 6, 'PackageTypeName': 'Carton'},
  'PackageTypeID': 6,
  'PurchaseOrderID': 784,
  'PurchaseOrderLineID': 3001,
  'ReceivedOuters': 1073,
  'StockItemID': 77,
  '_id': 3001}]


## Embed Warehouse_PackageTypes in Sales_InvoiceLines

In [24]:
linking_attribute_parent = 'PackageTypeID'
linking_attribute_child = 'PackageTypeID'
new_attribute = 'PackageType'
parent_collection = 'Sales_InvoiceLines'
child_collection = 'Warehouse_PackageTypes'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/3 [00:00<?, ?it/s]

[{'Description': '32 mm Double sided bubble wrap 50m',
  'ExtendedPrice': 3864.0,
  'InvoiceID': 978,
  'InvoiceLineID': 3001,
  'LineProfit': 1590.0,
  'PackageType': {'PackageTypeID': 7, 'PackageTypeName': 'Each'},
  'PackageTypeID': 7,
  'Quantity': 30,
  'StockItemID': 164,
  'TaxAmount': 504.0,
  'TaxRate': 15.0,
  'UnitPrice': 112.0,
  '_id': 3001}]


## Embed Warehouse_PackageTypes in Sales_OrderLines

In [25]:
linking_attribute_parent = 'PackageTypeID'
linking_attribute_child = 'PackageTypeID'
new_attribute = 'PackageType'
parent_collection = 'Sales_OrderLines'
child_collection = 'Warehouse_PackageTypes'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/3 [00:00<?, ?it/s]

[{'Description': '10 mm Double sided bubble wrap 50m',
  'OrderID': 682,
  'OrderLineID': 2001,
  'PackageType': {'PackageTypeID': 7, 'PackageTypeName': 'Each'},
  'PackageTypeID': 7,
  'PickedQuantity': 20,
  'PickingCompletedWhen': '2013-01-12 11:00:00',
  'Quantity': 20,
  'StockItemID': 158,
  'TaxRate': 15.0,
  'UnitPrice': 105.0,
  '_id': 2001}]


## Embed Purchasing_PurchaseOrderLines in Purchasing_PurchaseOrders

In [26]:
linking_attribute_parent = 'PurchaseOrderID'
linking_attribute_child = 'PurchaseOrderID'
new_attribute = 'PurchaseOrderLines'
parent_collection = 'Purchasing_PurchaseOrders'
child_collection = 'Purchasing_PurchaseOrderLines'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/2074 [00:00<?, ?it/s]

[{'ContactPersonID': 2,
  'DeliveryMethod': {'DeliveryMethodID': 2, 'DeliveryMethodName': 'Courier'},
  'DeliveryMethodID': 2,
  'ExpectedDeliveryDate': '2014-09-18',
  'IsOrderFinalized': 1,
  'OrderDate': '2014-08-29',
  'PurchaseOrderID': 1001,
  'PurchaseOrderLines': [{'Description': 'Shipping carton (Brown) '
                                         '305x305x305mm',
                          'ExpectedUnitPricePerOuter': 47.5,
                          'IsOrderLineFinalized': 1,
                          'OrderedOuters': 239,
                          'PackageType': {'PackageTypeID': 7,
                                          'PackageTypeName': 'Each'},
                          'PackageTypeID': 7,
                          'PurchaseOrderID': 1001,
                          'PurchaseOrderLineID': 3901,
                          'ReceivedOuters': 239,
                          'StockItemID': 184},
                         {'Description': 'Black and orange glass with care '
       

## Embed Sales_InvoiceLines in Sales_Invoices

In [27]:
linking_attribute_parent = 'InvoiceID'
linking_attribute_child = 'InvoiceID'
new_attribute = 'InvoiceLines'
parent_collection = 'Sales_Invoices'
child_collection = 'Sales_InvoiceLines'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/70510 [00:00<?, ?it/s]

[{'AccountsPersonID': 2001,
  'BillToCustomerID': 401,
  'ConfirmedDeliveryTime': '2013-02-10 08:40:00',
  'ConfirmedReceivedBy': 'Klara Rakus',
  'ContactPersonID': 2165,
  'CustomerID': 483,
  'CustomerPurchaseOrderNumber': '18172',
  'DeliveryInstructions': 'Suite 275, 593 Huq Avenue',
  'DeliveryMethod': {'DeliveryMethodID': 3,
                     'DeliveryMethodName': 'Delivery Van'},
  'DeliveryMethodID': 3,
  'DeliveryRun': '',
  'InvoiceDate': '2013-02-09',
  'InvoiceID': 1989,
  'InvoiceLines': [{'Description': 'Developer joke mug - Oct 31 = Dec 25 '
                                   '(White)',
                    'ExtendedPrice': 119.6,
                    'InvoiceID': 1989,
                    'InvoiceLineID': 6441,
                    'LineProfit': 68.0,
                    'PackageType': {'PackageTypeID': 7,
                                    'PackageTypeName': 'Each'},
                    'PackageTypeID': 7,
                    'Quantity': 8,
                    'Stock

## Embed Sales_OrderLines in SalesOrders

In [28]:
linking_attribute_parent = 'OrderID'
linking_attribute_child = 'OrderID'
new_attribute = 'OrderLines'
parent_collection = 'Sales_Orders'
child_collection = 'Sales_OrderLines'

embed(linking_attribute_parent, linking_attribute_child, new_attribute, parent_collection, child_collection)

pprint(list(db[parent_collection].find( {new_attribute: {"$exists": True} } ).limit(1)))

  0%|          | 0/73595 [00:00<?, ?it/s]

[{'BackorderOrderID': 1037,
  'ContactPersonID': 3012,
  'CustomerID': 812,
  'CustomerPurchaseOrderNumber': '16322',
  'ExpectedDeliveryDate': '2013-01-21',
  'IsUndersupplyBackordered': 1,
  'OrderDate': '2013-01-18',
  'OrderID': 1001,
  'OrderLines': [{'Description': 'USB food flash drive - fortune cookie',
                  'OrderID': 1001,
                  'OrderLineID': 3053,
                  'PackageType': {'PackageTypeID': 7,
                                  'PackageTypeName': 'Each'},
                  'PackageTypeID': 7,
                  'PickedQuantity': 7,
                  'PickingCompletedWhen': '2013-01-18 11:00:00',
                  'Quantity': 7,
                  'StockItemID': 14,
                  'TaxRate': 15.0,
                  'UnitPrice': 32.0},
                 {'Description': 'IT joke mug - that behavior is by design '
                                 '(White)',
                  'OrderID': 1001,
                  'OrderLineID': 3054,
                 