# Bulk Ops

In [None]:
from pymongo import MongoClient

# Connect to the MongoDB server and select the database this notebook works in
client = MongoClient('mongodb://mongodb:27017/')
db = client['bulk_ops']

# Remove whatever a previous run left behind
db.drop_collection('customers')
db.drop_collection('subscriptions')

# Recreate both collections explicitly so they exist before any write
db.create_collection('customers')
db.create_collection('subscriptions')

In [None]:
import json

def load_jsonl(file_path):
    """Lazily yield one decoded JSON value per line of a JSON Lines file.

    Args:
        file_path: Path of the ``.jsonl`` file to read.

    Yields:
        The Python object decoded from each non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (typically a trailing newline at the end of the
            # file) carry no record and would make json.loads raise.
            if line.strip():
                yield json.loads(line)

## Single Ops

In [None]:
from bson import ObjectId, DBRef
import time

def insert_single_ops(file_path):
    """Load customers and subscriptions using one request per document.

    Every record read from ``file_path`` is upserted into ``db.customers``
    (matched on ``merchant`` + ``external_id``), then each of its
    subscriptions is inserted into ``db.subscriptions`` with a ``DBRef``
    pointing at the stored customer. The elapsed wall time is printed.

    Args:
        file_path: Path of a JSON Lines file. Each record must provide a
            ``"customer"`` object (with ``"merchant"`` and
            ``"merchant_user_id"``) and a ``"subscriptions"`` list.
    """
    # Local import: only this function needs it, and it keeps the cell
    # independent of the notebook's other import lines.
    from pymongo import ReturnDocument

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    for record in load_jsonl(file_path):
        data = record["customer"]
        q_filter = {
            "merchant": data["merchant"],
            "external_id": data["merchant_user_id"],
        }

        # A single atomic upsert: $setOnInsert assigns the pre-generated _id
        # only when the document is created, so no separate find_one() check
        # (and no window between check and write) is needed.
        customer = db.customers.find_one_and_update(
            q_filter,
            {
                "$set": {"data": data},
                "$setOnInsert": {"_id": ObjectId()},
            },
            upsert=True,
            # Return the stored document so its real _id is used below,
            # whether the customer was just inserted or already existed.
            return_document=ReturnDocument.AFTER,
        )

        # Subscriptions
        for subscription in record["subscriptions"]:
            # Inserts to simplify, this should be upserts
            db.subscriptions.insert_one({
                "_id": ObjectId(),
                "customer": DBRef("customers", customer["_id"]),
                "data": subscription,
            })

    elapsed = time.perf_counter() - start_time
    print("Single Operations Write Time:", elapsed)

In [None]:
# Sample data file; the relative path is resolved against the working directory.
# To see which files are available, list the directory first, e.g.:
#   import subprocess
#   print(subprocess.run(['ls', '-lshalt', 'work'], capture_output=True, text=True).stdout)
file_path = '5k.jsonl'

# Load the sample with one request per document and report the elapsed time
print("Running single operations...")
insert_single_ops(file_path=file_path)

In [None]:
# Reset to empty collections so the bulk run starts from the same state
db.drop_collection('customers')
db.drop_collection('subscriptions')
db.create_collection('customers')
db.create_collection('subscriptions')

## Bulk Ops

In [None]:
from pymongo import InsertOne, UpdateOne

def insert_bulk_ops(file_path):
    """Load customers and subscriptions using one bulk write per collection.

    All customer upserts and subscription inserts are queued in memory and
    sent with a single ``bulk_write`` call per collection. Customer ``_id``
    values are decided client-side so subscriptions can reference their
    customer before anything is written. The elapsed wall time is printed.

    Args:
        file_path: Path of a JSON Lines file. Each record must provide a
            ``"customer"`` object (with ``"merchant"`` and
            ``"merchant_user_id"``) and a ``"subscriptions"`` list.
    """
    customer_ops = []
    subscription_ops = []

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    # First pass: map every stored customer's (merchant, external_id) to its
    # _id. $setOnInsert leaves the _id of an existing document untouched, so
    # subscriptions must reference the stored _id rather than a new one.
    # NOTE: this reads the keys of the whole collection; for large
    # collections restrict the query to the keys in the file or batch it.
    customer_ids = {
        (doc.get("merchant"), doc.get("external_id")): doc["_id"]
        for doc in db.customers.find({}, {"merchant": 1, "external_id": 1})
    }

    for record in load_jsonl(file_path):
        data = record["customer"]
        merchant = data["merchant"]
        external_id = data["merchant_user_id"]
        key = (merchant, external_id)

        # Reuse the _id of a customer already stored or already seen earlier
        # in this file; otherwise pre-generate one and remember it so later
        # records for the same customer point at the same document.
        customer_obj_id = customer_ids.get(key)
        if customer_obj_id is None:
            customer_obj_id = ObjectId()
            customer_ids[key] = customer_obj_id

        customer_ops.append(UpdateOne(
            {"merchant": merchant, "external_id": external_id},
            {"$set": {"data": data}, "$setOnInsert": {"_id": customer_obj_id}},
            upsert=True,
        ))

        for subscription in record["subscriptions"]:
            subscription_ops.append(InsertOne({
                "_id": ObjectId(),
                "customer": DBRef("customers", customer_obj_id),
                "data": subscription,
            }))

    # bulk_write rejects an empty request list, so skip empty batches.
    if customer_ops:
        db.customers.bulk_write(customer_ops)
    if subscription_ops:
        db.subscriptions.bulk_write(subscription_ops)

    elapsed = time.perf_counter() - start_time
    print("Bulk Operations Write Time:", elapsed)

In [None]:
# Load the same sample file again, this time through the batched code path
print("Running bulk operations...")
insert_bulk_ops(file_path=file_path)