In [39]:
# Подготовка скриптов для загрузки данных

In [40]:
import time
import statistics
import uuid
import random
from bson import Binary

USERS_NUM = 500_000
MOVIES_NUM = 30_000

BOOKMARKS_PER_USER = 40

MAX_RATING = 10
MIN_RATING = 0

def get_uuid():
    return Binary.from_uuid(uuid.uuid4())

user_ids = [get_uuid() for _ in range(USERS_NUM)]
movie_ids = [get_uuid() for _ in range(MOVIES_NUM)]

user_bookmarks = {
            '_id': random.choice(user_ids),
            'bookmarks': [
                movie_id for movie_id
                in random.sample(movie_ids, BOOKMARKS_PER_USER)
            ]
    }

users_bookmarks = [
    user_bookmarks for _ in range(10)
]
print(users_bookmarks, len(user_ids))


In [41]:
class TimerCode:
    def __init__(self):
        self.start = time.time()
        self.durations = []

    def setup_start_time(self):
        self.start = time.time()

    def checkpoint(self):
        end_time = time.time()
        self.durations.append(end_time - self.start)
        self.start = end_time

    def get_current_stat(self):
        print(f"Median - {statistics.median(self.durations)}")
        avg = statistics.mean(self.durations)
        print(f"Average - {avg}")
        print(f"Summary - {sum(self.durations)}")

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.get_current_stat()


# Исследование Mongo

In [42]:
import pymongo

client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["mydatabase"]
collection = db["mycollection"]


with TimerCode() as timer:
    timer.setup_start_time()
    collection.insert_many(user_bookmarks)
    timer.checkpoint()

with TimerCode() as timer:
    timer.setup_start_time()
    documents = collection.find()
    timer.checkpoint()
# Find all documents in the collection

i = 0
for doc in documents:
    print(doc)
    i += 1
print(i)


Test on 10 user's bookmarks


Test writing time
Median - 0.017750978469848633
Average - 0.017750978469848633
Summary - 0.017750978469848633

Test reading time
Median - 0.000102996826171875
Average - 0.000102996826171875
Summary - 0.000102996826171875

Test on 100 user's bookmarks


Test writing time
Median - 0.016696691513061523
Average - 0.016696691513061523
Summary - 0.016696691513061523

Test reading time
Median - 3.314018249511719e-05
Average - 3.314018249511719e-05
Summary - 3.314018249511719e-05

Test on 200 user's bookmarks


Test writing time
Median - 0.019715070724487305
Average - 0.019715070724487305
Summary - 0.019715070724487305

Test reading time
Median - 3.170967102050781e-05
Average - 3.170967102050781e-05
Summary - 3.170967102050781e-05

Test on 500 user's bookmarks


Test writing time
Median - 0.023834943771362305
Average - 0.023834943771362305
Summary - 0.023834943771362305

Test reading time
Median - 3.1948089599609375e-05
Average - 3.1948089599609375e-05
Summary - 3