In [39]:
# Подготовка скриптов для загрузки данных

In [1]:
import time
import statistics
import uuid
import random
from bson import Binary

USERS_NUM = 500_000
MOVIES_NUM = 30_000

BOOKMARKS_PER_USER = 40

MAX_RATING = 10
MIN_RATING = 0

def get_uuid():
    return Binary.from_uuid(uuid.uuid4())

user_ids = [get_uuid() for _ in range(USERS_NUM)]
movie_ids = [get_uuid() for _ in range(MOVIES_NUM)]

user_bookmarks = {
            '_id': random.choice(user_ids),
            'bookmarks': [
                movie_id for movie_id
                in random.sample(movie_ids, BOOKMARKS_PER_USER)
            ]
    }

users_bookmarks = [
    user_bookmarks for _ in range(10)
]
print(users_bookmarks, len(user_ids))


In [2]:
class TimerCode:
    def __init__(self):
        self.start = time.time()
        self.durations = []

    def setup_start_time(self):
        self.start = time.time()

    def checkpoint(self):
        end_time = time.time()
        self.durations.append(end_time - self.start)
        self.start = end_time

    def get_current_stat(self):
        print(f"Median - {statistics.median(self.durations)}")
        avg = statistics.mean(self.durations)
        print(f"Average - {avg}")
        print(f"Summary - {sum(self.durations)}")

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.get_current_stat()


# Исследование Mongo

In [4]:
import pymongo

client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["mydatabase"]
collection = db["mycollection"]


with TimerCode() as timer:
    timer.setup_start_time()
    collection.insert_many(user_bookmarks)
    timer.checkpoint()

with TimerCode() as timer:
    timer.setup_start_time()
    documents = collection.find()
    timer.checkpoint()


Test on 10 user's bookmarks


Test writing time
Average - 0.0184786319732666

Test reading time
Average - 2.471605936686198e-05

Test on 100 user's bookmarks


Test writing time
Average - 0.023689746856689453

Test reading time
Average - 1.9709269205729168e-05

Test on 200 user's bookmarks


Test writing time
Average - 0.024291038513183594

Test reading time
Average - 2.392133076985677e-05

Test on 500 user's bookmarks


Test writing time
Average - 0.033284266789754234

Test reading time
Average - 2.185503641764323e-05

Test on 1000 user's bookmarks


Test writing time
Average - 0.048758347829182945

Test reading time
Average - 2.0742416381835938e-05

Test on 2000 user's bookmarks


Test writing time
Average - 0.07934776941935222

Test reading time
Average - 2.7497609456380207e-05

Test on 5000 user's bookmarks


Test writing time
Average - 0.18643657366434732

Test reading time
Average - 2.6067097981770832e-05

Test on 10000 user's bookmarks


Test writing time
Average - 0.29637026786