# Mongo vs Postgres 

In [None]:
from datetime import datetime
from uuid import uuid4
from random import choice
import random as rd

test_comment = " ".join([rd.choice(["lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit", "sed", "do", "eiusmod", "tempor", "incididunt", "ut", "labore", "et", "dolore", "magna", "aliqua"]) for _ in range(50)])
test_comment = test_comment.capitalize() + "."
test_comment = test_comment * 6
test_comment = test_comment[:300]

users = [str(uuid4()) for _ in range(100)]
films = [str(uuid4()) for _ in range(1000)]

def generate_review()->dict:
    for i in range(100_000):
        i % 100 == 0 and print(f"{i} / 100_000")
        yield {
            "user_id": choice(users),
            "film_id": choice(films),
            "comment": test_comment,
            "timestamp": datetime.now(),
        }

## Mongo

In [None]:
from pymongo import MongoClient
import pymongo

In [None]:
client = MongoClient('localhost', 27017)
db = client['test_database']
collection = db['test_collection']
collection.drop()

In [None]:
collection.create_index("user_id")
collection.create_index("film_id")

In [None]:
import time

start = time.time()
counter = 0
for row in generate_review():
    collection.insert_one(row) 
    counter += 1
end = time.time()

print(f"Batch {counter} inserted")
print(f"Insertion took {end - start:.4f} seconds")
print(f"average insertion time: {(end - start) / counter:.4f} seconds")

In [None]:
import timeit

def read_test():
    cursor  = collection.find_one({"user_id": choice(users)})
    _ = list(cursor)

print(f"Average select time: {timeit.timeit(read_test, number=100_000) / 100_000} sec")

In [None]:
import timeit

def update_test():
    cursor  = collection.update_one({"user_id": choice(users)}, {"$set": {"comment": "test_comment"}})
    _ = list(cursor)

print(f"Average update time: {timeit.timeit(read_test, number=10_000) / 10_000} sec")

### Insert batch test:
* 100_000 docs inserted
* Insertion took 37.5226 seconds
* average insertion time: 0.0004 seconds

### Read batch test
* Reads Number = 100_000 times 
* Average select time: 0.00039721483082976193 sec

## Update test
* Update Number = 10_000 times 
* Average update time: 0.0003978825124911964 sec

## Postgres

In [None]:
import psycopg2

In [None]:
connection = psycopg2.connect(dsn="dbname=postgres_db user=postgres password=postgres host=localhost port=5432")
cursor = connection.cursor()

In [None]:
cursor.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";')
connection.commit()


In [None]:
cursor.execute("DROP TABLE IF EXISTS collection")
connection.commit()


In [None]:
cursor.execute(
"""
    CREATE TABLE IF NOT EXISTS collection(
        id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
        user_id UUID,
        film_id UUID,
        comment TEXT,
        timestamp TIMESTAMP
    );
"""
)
connection.commit()

In [None]:
import time

start = time.time()
counter = 0
for row in generate_review():
    cursor.execute("INSERT INTO collection (user_id, film_id, comment, timestamp) VALUES(%(user_id)s,%(film_id)s,%(comment)s,%(timestamp)s)", row)
    counter += 1
end = time.time()

print(f"Batch {counter} inserted")
print(f"Insertion took {end - start:.4f} seconds")
print(f"average insertion time: {(end - start) / counter:.4f} seconds")

In [34]:
import timeit

def read_test():
    cursor.execute("SELECT * FROM collection WHERE user_id = %(user_id)s;", {"user_id": choice(users)})
    cursor.fetchone()

print(f"Average select time: {timeit.timeit(read_test, number=1000) / 1000} sec")


Average select time: 0.011274614583002404 sec


In [35]:
sql_pipeline = """SELECT user_id, COUNT(*) as count FROM collection.test_collection GROUP BY user_id ORDER BY count DESC LIMIT 10 """
def update_test():
    cursor.execute("UPDATE collection SET comment = %(comment)s WHERE user_id = %(user_id)s;", {"user_id": choice(users), "comment": "test_comment"})

print(f"Average update time: {timeit.timeit(update_test, number=1000) / 1000} sec")

Average update time: 0.0204745907089673 sec


### Insert batch test:
* 100_000 inserted rows
* Insertion took 30.5753 seconds
* average insertion time: 0.0003 seconds

### Read batch test
* Reads Number = 1000 times 
* Average select time: 0.011274614583002404 sec

## Update test
* Update Number = 1000 times 
* Average update time: 0.0204745907089673 sec