# Исследование Postgres

In [114]:
import time
import statistics

class TimerCode:
    def __init__(self):
        self.start = time.time()
        self.durations = []

    def setup_start_time(self):
        self.start = time.time()

    def checkpoint(self):
        end_time = time.time()
        self.durations.append(end_time - self.start)
        self.start = end_time

    def get_current_stat(self):
        print(f"Median - {statistics.median(self.durations)}")
        avg = statistics.mean(self.durations)
        print(f"Average - {avg}")
        print(f"Summary - {sum(self.durations)}")

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.get_current_stat()

In [115]:
import psycopg2
import random

db_params = {
    'host': 'localhost',
    'port': 5432,
    'dbname': 'db',
    'user': 'user',
    'password': 'password'
}

conn = psycopg2.connect(**db_params)
cur = conn.cursor()

cur.execute("DROP TABLE IF EXISTS users CASCADE;")
cur.execute("DROP TABLE IF EXISTS movies CASCADE;")
cur.execute("DROP TABLE IF EXISTS bookmarks CASCADE;")
cur.execute("""
    CREATE TABLE IF NOT EXISTS users (
        id int PRIMARY KEY,
        name VARCHAR(255) NOT NULL
    );
""")

cur.execute("""
    CREATE TABLE IF NOT EXISTS movies (
        id int PRIMARY KEY,
        title VARCHAR(255) NOT NULL
    );
""")

cur.execute("""
    CREATE TABLE IF NOT EXISTS bookmarks (
        user_id int REFERENCES users(id),
        movie_id int REFERENCES movies(id),
        PRIMARY KEY (user_id, movie_id)
    );
""")

cur.execute("TRUNCATE TABLE users CASCADE;")
cur.execute("TRUNCATE TABLE movies CASCADE;")
cur.execute("TRUNCATE TABLE bookmarks CASCADE;")



In [116]:
# Generate some fake user and movie ids
USERS_NUM = 1_000
MOVIES_NUM = 3_000

BOOKMARKS_PER_USER_LIST = [1, 2, 5, 10, 100, 200, 500, 1000]

user_ids = [i for i in range(USERS_NUM)]
movie_ids = [i for i in range(MOVIES_NUM)]


In [117]:

with TimerCode() as timer:
    print('Время загрузки пользователей')
    timer.setup_start_time()
    for user_id in user_ids:
        cur.execute("INSERT INTO users (id, name) VALUES (%s, %s)", (user_id, user_id))
    timer.checkpoint()


Время загрузки пользователей
Median - 0.9113919734954834
Average - 0.9113919734954834
Summary - 0.9113919734954834


In [118]:
with TimerCode() as timer:
    print('Время загрузки фильмов')
    timer.setup_start_time()
    for movie_id in movie_ids:
        cur.execute("INSERT INTO movies (id, title) VALUES (%s, %s)", (movie_id, movie_id))
    timer.checkpoint()



Время загрузки фильмов
Median - 2.3618149757385254
Average - 2.3618149757385254
Summary - 2.3618149757385254


In [119]:
from psycopg2.extras import execute_values

for bookmarks_per_user in BOOKMARKS_PER_USER_LIST:
    print(f"Total bookmarks: {bookmarks_per_user * USERS_NUM}")
    with TimerCode() as timer:
        print('Время загрузки закладок')
        timer.setup_start_time()
        bookmark_tuples = [
        (user_id, movie_id) for user_id in user_ids
        for movie_id in random.sample(movie_ids, bookmarks_per_user)
    ]
        execute_values(cur, "INSERT INTO bookmarks (user_id, movie_id) VALUES %s ON CONFLICT (user_id, movie_id) DO NOTHING", bookmark_tuples)
        timer.checkpoint()

    with TimerCode() as timer:
        print('Время чтения закладок')
        timer.setup_start_time()
        cur.execute("SELECT * FROM bookmarks;")
        timer.checkpoint()

    for item in ('bookmarks', 'users', 'movies'):
        cur.execute(f"SELECT COUNT(*) FROM {item};")
        row = cur.fetchone()
        count = row[0]
        print(f'Всего {item}: {count}')
    print('=' * 50, '\n')

conn.commit()
cur.close()
conn.close()




Median - 7.697979927062988
Average - 7.697979927062988
Summary - 7.697979927062988
Время чтения закладок
Median - 0.13214898109436035
Average - 0.13214898109436035
Summary - 0.13214898109436035
Всего bookmarks: 309626
Всего users: 1000
Всего movies: 3000

Total bookmarks: 500000
Время загрузки закладок
Median - 18.206341981887817
Average - 18.206341981887817
Summary - 18.206341981887817
Время чтения закладок
Median - 0.2248551845550537
Average - 0.2248551845550537
Summary - 0.2248551845550537
Всего bookmarks: 758082
Всего users: 1000
Всего movies: 3000

Total bookmarks: 1000000
Время загрузки закладок
Median - 31.04560112953186
Average - 31.04560112953186
Summary - 31.04560112953186
Время чтения закладок
Median - 0.6207370758056641
Average - 0.6207370758056641
Summary - 0.6207370758056641
Всего bookmarks: 1505231
Всего users: 1000
Всего movies: 3000

Total bookmarks: 1000
Время загрузки закладок
Median - 0.07389402389526367
Average - 0.07389402389526367
Summary - 0.07389402389526367
Вр