# Исследование Postgres

In [23]:
import time
import statistics

class TimerCode:
    def __init__(self):
        self.start = time.time()
        self.durations = []

    def setup_start_time(self):
        self.start = time.time()

    def checkpoint(self):
        end_time = time.time()
        self.durations.append(end_time - self.start)
        self.start = end_time

    def get_current_stat(self):
        print(f"Median - {statistics.median(self.durations)}")
        avg = statistics.mean(self.durations)
        print(f"Average - {avg}")
        print(f"Summary - {sum(self.durations)}")

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.get_current_stat()

In [24]:
import psycopg2
import uuid
import random

db_params = {
    'host': 'localhost',
    'port': 5432,
    'dbname': 'db',
    'user': 'user',
    'password': 'password'
}

conn = psycopg2.connect(**db_params)
cur = conn.cursor()

cur.execute("DROP TABLE IF EXISTS users CASCADE;")
cur.execute("DROP TABLE IF EXISTS movies CASCADE;")
cur.execute("DROP TABLE IF EXISTS bookmarks CASCADE;")
cur.execute("""
    CREATE TABLE IF NOT EXISTS users (
        id int PRIMARY KEY,
        name VARCHAR(255) NOT NULL
    );
""")

cur.execute("""
    CREATE TABLE IF NOT EXISTS movies (
        id int PRIMARY KEY,
        title VARCHAR(255) NOT NULL
    );
""")

cur.execute("""
    CREATE TABLE IF NOT EXISTS bookmarks (
        user_id int REFERENCES users(id),
        movie_id int REFERENCES movies(id),
        PRIMARY KEY (user_id, movie_id)
    );
""")

cur.execute("TRUNCATE TABLE users CASCADE;")
cur.execute("TRUNCATE TABLE movies CASCADE;")
cur.execute("TRUNCATE TABLE bookmarks CASCADE;")


# Generate some fake user and movie ids
USERS_NUM = 5_000
MOVIES_NUM = 3_000

BOOKMARKS_PER_USER = 40

user_ids = [i for i in range(USERS_NUM)]
movie_ids = [i for i in range(MOVIES_NUM)]


In [25]:

with TimerCode() as timer:
    print('Время загрузки пользователей')
    timer.setup_start_time()
    for user_id in user_ids:
        cur.execute("INSERT INTO users (id, name) VALUES (%s, %s)", (user_id, user_id))
    timer.checkpoint()


Время загрузки пользователей
Median - 4.47627592086792
Average - 4.47627592086792
Summary - 4.47627592086792


In [26]:
with TimerCode() as timer:
    print('Время загрузки фильмов')
    timer.setup_start_time()
    for movie_id in movie_ids:
        cur.execute("INSERT INTO movies (id, title) VALUES (%s, %s)", (movie_id, movie_id))
    timer.checkpoint()



Время загрузки фильмов
Median - 2.495952844619751
Average - 2.495952844619751
Summary - 2.495952844619751


In [27]:
with TimerCode() as timer:
    print('Время загрузки закладок')
    for user_id in user_ids:
        bookmarks = [
            movie_id for movie_id in random.sample(movie_ids, BOOKMARKS_PER_USER)
        ]
    timer.setup_start_time()
    for movie_id in bookmarks:
        cur.execute("INSERT INTO bookmarks (user_id, movie_id) VALUES (%s, %s)", (user_id, movie_id))
    timer.checkpoint()



Время загрузки закладок
Median - 0.033963918685913086
Average - 0.033963918685913086
Summary - 0.033963918685913086


In [29]:
with TimerCode() as timer:
    print('Время чтения закладок')
    timer.setup_start_time()
    cur.execute("SELECT * FROM bookmarks;")
    timer.checkpoint()


conn.commit()
cur.close()
conn.close()

Время чтения закладок
Median - 0.002110004425048828
Average - 0.002110004425048828
Summary - 0.002110004425048828
