LOAD

In [None]:
import collections.abc as collections_abc
import dataclasses
import itertools
import typing

import faker
import more_itertools
import psycopg2

import settings
import utils.profilers as profiler_utils
import utils.psycopg2 as psycopg2_utils

In [None]:
# Open the single PostgreSQL connection used by every cell below; the connect
# keyword arguments come from the project settings module.
connection = psycopg2.connect(**settings.POSTGRESQL_DATABASE_SETTINGS)

def create_tables(connection: psycopg2_utils.Connection) -> None:
    """Create the ``users`` table when it is not already present, then commit.

    Args:
        connection: Open database connection used to run and commit the DDL.
    """
    # The statement text is kept verbatim, including its indentation.
    ddl = """
            CREATE TABLE IF NOT EXISTS users(
                id serial primary key,
                name text NOT NULL,
                description text NOT NULL
            )
            """
    with connection.cursor() as cur:
        cur.execute(ddl)
    connection.commit()

# Make sure the users table exists before any insert strategy is run.
create_tables(connection)

In [None]:
def truncate_tables(connection: psycopg2_utils.Connection) -> None:
    """Remove all rows from the ``users`` table and commit.

    Args:
        connection: Open database connection used to run and commit the statement.
    """
    with connection.cursor() as cur:
        cur.execute("TRUNCATE users")
    connection.commit()

In [None]:
@dataclasses.dataclass
class User:
    """In-memory record for one row to insert into the ``users`` table.

    The ``id`` column is not represented here; the table declares it as
    ``serial``, so the inserts in this file only supply the two text columns.
    """

    # Value written to the ``name`` column.
    name: str
    # Value written to the ``description`` column.
    description: str

In [None]:
# Number of fake users produced by each call to gen_fake_users().
SIZE = 10_000

def gen_fake_users() -> collections_abc.Iterator[User]:
    """Return a lazy, single-pass iterator over ``SIZE`` fake users.

    Returns:
        An iterator that builds each ``User`` on demand; names and
        descriptions come from one Faker instance created by this call.
    """
    provider = faker.Faker()

    def _produce(ticks: range) -> collections_abc.Iterator[User]:
        for _ in ticks:
            yield User(name=provider.name(), description=provider.text())

    # SIZE is read here, at call time, not when the iterator is first advanced.
    return _produce(range(SIZE))

In [None]:
# Signature shared by the insert strategies: each takes the connection and the
# users to insert, and returns nothing.
ExecuteType = collections_abc.Callable[[psycopg2_utils.Connection, collections_abc.Iterator[User]], None]

def run_execution(func: ExecuteType, connection: psycopg2_utils.Connection) -> None:
    """Run one insert strategy on freshly generated users, then empty the table.

    The table is truncated even when the strategy fails, so rows committed by
    a partially completed run do not leak into the next one.

    Args:
        func: Insert strategy, called as ``func(connection, users)``.
        connection: Open database connection passed to ``func`` and used for
            the cleanup afterwards.

    Raises:
        Exception: Any error raised by ``func`` is re-raised after cleanup.
    """
    users = gen_fake_users()
    try:
        func(connection, users)
    except Exception:
        # PostgreSQL rejects further statements in a failed transaction, so
        # discard it first or the TRUNCATE below could not run.
        connection.rollback()
        raise
    finally:
        truncate_tables(connection)

In [None]:
@profiler_utils.profile
def execute_single(connection: psycopg2_utils.Connection, users: collections_abc.Iterator[User]) -> None:
    """Insert users one ``cursor.execute`` call at a time, committing once at the end.

    Args:
        connection: Open database connection to insert through.
        users: Users to insert; consumed in a single pass.
    """
    insert_sql = "INSERT INTO users (name, description) VALUES (%s, %s)"
    with connection.cursor() as cur:
        for entry in users:
            cur.execute(insert_sql, (entry.name, entry.description))
    connection.commit()

In [None]:
@profiler_utils.profile
def executemany(connection: psycopg2_utils.Connection, users: collections_abc.Iterator[User]) -> None:
    """Insert all users through a single ``cursor.executemany`` call, then commit.

    Args:
        connection: Open database connection to insert through.
        users: Users to insert; consumed lazily in a single pass.
    """
    insert_sql = "INSERT INTO users (name, description) VALUES (%s, %s)"

    def _rows() -> collections_abc.Iterator[tuple[str, str]]:
        # Yield one parameter tuple per user without building a list.
        for entry in users:
            yield entry.name, entry.description

    with connection.cursor() as cur:
        cur.executemany(insert_sql, _rows())
    connection.commit()

In [None]:
@profiler_utils.profile
def execute_single_query(connection: psycopg2_utils.Connection, users: collections_abc.Iterator[User]) -> None:
    """Insert all users with one multi-row ``INSERT ... VALUES`` statement, then commit.

    Every user is materialized in memory, because the statement needs one
    ``(%s, %s)`` placeholder group per row before it can be executed.

    Args:
        connection: Open database connection to insert through.
        users: Users to insert; consumed in a single pass. When it yields
            nothing, the function returns without touching the database.
    """
    rows = [(user.name, user.description) for user in users]
    if not rows:
        # "VALUES" followed by no tuples is not valid SQL, so skip the query.
        return
    placeholders = ",".join(["(%s, %s)"] * len(rows))
    stmt = f"INSERT INTO users (name, description) VALUES {placeholders}"
    # Flatten the (name, description) pairs to line up with the placeholders.
    data = list(itertools.chain.from_iterable(rows))
    with connection.cursor() as cursor:
        cursor.execute(stmt, data)
    connection.commit()

In [None]:
# Row-by-row strategy: one execute() per user, a single commit at the end.
run_execution(execute_single, connection)

In [None]:
# executemany() strategy: all rows passed to one executemany() call.
run_execution(executemany, connection)

In [None]:
# Single-statement strategy: one multi-row INSERT covering every user.
run_execution(execute_single_query, connection)

In [None]:
# Chunk length passed to more_itertools.ichunked by the chunked strategies,
# which commit once per chunk.
CHUNK_SIZE = 500

In [None]:
@profiler_utils.profile
def execute_chunks(connection: psycopg2_utils.Connection, users: collections_abc.Iterator[User]) -> None:
    """Insert users row by row, committing after every chunk of ``CHUNK_SIZE`` users.

    Args:
        connection: Open database connection to insert through.
        users: Users to insert; consumed in a single pass.
    """
    insert_sql = "INSERT INTO users (name, description) VALUES (%s, %s)"
    with connection.cursor() as cur:
        for batch in more_itertools.ichunked(users, CHUNK_SIZE):
            for entry in batch:
                cur.execute(insert_sql, (entry.name, entry.description))
            # Commit the finished chunk before pulling the next one.
            connection.commit()

In [None]:
@profiler_utils.profile
def executemany_chunks(connection: psycopg2_utils.Connection, users: collections_abc.Iterator[User]) -> None:
    """Insert users with one ``executemany`` call and one commit per chunk.

    Args:
        connection: Open database connection to insert through.
        users: Users to insert; consumed in a single pass, ``CHUNK_SIZE`` at a time.
    """
    insert_sql = "INSERT INTO users (name, description) VALUES (%s, %s)"
    with connection.cursor() as cur:
        for batch in more_itertools.ichunked(users, CHUNK_SIZE):
            # Materialize the chunk's parameter tuples before sending them.
            rows = [(entry.name, entry.description) for entry in batch]
            cur.executemany(insert_sql, rows)
            connection.commit()

In [None]:
@profiler_utils.profile
def execute_single_query_chunks(connection: psycopg2_utils.Connection, users: collections_abc.Iterator[User]) -> None:
    """Insert users with one multi-row ``INSERT`` and one commit per chunk.

    Args:
        connection: Open database connection to insert through.
        users: Users to insert; consumed in a single pass, ``CHUNK_SIZE`` at a time.
    """
    with connection.cursor() as cur:
        for batch in more_itertools.ichunked(users, CHUNK_SIZE):
            # Flat parameter list: name, description, name, description, ...
            params = []
            for entry in batch:
                params.extend((entry.name, entry.description))
            # One "(%s, %s)" group per user in this chunk.
            placeholders = ",".join(["(%s, %s)"] * (len(params) // 2))
            cur.execute(f"INSERT INTO users (name, description) VALUES {placeholders}", params)
            connection.commit()

In [None]:
# Row-by-row strategy again, without chunking — presumably re-run here as the
# baseline for the chunked variants below.
run_execution(execute_single, connection)

In [None]:
# Row-by-row inserts with a commit after every chunk.
run_execution(execute_chunks, connection)

In [None]:
# One executemany() call and one commit per chunk.
run_execution(executemany_chunks, connection)

In [None]:
# One multi-row INSERT and one commit per chunk.
run_execution(execute_single_query_chunks, connection)

In [None]:
def drop_tables(connection: psycopg2_utils.Connection) -> None:
    """Drop the ``users`` table and commit.

    Unlike ``truncate_tables``, this removes the table itself, not only its rows.

    Args:
        connection: Open database connection used to run and commit the statement.
    """
    with connection.cursor() as cursor:
        # IF EXISTS keeps this cleanup safe to re-run after the table is gone.
        cursor.execute("DROP TABLE IF EXISTS users")
    connection.commit()

# Final cleanup step for the users table once all runs are done.
drop_tables(connection)
