# Описание схемы базы данных

### Импорт модулей

In [43]:
import os

import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

### Создание пользователя и базы данных

```sql
CREATE USER entries_user WITH PASSWORD 'entries_password';
CREATE DATABASE entries_db OWNER entries_user;
GRANT ALL PRIVILEGES ON DATABASE entries_db TO entries_user;
```

### Подключение к БД

In [44]:
# Connection settings are read from the environment so that real credentials
# do not have to be committed together with the notebook. The fallback values
# are the ones this notebook has always used, so a local setup keeps working
# without any extra configuration.
user = os.environ.get("ENTRIES_DB_USER", "entries_user")
password = os.environ.get("ENTRIES_DB_PASSWORD", "entries_password")
host = os.environ.get("ENTRIES_DB_HOST", "localhost")
port = os.environ.get("ENTRIES_DB_PORT", "5432")
database = os.environ.get("ENTRIES_DB_NAME", "entries_db")

# URL.create() escapes special characters (e.g. "@", "/", ":") in the user
# name and password; interpolating them into an f-string would produce a
# malformed connection URL for such values.
engine = create_engine(
    URL.create(
        drivername="postgresql",
        username=user,
        password=password,
        host=host,
        port=int(port),  # kept as a string above; the URL object expects an int
        database=database,
    )
)

## Создание таблиц

Исходная таблица с сырыми данными.

In [45]:
# DDL for the raw source table entries_src.
# - (full_name, event_dt) must be unique.
# - status is limited to the three literals in the CHECK constraint
#   ('Вход' = entry, 'Выход' = exit, 'Доступ запрещён' = access denied).
# IF NOT EXISTS keeps the cell re-runnable on an already initialised database.
# Caveat: an existing table is left untouched even if this definition has
# been edited since it was created; drop the table to apply schema changes.
query_entries_src = (
    "CREATE TABLE IF NOT EXISTS entries_src("
    "id INTEGER PRIMARY KEY,"
    "full_name VARCHAR(255) NOT NULL,"
    "event_dt TIMESTAMP NOT NULL,"
    "status VARCHAR(32) NOT NULL,"
    "CONSTRAINT unique_full_name_event_dt UNIQUE (full_name, event_dt),"
    "CONSTRAINT valid_status CHECK (status IN ('Вход', 'Выход', 'Доступ запрещён'))"
    ");"
)

# engine.begin() opens a connection with a transaction already started,
# commits on success, rolls back on error and closes the connection on exit.
with engine.begin() as conn:
    conn.execute(text(query_entries_src))

Целевая таблица с интервалами пребывания сотрудников на рабочем месте.

In [46]:
# DDL for the target table intervals_tgt: one row per (full_name, enter_dt),
# enforced by the UNIQUE constraint. exit_dt is nullable.
# NOTE(review): a NULL exit_dt presumably marks an interval without a recorded
# exit - confirm against the code that fills this table.
# IF NOT EXISTS keeps the cell re-runnable on an already initialised database.
# Caveat: an existing table is left untouched even if this definition has
# been edited since it was created; drop the table to apply schema changes.
query_intervals_tgt = (
    "CREATE TABLE IF NOT EXISTS intervals_tgt("
    "id INTEGER PRIMARY KEY,"
    "full_name VARCHAR(255) NOT NULL,"
    "enter_dt TIMESTAMP NOT NULL,"
    "exit_dt TIMESTAMP,"
    "CONSTRAINT unique_full_name_enter_dt UNIQUE (full_name, enter_dt)"
    ");"
)

# engine.begin() opens a connection with a transaction already started,
# commits on success, rolls back on error and closes the connection on exit.
with engine.begin() as conn:
    conn.execute(text(query_intervals_tgt))

Целевая таблица с рабочими днями сотрудников.

In [47]:
# DDL for the target table workdays_tgt: one row per (full_name, report_dt),
# enforced by the UNIQUE constraint. exit_dt is nullable.
# NOTE(review): report_dt is declared as TIMESTAMP although the constraint
# suggests one row per employee and day - confirm whether DATE was intended.
# IF NOT EXISTS keeps the cell re-runnable on an already initialised database.
# Caveat: an existing table is left untouched even if this definition has
# been edited since it was created; drop the table to apply schema changes.
query_workdays_tgt = (
    "CREATE TABLE IF NOT EXISTS workdays_tgt("
    "id INTEGER PRIMARY KEY,"
    "full_name VARCHAR(255) NOT NULL,"
    "report_dt TIMESTAMP NOT NULL,"
    "enter_dt TIMESTAMP NOT NULL,"
    "exit_dt TIMESTAMP,"
    "CONSTRAINT unique_full_name_report_dt UNIQUE (full_name, report_dt)"
    ");"
)

# engine.begin() opens a connection with a transaction already started,
# commits on success, rolls back on error and closes the connection on exit.
with engine.begin() as conn:
    conn.execute(text(query_workdays_tgt))

### Агрегированная информация

Целевая таблица с агрегированными показателями по сотрудникам за месяц.

In [48]:
# DDL for the target table aggregated_info_tgt: integer counters and an
# average work time stored per full_name and month. The id column is SERIAL,
# so it is generated by the database on insert.
# NOTE(review): month is VARCHAR(7), presumably 'YYYY-MM' - confirm.
# NOTE(review): unlike the other tables there is no UNIQUE constraint on
# (full_name, month); confirm whether duplicate rows are intended.
# IF NOT EXISTS keeps the cell re-runnable on an already initialised database.
# Caveat: an existing table is left untouched even if this definition has
# been edited since it was created; drop the table to apply schema changes.
query_aggregated_info_tgt = """
    CREATE TABLE IF NOT EXISTS aggregated_info_tgt (
        id SERIAL PRIMARY KEY,
        full_name VARCHAR(255) NOT NULL,
        month VARCHAR(7) NOT NULL,

        workdays_count INTEGER NOT NULL,
        on_time_count INTEGER NOT NULL,

        late_0_15 INTEGER NOT NULL,
        late_15_30 INTEGER NOT NULL,
        late_30_60 INTEGER NOT NULL,
        late_60_plus INTEGER NOT NULL,

        full_day_count INTEGER NOT NULL,
        short_day_count INTEGER NOT NULL,

        avg_worktime NUMERIC(5, 2) NOT NULL
    );
"""

# engine.begin() opens a connection with a transaction already started,
# commits on success, rolls back on error and closes the connection on exit.
with engine.begin() as conn:
    conn.execute(text(query_aggregated_info_tgt))

## Загрузка датасета

In [49]:
# Load the raw events. read_csv() is called without index_col, so the frame
# gets a default 0..n-1 index; naming it "id" makes reset_index() below expose
# it as the "id" column that is written to entries_src.
df = pd.read_csv("data/source/entries.csv")
df.index.name = "id"

# The :name placeholders are bound from the keys of each record.
# NOTE(review): assumes the CSV columns are exactly full_name, event_dt and
# status - confirm against the data file.
# The two literals are joined without a separating space (")VALUES"); the
# statement text is deliberately left unchanged.
query_insert_entries = (
    "INSERT INTO entries_src (id, full_name, event_dt, status)"
    "VALUES (:id, :full_name, :event_dt, :status);"
)

records = df.reset_index().to_dict(orient="records")

# Passing the whole list of parameter sets hands all rows to the driver in a
# single executemany call instead of one Python-level execute() per row.
# The call is skipped for an empty dataset: without any parameter set the
# statement would be run with its bind values missing.
# Re-running this cell against an already populated table raises an integrity
# error on the primary key; the transaction is then rolled back as a whole.
if records:
    with engine.begin() as conn:
        conn.execute(text(query_insert_entries), records)