# Описание схемы базы данных

## Импорт модулей

In [127]:
import pandas as pd
from sqlalchemy import create_engine, text

## Создание пользователя и базы данных

```sql
CREATE USER entries_user WITH PASSWORD 'entries_password';
CREATE DATABASE entries_db OWNER entries_user;
GRANT ALL PRIVILEGES ON DATABASE entries_db TO entries_user;
```

## Подключение к БД

In [128]:
# Connection settings for the PostgreSQL role and database created by the
# SQL snippet above.
user, password = "entries_user", "entries_password"
host, port = "localhost", "5432"
database = "entries_db"

# Single engine shared by every cell below. The URL has the form
# postgresql://<user>:<password>@<host>:<port>/<database>.
engine = create_engine(
    "postgresql://{}:{}@{}:{}/{}".format(user, password, host, port, database)
)

# Names of all tables this notebook manages; the clean-up cell iterates over
# this list to drop them before they are recreated.
tables = [
    'entries_src',
    'intervals_tgt',
    'workdays_tgt',
    'aggregated_info_tgt',
    'emergency_ref',
]

## Удаление старых таблиц

In [129]:
# Remove any tables left over from a previous run so the CREATE TABLE
# statements below start from a clean schema. All drops share one
# transaction on one connection.
with engine.connect() as conn, conn.begin():
    for table in tables:
        drop_statement = text(f"DROP TABLE IF EXISTS {table};")
        conn.execute(drop_statement)

## Создание таблиц

### Исходная таблица с сырыми данными

In [130]:
# DDL for the raw source table `entries_src`.
# - `id` is a plain INTEGER primary key (no sequence/default), so every
#   INSERT has to supply it explicitly.
# - A person can have at most one event per timestamp
#   (UNIQUE (full_name, event_dt)).
# - `status` is limited by a CHECK constraint to three Russian labels:
#   'Вход' (entry), 'Выход' (exit), 'Доступ запрещён' (access denied).
query_entries_src = """CREATE TABLE entries_src(
    id INTEGER PRIMARY KEY,
    full_name VARCHAR(255) NOT NULL,
    event_dt TIMESTAMP NOT NULL,
    status VARCHAR(32) NOT NULL,
    CONSTRAINT unique_full_name_event_dt UNIQUE (full_name, event_dt),
    CONSTRAINT valid_status CHECK (status IN ('Вход', 'Выход', 'Доступ запрещён'))
);"""

### Справочная таблица с запланированными учениями

In [131]:
# DDL for the reference table `emergency_ref` (planned drills): a name, a
# start timestamp and a duration. `id` has no default and must be supplied
# on insert.
# NOTE(review): `duration` is nullable and only bounded from above (<= 60),
# so NULL, zero and negative values all pass the CHECK. The unit is not
# stated here - presumably minutes; confirm.
query_emergency_ref = """CREATE TABLE emergency_ref(
    id INTEGER PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    event_dt TIMESTAMP NOT NULL,
    duration INTEGER CHECK (duration <= 60)
);"""

### Целевая таблица с интервалами пребывания сотрудников на рабочем месте

In [132]:
# DDL for the target table `intervals_tgt`: one row per
# (full_name, enter_dt) pair, enforced by a UNIQUE constraint.
# `exit_dt` is nullable, so a row may hold an entry time with no exit time.
# `id` has no default and must be supplied on insert.
# NOTE(review): no constraint requires exit_dt >= enter_dt - confirm that
# the code filling this table guarantees it.
query_intervals_tgt = """CREATE TABLE intervals_tgt(
    id INTEGER PRIMARY KEY,
    full_name VARCHAR(255) NOT NULL,
    enter_dt TIMESTAMP NOT NULL,
    exit_dt TIMESTAMP,
    CONSTRAINT unique_full_name_enter_dt UNIQUE (full_name, enter_dt)
);"""

### Целевая таблица с рабочими днями сотрудников

In [None]:
# DDL for the target table `workdays_tgt`: one row per
# (full_name, report_date) with mandatory entry and exit timestamps.
# `id` has no default and must be supplied on insert.
# NOTE(review): `report_date` is declared TIMESTAMP, so the UNIQUE constraint
# compares full timestamps. It only means "one row per calendar day" if
# writers always store a truncated (midnight) value - confirm, or consider
# the DATE type.
query_workdays_tgt = """CREATE TABLE workdays_tgt(
    id INTEGER PRIMARY KEY,
    full_name VARCHAR(255) NOT NULL,
    report_date TIMESTAMP NOT NULL,
    enter_dt TIMESTAMP NOT NULL,
    exit_dt TIMESTAMP NOT NULL,
    CONSTRAINT unique_full_name_report_date UNIQUE (full_name, report_date)
);"""

### Агрегированная информация

In [134]:
# DDL for the target table `aggregated_info_tgt`: per-person, per-month
# counters (workdays, on-time arrivals, lateness buckets, full/short days)
# plus an average work time.
# - Unlike the other tables in this notebook, `id` is SERIAL, i.e. generated
#   by the database rather than supplied by the writer.
# - NUMERIC(5, 2) limits `avg_worktime` to values below 1000 with two
#   decimal places.
# NOTE(review): `month` is VARCHAR(7) - presumably a 'YYYY-MM' string;
# confirm against the code that fills this table.
# NOTE(review): there is no UNIQUE (full_name, month) constraint, although
# the other tables with a full_name column each declare a uniqueness
# constraint - confirm whether duplicates per person and month are intended.
query_aggregated_info_tgt = """
    CREATE TABLE aggregated_info_tgt (
        id SERIAL PRIMARY KEY,
        full_name VARCHAR(255) NOT NULL,
        month VARCHAR(7) NOT NULL,
        
        workdays_count INTEGER NOT NULL,
        on_time_count INTEGER NOT NULL,
        
        late_0_15 INTEGER NOT NULL,
        late_15_30 INTEGER NOT NULL,
        late_30_60 INTEGER NOT NULL,
        late_60_plus INTEGER NOT NULL,
        
        full_day_count INTEGER NOT NULL,
        short_day_count INTEGER NOT NULL,
        
        avg_worktime NUMERIC(5, 2) NOT NULL
);"""

## Загрузка датасета

In [135]:
def _read_with_row_id(csv_path):
    """Read *csv_path* into a DataFrame whose index is named ``id``.

    ``pd.read_csv`` is called without ``index_col``, so the index is the
    default 0-based row position. Naming it ``id`` makes ``reset_index()``
    expose it as the ``id`` column that the INSERT statements bind to.
    """
    frame = pd.read_csv(csv_path)
    frame.index.name = "id"
    return frame


def _insert_rows(conn, insert_sql, rows):
    """Insert *rows* (a list of parameter dicts) with a single execute call.

    Handing the whole list to ``Connection.execute`` lets SQLAlchemy use the
    driver's executemany path, and the ``text()`` construct is built once
    instead of once per row. An empty batch is skipped, so an empty CSV
    results in no statement being executed at all.
    """
    if rows:
        conn.execute(text(insert_sql), rows)


# --- Raw entry events -------------------------------------------------------
df_data = _read_with_row_id("data/source/entries_src.csv")

query_insert_entries_src = """INSERT INTO entries_src (id, full_name, event_dt, status)
    VALUES (:id, :full_name, :event_dt, :status);"""

# One dict per CSV row; the keys are the column names plus "id".
records_data = df_data.reset_index().to_dict(orient="records")


# --- Planned drills reference -----------------------------------------------
df_emergency = _read_with_row_id("data/reference/emergency_ref.csv")

query_insert_emergency_ref = """INSERT INTO emergency_ref (id, name, event_dt, duration)
    VALUES (:id, :name, :event_dt, :duration);"""

records_emergency = df_emergency.reset_index().to_dict(orient="records")


# Create every table and load both CSVs inside one transaction: the context
# manager commits when the block finishes and rolls back if anything raises.
with engine.connect() as conn, conn.begin():
    create_statements = (
        query_entries_src,
        query_emergency_ref,
        query_intervals_tgt,
        query_workdays_tgt,
        query_aggregated_info_tgt,
    )
    for ddl in create_statements:
        conn.execute(text(ddl))

    _insert_rows(conn, query_insert_entries_src, records_data)
    _insert_rows(conn, query_insert_emergency_ref, records_emergency)