# Описание схемы базы данных

## Импорт модулей

In [1]:
import pandas as pd
from sqlalchemy import create_engine, text

## Создание пользователя и базы данных

```sql
-- Role the notebook connects as; the name and password match the values
-- used in the connection cell.
CREATE USER entries_user WITH PASSWORD 'entries_password';
-- Database that holds every table created by this notebook, owned by that role.
CREATE DATABASE entries_db OWNER entries_user;
-- Explicitly grant the role all database-level privileges on entries_db.
GRANT ALL PRIVILEGES ON DATABASE entries_db TO entries_user;
```

## Подключение к БД

In [2]:
# Connection settings for the local PostgreSQL instance.
user, password = "entries_user", "entries_password"
host, port = "localhost", "5432"
database = "entries_db"

# SQLAlchemy engine shared by the cells below.
connection_url = f"postgresql://{user}:{password}@{host}:{port}/{database}"
engine = create_engine(connection_url)

# Every table this notebook manages; the clean-up cell drops each of them.
tables = [
    "entries_src",
    "intervals_tgt",
    "workdays_tgt",
    "aggregated_info_tgt",
    "emergency_ref",
    "departments_ref",
    "worker_department_xref",
]

## Удаление старых таблиц

In [3]:
# Remove the tables listed in `tables`, all inside a single transaction.
# IF EXISTS makes the statement a no-op for tables that are not there yet.
with engine.connect() as conn, conn.begin():
    for table in tables:
        drop_statement = text(f"DROP TABLE IF EXISTS {table};")
        conn.execute(drop_statement)

## Создание таблиц

### Исходная таблица с сырыми данными

In [4]:
# DDL for entries_src, the source table with the raw data.
# - every column is mandatory;
# - a (full_name, event_dt) pair may appear only once (UNIQUE constraint);
# - status is restricted to the three literal values listed in the CHECK.
# The statement is only defined here; it is executed in the data-loading cell.
query_entries_src = """CREATE TABLE entries_src(
    id INTEGER PRIMARY KEY,
    full_name VARCHAR(255) NOT NULL,
    event_dt TIMESTAMP NOT NULL,
    status VARCHAR(32) NOT NULL,
    CONSTRAINT unique_full_name_event_dt UNIQUE (full_name, event_dt),
    CONSTRAINT valid_status CHECK (status IN ('Вход', 'Выход', 'Доступ запрещён'))
);"""

### Справочная таблица с запланированными учениями

In [5]:
# DDL for emergency_ref, the reference table of scheduled drills.
# name and event_dt are mandatory; duration is nullable and capped at 60.
# NOTE(review): the unit of duration is not stated here (presumably minutes —
# confirm), and the CHECK also admits negative values.
query_emergency_ref = """CREATE TABLE emergency_ref(
    id INTEGER PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    event_dt TIMESTAMP NOT NULL,
    duration INTEGER CHECK (duration <= 60)
);"""

### Справочная таблица департаментов

In [6]:
# DDL for departments_ref, the department reference table.
# enter_hour and exit_hour are mandatory whole hours restricted to 0..23.
# NOTE(review): presumably the scheduled start and end of the working day for
# the department — confirm against the code that reads this table.
query_dep_ref = """CREATE TABLE departments_ref(
    id INTEGER PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    enter_hour INTEGER NOT NULL CHECK (enter_hour BETWEEN 0 AND 23),
    exit_hour INTEGER NOT NULL CHECK (exit_hour BETWEEN 0 AND 23)
);"""

### Таблица связей «сотрудник — департамент»

In [7]:
# DDL for worker_department_xref, which links an employee (full_name) to a
# department. Both columns are mandatory text values.
# NOTE(review): department has no foreign key to departments_ref; presumably
# it holds departments_ref.name — confirm.
query_worker_dep_xref = """CREATE TABLE worker_department_xref(
    id INTEGER PRIMARY KEY,
    full_name VARCHAR(255) NOT NULL,
    department VARCHAR(255) NOT NULL
);"""

### Целевая таблица с интервалами пребывания сотрудников на рабочем месте

In [8]:
# DDL for intervals_tgt, the target table with the intervals employees spent
# at the workplace.
# - a (full_name, enter_dt) pair may appear only once;
# - exit_dt is nullable.
# NOTE(review): presumably a NULL exit_dt marks an interval with no recorded
# exit — confirm against the code that fills this table.
query_intervals_tgt = """CREATE TABLE intervals_tgt(
    id INTEGER PRIMARY KEY,
    full_name VARCHAR(255) NOT NULL,
    enter_dt TIMESTAMP NOT NULL,
    exit_dt TIMESTAMP,
    CONSTRAINT unique_full_name_enter_dt UNIQUE (full_name, enter_dt)
);"""

### Целевая таблица с рабочими днями сотрудников

In [9]:
# DDL for workdays_tgt, the target table with employees' working days.
# Every column is mandatory and a (full_name, report_date) pair may appear
# only once.
# NOTE(review): report_date is declared TIMESTAMP rather than DATE — confirm
# that this is intended.
query_workdays_tgt = """CREATE TABLE workdays_tgt(
    id INTEGER PRIMARY KEY,
    full_name VARCHAR(255) NOT NULL,
    report_date TIMESTAMP NOT NULL,
    enter_dt TIMESTAMP NOT NULL,
    exit_dt TIMESTAMP NOT NULL,
    CONSTRAINT unique_full_name_report_date UNIQUE (full_name, report_date)
);"""

### Агрегированная информация

In [10]:
# DDL for aggregated_info_tgt, the table with aggregated information.
# - month is stored as text in YYYY-MM format (see the inline SQL comment);
# - every column is mandatory;
# - unlike the other tables, no primary key or unique constraint is declared.
# NOTE(review): the late_* and left_early_* columns look like per-bucket
# counters, with bucket bounds presumably in minutes — confirm against the
# code that fills this table.
query_aggregated_info_tgt = """CREATE TABLE aggregated_info_tgt(
    full_name TEXT NOT NULL,
    month TEXT NOT NULL, -- формат YYYY-MM
    workdays_count INTEGER NOT NULL,
    on_time_count INTEGER NOT NULL,
    late_0_15 INTEGER NOT NULL,
    late_15_30 INTEGER NOT NULL,
    late_30_60 INTEGER NOT NULL,
    late_60_plus INTEGER NOT NULL,
    left_on_time_count INTEGER NOT NULL,
    left_early_0_15 INTEGER NOT NULL,
    left_early_15_30 INTEGER NOT NULL,
    left_early_30_60 INTEGER NOT NULL,
    left_early_60_plus INTEGER NOT NULL,
    full_day_count INTEGER NOT NULL,
    short_day_count INTEGER NOT NULL,
    avg_worktime NUMERIC(5,2) NOT NULL
);"""

## Загрузка датасета

In [11]:
def _read_indexed_csv(path):
    """Read the CSV at ``path`` into a DataFrame whose index is named ``id``."""
    frame = pd.read_csv(path)
    # The default 0-based index becomes the ``id`` column once reset_index()
    # is applied, which is the value the INSERT statements bind as :id.
    frame.index.name = "id"
    return frame


def _to_records(frame):
    """Convert ``frame`` (index included) into a list of bind-parameter dicts.

    Missing values are replaced with None so that they are sent as SQL NULL
    rather than as a float NaN.
    """
    flat = frame.reset_index()
    # Cast to object first: a float column cannot hold None and would keep NaN.
    return flat.astype(object).where(flat.notna(), None).to_dict(orient="records")


def _insert_records(conn, query, records):
    """Execute ``query`` on ``conn`` once for every parameter dict in ``records``."""
    # Nothing to insert: skip the call instead of executing the statement
    # without its bind values.
    if records:
        # A list of dicts is run as a single executemany call.
        conn.execute(text(query), records)


# Raw source data.
df_data = _read_indexed_csv("data/source/entries_src.csv")

query_insert_entries_src = """INSERT INTO entries_src (id, full_name, event_dt, status)
    VALUES (:id, :full_name, :event_dt, :status);"""

records_data = _to_records(df_data)


# Reference data: scheduled drills.
df_emergency = _read_indexed_csv("data/reference/emergency_ref.csv")

query_insert_emergency_ref = """INSERT INTO emergency_ref (id, name, event_dt, duration)
    VALUES (:id, :name, :event_dt, :duration);"""

records_emergency = _to_records(df_emergency)


# Reference data: departments.
df_dep = _read_indexed_csv("data/reference/departments_ref.csv")

query_insert_dep_ref = """INSERT INTO departments_ref (id, name, enter_hour, exit_hour)
    VALUES (:id, :name, :enter_hour, :exit_hour);"""

records_dep = _to_records(df_dep)


# Reference data: employee-to-department links.
df_worker_dep = _read_indexed_csv("data/reference/worker_department_xref.csv")

query_insert_worker_dep_ref = """INSERT INTO worker_department_xref (id, full_name, department)
    VALUES (:id, :full_name, :department);"""

records_worker_dep = _to_records(df_worker_dep)


# Table creation and data loading share one transaction: if any statement
# fails, nothing is committed.
with engine.connect() as conn:
    with conn.begin():
        # Source table, then reference tables, then target tables.
        for ddl in (
            query_entries_src,
            query_emergency_ref,
            query_dep_ref,
            query_worker_dep_xref,
            query_intervals_tgt,
            query_workdays_tgt,
            query_aggregated_info_tgt,
        ):
            conn.execute(text(ddl))

        # Only the source and reference tables are populated here.
        for insert_query, records in (
            (query_insert_entries_src, records_data),
            (query_insert_emergency_ref, records_emergency),
            (query_insert_dep_ref, records_dep),
            (query_insert_worker_dep_ref, records_worker_dep),
        ):
            _insert_records(conn, insert_query, records)