In [3]:
import os
import pandas as pd
from sqlalchemy import create_engine, text

# ---------- НАСТРОЙКИ ----------
DB_URI_ARCHIVE = "postgresql://postgres:smartgrid@172.31.168.2/archive_test"
OUTPUT_DIR = "data"
TOPIC = "P0086H01/I002/Ptot"   # <-- сюда впиши нужный topic
YEAR = 2025
# -------------------------------

os.makedirs(OUTPUT_DIR, exist_ok=True)

engine_archive = create_engine(DB_URI_ARCHIVE)

# 1. Берём tag_id по topic
sql_tag = text("""
    SELECT id
    FROM tags
    WHERE topic = :topic
    LIMIT 1
""")

with engine_archive.connect() as conn:
    row = conn.execute(sql_tag, {"topic": TOPIC}).fetchone()

if row is None:
    print(f"❌ Топик '{TOPIC}' не найден в таблице tags")
else:
    tag_id = row[0]
    print(f"Найден tag_id={tag_id} для topic='{TOPIC}'")

    # 2. Вытаскиваем все значения из archive за 2025 год
    start = f"{YEAR}-01-01"
    end   = f"{YEAR+1}-01-01"

    sql_arc = text("""
        SELECT 
            a.tmstamp,
            a.value,
            a.tag_id,
            t.topic
        FROM archive a
        JOIN tags t ON t.id = a.tag_id
        WHERE a.tag_id = :tag_id
          AND a.tmstamp >= :start
          AND a.tmstamp < :end
        ORDER BY a.tmstamp
    """)

    with engine_archive.connect() as conn:
        df_prod = pd.read_sql(
            sql_arc,
            conn,
            params={"tag_id": tag_id, "start": start, "end": end}
        )

    print("Всего строк:", len(df_prod))
    if len(df_prod) == 0:
        display(df_prod)
    else:
        # приведение времени и немного удобства
        df_prod["tmstamp"] = pd.to_datetime(df_prod["tmstamp"])
        df_prod = df_prod.sort_values("tmstamp").reset_index(drop=True)
        df_prod["date"] = df_prod["tmstamp"].dt.date

        display(df_prod.head())

        # 3. сохраняем в один файл
        safe_topic = TOPIC.replace("/", "_")
        out_csv = os.path.join(OUTPUT_DIR, f"production_{safe_topic}_{YEAR}.csv")
        out_pq  = os.path.join(OUTPUT_DIR, f"production_{safe_topic}_{YEAR}.parquet")

        df_prod.to_csv(out_csv, index=False, encoding="utf-8")
        df_prod.to_parquet(out_pq, index=False)

        print("✅ Сохранено:")
        print("CSV:", out_csv)
        print("Parquet:", out_pq)



Найден tag_id=12223 для topic='P0086H01/I002/Ptot'
Всего строк: 26946


Unnamed: 0,tmstamp,value,tag_id,topic,date
0,2025-01-01 05:30:00,0.0,12223,P0086H01/I002/Ptot,2025-01-01
1,2025-01-01 05:35:00,0.0,12223,P0086H01/I002/Ptot,2025-01-01
2,2025-01-01 05:40:00,34.813953,12223,P0086H01/I002/Ptot,2025-01-01
3,2025-01-01 05:45:00,150.93024,12223,P0086H01/I002/Ptot,2025-01-01
4,2025-01-01 05:50:00,318.74417,12223,P0086H01/I002/Ptot,2025-01-01


ImportError: Unable to find a usable engine; tried using: 'pyarrow', 'fastparquet'.
A suitable version of pyarrow or fastparquet is required for parquet support.
Trying to import the above resulted in these errors:
 - Missing optional dependency 'pyarrow'. pyarrow is required for parquet support. Use pip or conda to install pyarrow.
 - Missing optional dependency 'fastparquet'. fastparquet is required for parquet support. Use pip or conda to install fastparquet.