In [1]:
import os
import pandas as pd

DATA_DIR = "data"

# параметры нашего объекта/топика
USER_OBJECT_ID = 70
TOPIC = "P0086H01/I002/Ptot"

# ---- 1. Читаем погоду ----
weather_file = os.path.join(DATA_DIR, f"weather_uid_{USER_OBJECT_ID}_full.csv")
df_w = pd.read_csv(weather_file, parse_dates=["time"])
print("Погода:", df_w.shape)

# ---- 2. Читаем выработку ----
prod_file = os.path.join(DATA_DIR, "production_P0086H01_I002_Ptot_2025.csv")
df_p = pd.read_csv(prod_file, parse_dates=["tmstamp"])
print("Выработка (сырая):", df_p.shape)

# приводим к 15-минутному шагу
df_p_15 = (
    df_p[["tmstamp", "value"]]  # только нужные колонки
    .set_index("tmstamp")
    .resample("15min")
    .mean()
    .reset_index()
)
df_p_15.rename(columns={"tmstamp": "time", "value": "power_kw"}, inplace=True)
print("Выработка (15 мин):", df_p_15.shape)

# ---- 3. Мерджим погоду и выработку по времени ----
df_w_sorted = df_w.sort_values("time")
df_p_sorted = df_p_15.sort_values("time")

df = pd.merge_asof(
    df_p_sorted,
    df_w_sorted,
    on="time",
    direction="nearest",
    tolerance=pd.Timedelta("7min"),
)

# убираем строки, где нет погоды
df = df.dropna(subset=["temp_c", "cloud"]).reset_index(drop=True)
print("После merge:", df.shape)

# ---- 4. Удаляем тёмное время (нет выработки) ----
df_day = df[df["power_kw"] > 0].copy()
print("После фильтра дня (power_kw > 0):", df_day.shape)

# лишняя дата/время в удобном виде
df_day["date"] = df_day["time"].dt.date
df_day["hour"] = df_day["time"].dt.hour
df_day["minute"] = df_day["time"].dt.minute
df_day["dayofyear"] = df_day["time"].dt.dayofyear

# ---- 5. Подтягиваем спецификацию панели из CSV ----
spec_file = os.path.join(DATA_DIR, "tag_spec_local.csv")
df_spec = pd.read_csv(spec_file)

row_spec = df_spec[df_spec["tag"] == TOPIC]
if row_spec.empty:
    print(f"⚠ В tag_spec_local.csv не найден тег {TOPIC}")
else:
    spec = row_spec.iloc[0].to_dict()
    # добавляем все поля спеки как константы по строкам
    for col, val in spec.items():
        df_day[col] = val

# ---- 6. Финальный learning dataset ----
cols_order = [
    "time",
    "date",
    "hour",
    "minute",
    "dayofyear",
    "temp_c",
    "cloud",
    "power_kw",
]

# добавляем, если есть в df_day
spec_cols = [c for c in [
    "tag", "sm_user_object_id",
    "latitude", "longitude",
    "tilt", "azimuth",
    "module_length", "module_width",
    "module_efficiency",
    "total_panels",
    "commissioning_date",
    "degradation_rate"
] if c in df_day.columns]

learning_df = df_day[cols_order + spec_cols].copy()
learning_df = learning_df.sort_values("time").reset_index(drop=True)

out_path = os.path.join(
    DATA_DIR,
    f"learning_dataset_uid{USER_OBJECT_ID}_{TOPIC.replace('/', '_')}.csv"
)
learning_df.to_csv(out_path, index=False, encoding="utf-8")

print("✅ Learning dataset сохранён в:", out_path)
display(learning_df.head())


Погода: (23468, 5)
Выработка (сырая): (26946, 5)
Выработка (15 мин): (16426, 2)
После merge: (11371, 6)
После фильтра дня (power_kw > 0): (7034, 6)
✅ Learning dataset сохранён в: data\learning_dataset_uid70_P0086H01_I002_Ptot.csv


Unnamed: 0,time,date,hour,minute,dayofyear,temp_c,cloud,power_kw,tag,sm_user_object_id,latitude,longitude,tilt,azimuth,module_length,module_width,module_efficiency,total_panels,commissioning_date,degradation_rate
0,2025-01-31 05:30:00,2025-01-31,5,30,31,9.2,33.0,5.147287,P0086H01/I002/Ptot,70,43.151251,27.451342,15.0,,2278.0,1134.0,21.2,172,2020-01-01,0.55
1,2025-01-31 05:45:00,2025-01-31,5,45,31,9.2,33.0,306.941863,P0086H01/I002/Ptot,70,43.151251,27.451342,15.0,,2278.0,1134.0,21.2,172,2020-01-01,0.55
2,2025-01-31 06:00:00,2025-01-31,6,0,31,8.8,56.0,412.689934,P0086H01/I002/Ptot,70,43.151251,27.451342,15.0,,2278.0,1134.0,21.2,172,2020-01-01,0.55
3,2025-01-31 06:15:00,2025-01-31,6,15,31,8.8,56.0,949.100783,P0086H01/I002/Ptot,70,43.151251,27.451342,15.0,,2278.0,1134.0,21.2,172,2020-01-01,0.55
4,2025-01-31 06:30:00,2025-01-31,6,30,31,8.8,56.0,1073.821667,P0086H01/I002/Ptot,70,43.151251,27.451342,15.0,,2278.0,1134.0,21.2,172,2020-01-01,0.55


In [6]:
import pvlib
import pandas as pd
import numpy as np
import os

# Если learning_df уже в памяти – ок.
# Если нет, можно подгрузить:
# learning_df = pd.read_csv("data/learning_dataset_uid70_P0086H01_I002_Ptot.csv", parse_dates=["time"])

# ---- 1. Параметры панели ----
lat = float(learning_df["latitude"].iloc[0])
lon = float(learning_df["longitude"].iloc[0])
tilt = float(learning_df["tilt"].iloc[0])
azimuth = float(learning_df["azimuth"].iloc[0]) if "azimuth" in learning_df.columns else np.nan

# если азимут NaN → считаем, что панели смотрят на юг (180°)
if np.isnan(azimuth):
    azimuth = 180.0

print("Параметры панели:")
print(" lat =", lat, " lon =", lon, " tilt =", tilt, " azimuth =", azimuth)

# ---- 2. Таймстемпы → DatetimeIndex с таймзоной ----
times = pd.to_datetime(learning_df["time"])

# если без таймзоны — локализуем
if times.dt.tz is None:
    times = times.dt.tz_localize("Europe/Sofia")

# ВАЖНО: превращаем в DatetimeIndex (а не Series)
times = pd.DatetimeIndex(times)

# ---- 3. Clearksy GHI/DNI/DHI ----
location = pvlib.location.Location(lat, lon, tz="Europe/Sofia")
cs = location.get_clearsky(times, model="ineichen")

# ---- 4. Положение солнца и POA ----
solar_position = location.get_solarposition(times)

poa = pvlib.irradiance.get_total_irradiance(
    surface_tilt=tilt,
    surface_azimuth=azimuth,
    dni=cs["dni"],
    ghi=cs["ghi"],
    dhi=cs["dhi"],
    solar_zenith=solar_position["zenith"],
    solar_azimuth=solar_position["azimuth"],
)

learning_df["clearsky_poa"] = poa["poa_global"]

print("Добавлена колонка clearsky_poa — радиация чистого неба на плоскость панели (Вт/м²)")
display(learning_df.head())

# ---- 5. Сохраняем обновлённый датасет ----
if "out_path" in globals():
    save_path = out_path.replace(".csv", "_with_poa.csv")
else:
    save_path = os.path.join("data", "learning_dataset_with_poa.csv")

learning_df.to_csv(save_path, index=False, encoding="utf-8")
print("✅ Обновлённый файл сохранён в:", save_path)




Параметры панели:
 lat = 43.151251  lon = 27.451342  tilt = 15.0  azimuth = 180.0
Добавлена колонка clearsky_poa — радиация чистого неба на плоскость панели (Вт/м²)


Unnamed: 0,time,date,hour,minute,dayofyear,temp_c,cloud,power_kw,tag,sm_user_object_id,...,longitude,tilt,azimuth,module_length,module_width,module_efficiency,total_panels,commissioning_date,degradation_rate,clearsky_poa
0,2025-01-31 05:30:00,2025-01-31,5,30,31,9.2,33.0,5.147287,P0086H01/I002/Ptot,70,...,27.451342,15.0,,2278.0,1134.0,21.2,172,2020-01-01,0.55,
1,2025-01-31 05:45:00,2025-01-31,5,45,31,9.2,33.0,306.941863,P0086H01/I002/Ptot,70,...,27.451342,15.0,,2278.0,1134.0,21.2,172,2020-01-01,0.55,
2,2025-01-31 06:00:00,2025-01-31,6,0,31,8.8,56.0,412.689934,P0086H01/I002/Ptot,70,...,27.451342,15.0,,2278.0,1134.0,21.2,172,2020-01-01,0.55,
3,2025-01-31 06:15:00,2025-01-31,6,15,31,8.8,56.0,949.100783,P0086H01/I002/Ptot,70,...,27.451342,15.0,,2278.0,1134.0,21.2,172,2020-01-01,0.55,
4,2025-01-31 06:30:00,2025-01-31,6,30,31,8.8,56.0,1073.821667,P0086H01/I002/Ptot,70,...,27.451342,15.0,,2278.0,1134.0,21.2,172,2020-01-01,0.55,


✅ Обновлённый файл сохранён в: data\learning_dataset_uid70_P0086H01_I002_Ptot_with_poa.csv


In [3]:
!pip install pvlib

Collecting pvlib
  Downloading pvlib-0.13.1-py3-none-any.whl.metadata (2.9 kB)
Collecting scipy>=1.7.2 (from pvlib)
  Downloading scipy-1.16.3-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting h5py (from pvlib)
  Downloading h5py-3.15.1-cp312-cp312-win_amd64.whl.metadata (3.1 kB)
Downloading pvlib-0.13.1-py3-none-any.whl (19.3 MB)
   ---------------------------------------- 0.0/19.3 MB ? eta -:--:--
   ---- ----------------------------------- 2.1/19.3 MB 11.8 MB/s eta 0:00:02
   ------ --------------------------------- 3.1/19.3 MB 8.0 MB/s eta 0:00:03
   -------- ------------------------------- 4.2/19.3 MB 6.6 MB/s eta 0:00:03
   ---------- ----------------------------- 5.2/19.3 MB 6.1 MB/s eta 0:00:03
   ------------- -------------------------- 6.3/19.3 MB 5.8 MB/s eta 0:00:03
   -------------- ------------------------- 7.1/19.3 MB 5.7 MB/s eta 0:00:03
   ---------------- ----------------------- 7.9/19.3 MB 5.4 MB/s eta 0:00:03
   ------------------ --------------------- 8.9/19.3 


[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip
