In [1]:
# H1 — Imports & Config (LSTM-only)
import os, math, warnings
from datetime import datetime
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

# TensorFlow (LSTM)
import tensorflow as tf

# Tekrarlanabilirlik
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

# ---- Metrics ----
from sklearn.metrics import mean_absolute_error, mean_squared_error

def rmse(y_true, y_pred):
    return math.sqrt(mean_squared_error(y_true, y_pred))

def mape(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    mask = y_true != 0
    return (np.abs((y_true[mask] - y_pred[mask]) / y_true[mask]).mean() * 100) if mask.any() else np.nan

def smape(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    denom = (np.abs(y_true) + np.abs(y_pred)) / 2.0
    mask = denom != 0
    return (np.mean(np.abs(y_true[mask] - y_pred[mask]) / denom[mask]) * 100) if mask.any() else np.nan

# Çıktı klasörleri
PLOT_DIR = "dl_results/plots"
os.makedirs(PLOT_DIR, exist_ok=True)

In [3]:
# H2 — Data Load (Flexible schema, monthly-ready) + Demo fallback

# ---- Kullanıcı ayarları ----
DATA_PATH      = os.getenv("DATA_PATH", "")  # CSV/Parquet/XLSX dosya yolu (boşsa demo)
AGG_TO_MONTH   = True      # Günlük/haftalık ise True → aylığa topla
AGG_STRATEGY   = "sum"     # 'sum' (adet/satışlar) | 'mean' (fiyat vb.)
FREQ           = "MS"      # Monthly Start

# ---- Esnek şema alias'ları ----
ALIASES_DATE = ["tarih", "date", "ds", "StartDate", "startDate"]
ALIASES_PID  = ["product_id", "urun_kodu", "sku", "productId"]
ALIASES_Y    = ["y", "sales", "satis", "satis_miktari", "count", "value"]

def _resolve_col(cols, aliases, required_name):
    for a in aliases:
        if a in cols:
            return a
    raise ValueError(f"{required_name} kolonu bekleniyor. Alternatiflerden biri olmalı: {aliases}")

def _load_any(path):
    if path.endswith(".csv"):
        return pd.read_csv(path)
    if path.endswith(".parquet"):
        return pd.read_parquet(path)
    if path.endswith(".xlsx"):
        return pd.read_excel(path)
    raise ValueError("Desteklenmeyen format. CSV/XLSX/Parquet kullanın.")

# ---- Veri yükle / Demo oluştur ----
if DATA_PATH and os.path.exists(DATA_PATH):
    df_raw = _load_any(DATA_PATH)
    print(f"Loaded: {DATA_PATH} shape={df_raw.shape}")
else:
    # Eğer dışarıdan df/df_lstm hazırlanmışsa onu kullan; yoksa demo üret
    if 'df_lstm' in globals():
        print("[INFO] df_lstm bulundu; H3'te doğrudan kullanılacak.")
        df_raw = None
    elif 'df' in globals():
        print("[INFO] df bulundu; H3'te normalize edilerek kullanılacak.")
        df_raw = df.copy()
    else:
        print("[INFO] DATA_PATH yok. Demo veri (5 ürün x 36 ay) oluşturuluyor.")
        rng = pd.date_range("2024-01-01", "2026-12-01", freq="MS")
        rows = []
        for pid in [f"P{i}" for i in range(1,6)]:
            base  = np.linspace(10, 25, len(rng))
            seas  = 5*np.sin(np.linspace(0, 6*np.pi, len(rng)))
            noise = np.random.normal(0, 2.5, len(rng))
            y     = np.maximum(0, base + seas + noise + np.random.randint(0,6))
            for d, val in zip(rng, y):
                rows.append({"tarih": d, "product_id": pid, "y": float(round(val,2))})
        df_raw = pd.DataFrame(rows)

if df_raw is not None:
    # ---- Şema normalizasyonu ----
    date_col = _resolve_col(df_raw.columns, ALIASES_DATE, "Tarih")
    pid_col  = _resolve_col(df_raw.columns, ALIASES_PID,  "Product ID")
    y_col    = _resolve_col(df_raw.columns, ALIASES_Y,    "Target (y)")

    dfn = (
        df_raw[[date_col, pid_col, y_col]]
          .rename(columns={date_col:"date", pid_col:"product_id", y_col:"y"})
          .copy()
    )
    dfn["date"] = pd.to_datetime(dfn["date"])
    dfn["y"] = pd.to_numeric(dfn["y"], errors="coerce")
    dfn = dfn.dropna(subset=["date", "product_id", "y"])
    dfn = dfn.sort_values(["product_id", "date"]).reset_index(drop=True)
    dfn = dfn.groupby(["product_id", "date"], as_index=False)["y"].sum()

    # ---- Günlük/haftalık veriyi aylığa toparla (opsiyonel) ----
    if AGG_TO_MONTH:
        agg_fn = "sum" if AGG_STRATEGY == "sum" else "mean"
        dfn = (
            dfn.set_index("date")
               .groupby("product_id")
               .resample(FREQ)[["y"]]
               .agg(agg_fn)
               .reset_index()
        )

    # ---- Aylık frekansı garanti et (eksik ayları ekle) ----
    def _ensure_monthly(g):
        g = g.set_index("date").asfreq(FREQ)
        g["product_id"] = g["product_id"].ffill().bfill()
        return g.reset_index()

    dfn = dfn.groupby("product_id", group_keys=False).apply(_ensure_monthly)
    dfn["y"] = dfn["y"].clip(lower=0)

    # LSTM source view
    df_lstm = dfn[["product_id", "date", "y"]].sort_values(["product_id", "date"]).reset_index(drop=True)

print("LSTM table shape:", df_lstm.shape)
print(df_lstm["product_id"].nunique(), "ürün; tarih aralığı:", df_lstm["date"].min().date(), "→", df_lstm["date"].max().date())
df_lstm.head()

[INFO] DATA_PATH yok. Demo veri (5 ürün x 36 ay) oluşturuluyor.
LSTM table shape: (180, 3)
5 ürün; tarih aralığı: 2024-01-01 → 2026-12-01


Unnamed: 0,product_id,date,y
0,P1,2024-01-01,12.24
1,P1,2024-02-01,13.65
2,P1,2024-03-01,17.88
3,P1,2024-04-01,21.09
4,P1,2024-05-01,16.3


In [5]:
# H3 — LSTM DataPrep (sequence & helpers)

# ====== Hyperparams (başlangıç) ======
SEQ_LEN   = 12
BATCH     = 32
EPOCHS    = 60
PATIENCE  = 8
LR        = 1e-3
DROPOUT   = 0.2
CLIP_NORM = 1.0
VAL_SPLIT = 0.1

def _clean_series(g: pd.DataFrame) -> pd.DataFrame:
    g = g.sort_values("date").copy()
    g["y"] = g["y"].interpolate("linear").ffill().bfill()
    return g

def build_sequences(frame: pd.DataFrame, seq_len: int = SEQ_LEN):
    """
    Global eğitim için sequence üretimi.
    Her ürün ayrı normalize edilir (kendi mean/std), sonra z-skor dizileri birleştirilir.
    """
    X, y, idx_info = [], [], []
    for pid, g in frame.groupby("product_id"):
        g = _clean_series(g)
        vals = g["y"].values.astype("float32")
        if len(vals) < seq_len + 1:
            continue
        mean = float(vals.mean())
        std  = float(vals.std()) if float(vals.std()) > 0 else 1.0
        z = (vals - mean) / std
        for i in range(seq_len, len(z)):
            X.append(z[i-seq_len:i].reshape(seq_len, 1))
            y.append(z[i])
            idx_info.append({"product_id": pid, "date": g.iloc[i]["date"], "mean": mean, "std": std})
    if len(X) == 0:
        return np.empty((0, seq_len, 1), dtype="float32"), np.empty((0,), dtype="float32"), pd.DataFrame(idx_info)
    return np.asarray(X, dtype="float32"), np.asarray(y, dtype="float32"), pd.DataFrame(idx_info)

def build_model(seq_len: int = SEQ_LEN):
    tf.random.set_seed(42)
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(seq_len, 1)),
        tf.keras.layers.LSTM(64, return_sequences=True),
        tf.keras.layers.Dropout(DROPOUT),
        tf.keras.layers.LSTM(32),
        tf.keras.layers.Dense(1)
    ])
    opt = tf.keras.optimizers.Adam(learning_rate=LR, clipnorm=CLIP_NORM)
    model.compile(optimizer=opt, loss="mse")
    return model

def lopo_lstm_eval(frame: pd.DataFrame, cutoff: pd.Timestamp) -> pd.DataFrame:
    """
    LOPO: hedef ürün (pid) eğitimden tamamen çıkarılır;
    diğer ürünlerle cutoff'a kadar eğitilir. Hedef ürünün cutoff sonrası İLK ayı tahmin edilir.
    """
    products = frame["product_id"].unique().tolist()
    rows = []

    for pid in products:
        train_df = frame[(frame["product_id"] != pid) & (frame["date"] <= cutoff)].copy()
        test_df  = frame[frame["product_id"] == pid].copy()

        test_hist  = test_df[test_df["date"] <= cutoff].copy()
        future_row = test_df[test_df["date"] > cutoff].sort_values("date").head(1)
        if test_hist.shape[0] < SEQ_LEN or future_row.empty:
            continue

        # Train sequences (global, target product excluded)
        X_tr, y_tr, _ = build_sequences(train_df, SEQ_LEN)
        if X_tr.shape[0] == 0:
            continue

        # Test sequence from target product
        test_hist = _clean_series(test_hist)
        vals = test_hist["y"].values.astype("float32")
        mean, std = float(vals.mean()), float(vals.std()) if float(vals.std()) > 0 else 1.0
        z = (vals - mean) / std
        X_te = z[-SEQ_LEN:].reshape(1, SEQ_LEN, 1)
        y_true = float(future_row["y"].values[0])

        # Model & train
        model = build_model(SEQ_LEN)
        callbacks = [
            tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=PATIENCE, restore_best_weights=True),
            tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=max(2, PATIENCE//2), min_lr=1e-5, verbose=0)
        ]
        model.fit(X_tr, y_tr, validation_split=VAL_SPLIT, epochs=EPOCHS, batch_size=BATCH, verbose=0, callbacks=callbacks)

        # Predict (de-normalize)
        y_pred_z = float(model.predict(X_te, verbose=0)[0, 0])
        y_pred   = y_pred_z * std + mean

        rows.append({
            "product_id": pid, "model": "lstm", "y_pred": float(y_pred), "y_true": y_true,
            "MAE": abs(y_true - y_pred),
            "RMSE": math.sqrt((y_true - y_pred) ** 2),
            "MAPE": (abs(y_true - y_pred) / y_true * 100) if y_true != 0 else np.nan,
            "sMAPE": (abs(y_true - y_pred) / ((abs(y_true) + abs(y_pred)) / 2) * 100) if (abs(y_true) + abs(y_pred)) > 0 else np.nan
        })

    return pd.DataFrame(rows)

In [7]:
# H4 — Naive-12 baseline (referans)

def naive12_eval(frame: pd.DataFrame, cutoff: pd.Timestamp) -> pd.DataFrame:
    """
    Basit referans: y_{t-12}. Cutoff sonrası ilk ayı, 12 ay önceki değerle tahmin eder.
    """
    rows=[]
    for pid,g in frame.groupby("product_id"):
        g=g.sort_values("date")
        y_true = g.loc[g["date"]>cutoff,"y"].head(1)
        idx = g.index[g["date"]==cutoff]
        if y_true.empty or len(idx)==0: 
            continue
        pos = idx[0]
        if pos-11-1<0: 
            continue
        y_pred = g.loc[pos-11-1,"y"]  # t-12
        yt=float(y_true.values[0]); yp=float(y_pred)
        rows.append({
            "product_id":pid,"model":"naive12",
            "MAE":abs(yt-yp),
            "RMSE":math.sqrt((yt-yp)**2),
            "MAPE":(abs(yt-yp)/yt*100) if yt!=0 else np.nan,
            "sMAPE":(abs(yt-yp)/((abs(yt)+abs(yp))/2)*100) if (abs(yt)+abs(yp))>0 else np.nan
        })
    return pd.DataFrame(rows)

# Hızlı kontrol (tek cutoff)
_global_last = df_lstm["date"].max()
_cutoff_example = (_global_last - pd.offsets.MonthBegin(1))  # son ayın öncesi
print("Example cutoff:", _cutoff_example.date())
print("Naive sample head:")
naive12_eval(df_lstm, _cutoff_example).head()

Example cutoff: 2026-11-01
Naive sample head:


Unnamed: 0,product_id,model,MAE,RMSE,MAPE,sMAPE
0,P1,naive12,5.64,5.64,24.575163,28.017884
1,P2,naive12,15.85,15.85,50.997426,68.451738
2,P3,naive12,8.47,8.47,28.365707,33.053659
3,P4,naive12,10.46,10.46,43.062989,54.879328
4,P5,naive12,11.59,11.59,44.525547,57.276995


In [9]:
# H5 — LOPO LSTM değerlendirme (çoklu cutoff) + çizgi grafikler + özet CSV

import matplotlib.pyplot as plt

# Aynı ML raporundaki gibi: 2026 içinde birkaç cutoff tarayalım (t+1 mevcut olsun)
cutoffs = pd.date_range("2026-03-01","2026-09-01",freq="MS")

all_rows = []
for c in cutoffs:
    lstm_df = lopo_lstm_eval(df_lstm, c)
    nv_df   = naive12_eval(df_lstm, c)

    if not lstm_df.empty:
        g = lstm_df[["MAE","RMSE","MAPE","sMAPE"]].mean()
        all_rows.append({"cutoff":c, "model":"lstm",
                         "MAE":float(g["MAE"]), "RMSE":float(g["RMSE"]),
                         "MAPE":float(g["MAPE"]), "sMAPE":float(g["sMAPE"])})
    if not nv_df.empty:
        g2 = nv_df[["MAE","RMSE","MAPE","sMAPE"]].mean()
        all_rows.append({"cutoff":c, "model":"naive12",
                         "MAE":float(g2["MAE"]), "RMSE":float(g2["RMSE"]),
                         "MAPE":float(g2["MAPE"]), "sMAPE":float(g2["sMAPE"])})

dl_lines = pd.DataFrame(all_rows).sort_values("cutoff").reset_index(drop=True)
dl_lines.to_csv("dl_results/dl_line_metrics.csv", index=False)
display(dl_lines.head())

# ---- Çizgi grafikleri kaydet (MAE/RMSE/MAPE/sMAPE) ----
def _plot_metric(metric):
    plt.figure()
    for name, g in dl_lines.groupby("model"):
        plt.plot(g["cutoff"], g[metric], marker="o", label=name)
    plt.title(metric)
    plt.xlabel("Cutoff")
    plt.ylabel(metric)
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()
    out_path = os.path.join(PLOT_DIR, f"line_{metric.lower()}.png")
    plt.savefig(out_path, dpi=160)
    plt.close()
    print("Saved:", out_path)

for m in ["MAE","RMSE","MAPE","sMAPE"]:
    _plot_metric(m)

# ---- Ortalama (cutoff ortalaması) — rapor tablosu ----
dl_summary = (dl_lines
              .groupby("model")[["MAE","RMSE","MAPE","sMAPE"]]
              .mean()
              .reset_index()
              .sort_values("MAE"))
dl_summary.to_csv("dl_results/dl_summary.csv", index=False)
print("\n=== DL Summary (cutoff ortalaması) ===")
print(dl_summary.to_string(index=False))

# Kısa markdown notu (opsiyonel)
with open("dl_results/dl_report.md","w", encoding="utf-8") as f:
    f.write("# LSTM (DL) — LOPO t+1 Değerlendirme\n\n")
    f.write("- Çizgiler: dl_results/plots/line_[mae|rmse|mape|smape].png\n")
    f.write("- Satır içi metrikler: dl_results/dl_line_metrics.csv\n")
    f.write("- Özet tablo: dl_results/dl_summary.csv\n\n")
    f.write(f"Üretim zamanı: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
print("Markdown rapor yazıldı: dl_results/dl_report.md")



Unnamed: 0,cutoff,model,MAE,RMSE,MAPE,sMAPE
0,2026-03-01,lstm,3.359272,3.359272,11.636465,12.476089
1,2026-03-01,naive12,5.554,5.554,19.527938,22.369237
2,2026-04-01,lstm,3.532207,3.532207,13.369277,13.436771
3,2026-04-01,naive12,4.242,4.242,15.740537,17.508879
4,2026-05-01,lstm,3.534007,3.534007,13.470147,14.069294


Saved: dl_results/plots\line_mae.png
Saved: dl_results/plots\line_rmse.png
Saved: dl_results/plots\line_mape.png
Saved: dl_results/plots\line_smape.png

=== DL Summary (cutoff ortalaması) ===
  model      MAE     RMSE      MAPE     sMAPE
   lstm 2.794928 2.794928 11.723065 11.786215
naive12 4.988000 4.988000 21.034553 24.168418
Markdown rapor yazıldı: dl_results/dl_report.md
