In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import joblib
import os
import sys
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Make this script's own directory importable so the local `config` module resolves
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
import config

def load_data() -> pd.DataFrame:
    """Load the raw CSV at ``config.DATA_PATH`` and order it by date.

    Steps:
      1. Read the CSV, treating ``,`` as the decimal separator.
      2. Normalise the column names: spaces become underscores, the ``%_``
         fragment is removed and parentheses are stripped.
      3. Parse ``config.DATE_COL`` as datetimes; values that cannot be parsed
         become ``NaT`` (``errors='coerce'``).
      4. Sort ascending by ``config.DATE_COL``.

    Returns:
        The renamed, date-sorted DataFrame.
    """
    print(f"Veri yükleniyor: {config.DATA_PATH}")
    df = pd.read_csv(config.DATA_PATH, decimal=',')
    df.columns = [
        col.replace(' ', '_').replace('%_', '').replace('(', '').replace(')', '')
        for col in df.columns
    ]
    df[config.DATE_COL] = pd.to_datetime(df[config.DATE_COL], errors='coerce')
    # Use a stable sort: the default quicksort may reorder rows that share the
    # same timestamp, which would scramble the sequence that the downstream
    # lag / rolling features rely on. NaT rows are placed last (pandas default).
    return df.sort_values(config.DATE_COL, kind='mergesort')

def feature_engineering(df: pd.DataFrame, window: int = 5) -> pd.DataFrame:
    """Add rolling-mean and one-step-lag features for every input column.

    Every column other than ``config.DATE_COL`` and ``config.TARGET`` gets two
    derived columns, appended in this order per source column:

    * ``<col>_Rolling_Mean`` - mean over the trailing ``window`` rows
      (the current row included).
    * ``<col>_Lag1`` - the value from the previous row.

    The caller's frame is left untouched; a new frame is returned.

    Args:
        df: Input frame. Row order matters, because both features look back
            at preceding rows.
        window: Number of rows in the rolling-mean window. Defaults to 5.

    Returns:
        A new DataFrame with the derived columns appended and every row that
        contains a missing value removed (this includes the leading rows for
        which the rolling window or the lag is not yet defined).
    """
    print("Özellik mühendisliği yapılıyor...")
    excluded = (config.DATE_COL, config.TARGET)
    input_cols = [col for col in df.columns if col not in excluded]

    # Collect the derived series first and attach them in one step, so the
    # DataFrame passed in by the caller is never modified in place.
    derived = {}
    for col in input_cols:
        derived[f'{col}_Rolling_Mean'] = df[col].rolling(window=window).mean()
        derived[f'{col}_Lag1'] = df[col].shift(1)

    return df.assign(**derived).dropna()

def train(df: pd.DataFrame) -> "xgb.XGBRegressor":
    """Fit an XGBoost regressor and print hold-out RMSE and R2.

    The features are every column except ``config.DATE_COL`` and
    ``config.TARGET``; the label is ``config.TARGET``. Rows are split in their
    existing order (no shuffling): the leading ~90% are used for fitting and
    the trailing ~10% for evaluation.

    Args:
        df: Feature frame containing ``config.DATE_COL`` and ``config.TARGET``.

    Returns:
        The regressor fitted on the training portion only.
    """
    print("Model eğitiliyor...")
    X = df.drop([config.DATE_COL, config.TARGET], axis=1)
    y = df[config.TARGET]

    # shuffle=False keeps the row order, so the hold-out set is the tail of
    # the frame. random_state only matters when shuffling, so it is inert here.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.1, shuffle=False, random_state=42
    )

    model = xgb.XGBRegressor(**config.MODEL_PARAMS)
    model.fit(X_train, y_train)

    # Metrics on the hold-out rows
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"RMSE: {rmse:.4f}, R2: {r2:.4f}")

    return model

def save(model) -> None:
    """Serialise *model* to ``config.MODEL_PATH`` with joblib.

    The parent directory of ``config.MODEL_PATH`` is created if it does not
    exist yet.

    Args:
        model: The fitted estimator to persist.
    """
    target_dir = os.path.dirname(config.MODEL_PATH)
    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError - only create a directory when there is one to create.
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    joblib.dump(model, config.MODEL_PATH)
    print(f"Model kaydedildi: {config.MODEL_PATH}")

if __name__ == "__main__":
    # End-to-end run: load -> derive features -> fit -> persist.
    df = feature_engineering(load_data())
    model = train(df)
    save(model)

1. Veri Yükleniyor...
2. Özellik Mühendisliği (Lag & Rolling) Yapılıyor...
3. Model Eğitiliyor (XGBoost)...
--- Model Başarısı ---
RMSE: 0.6457
R2 Score: 0.7028
4. Model Kaydediliyor: ../models/final_xgboost_model.pkl
İşlem Tamamlandı! ✅


# 🚀 Final Pipeline Mimarisi

Projenin canlıya (deployment) alınan final pipeline yapısı şu adımlardan oluşur:

1. **Veri Alımı:** Ham veri CSV formatında yüklenir.
2. **Ön İşleme:** Tarih formatı düzeltilir, zaman sıralaması yapılır.
3. **Feature Engineering:** Her sensör verisi için `Rolling Mean (Window=5)` ve `Lag1` özellikleri türetilir. NaN değerler temizlenir.
4. **Modelleme (XGBoost):** Optimize edilmiş parametrelerle (`max_depth=3`, `lr=0.04`) eğitilmiş XGBoost modeli tahmin üretir.
5. **Kalibrasyon:** Laboratuvardan gelen verilerle bias correction uygulanır.

**Neden Bu Seçimler Yapıldı?**
* **Model:** XGBoost, hızlı, yüksek performanslı ve eksik verilere karşı dirençli olduğu için seçildi.
* **Feature Set:** Sadece ham veri yerine "Trendi" (Rolling) ve "Tarihçeyi" (Lag) içeren set kullanıldı çünkü madencilik dinamik bir süreçtir; anlık değerden çok değişim yönü önemlidir.