In [None]:
import pandas as pd
import numpy as np
import shap
import matplotlib.pyplot as plt
import tensorflow as tf
import xgboost as xgb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.layers import LSTM, RepeatVector, TimeDistributed
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, precision_score, recall_score, f1_score
from sklearn.manifold import TSNE
from imblearn.over_sampling import SMOTE
from catboost import CatBoostClassifier
from xgboost import XGBClassifier  
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
import os
import tkinter as tk
from tkinter import simpledialog, messagebox
import time
import datetime
import subprocess
import platform
from sklearn.model_selection import learning_curve

# Record the start time so the total run time can be reported at the end.
start_time = time.time()

# -------------------- TKINTER INTERFACE --------------------
# A hidden root window is needed to host the modal dialogs below.
root = tk.Tk()
root.withdraw()

try:
    # askstring() returns None when the dialog is cancelled, which makes
    # float() raise TypeError; malformed text raises ValueError. Only these two
    # are treated as "invalid input" so that unrelated errors are not hidden.
    data_fraction = float(simpledialog.askstring("Veri Y√ºzdesi", "Analiz i√ßin veri y√ºzdesini girin (0.0 - 1.0):", initialvalue="0.015"))
    if not (0 < data_fraction <= 1):
        raise ValueError("Ge√ßersiz oran")
except (TypeError, ValueError):
    messagebox.showerror("Hata", "Ge√ßerli bir oran girilmedi. Program sonlandƒ±rƒ±lƒ±yor.")
    exit()

try:
    epochs = int(simpledialog.askstring("Epoch Sayƒ±sƒ±", "LSTM Autoencoder i√ßin epoch sayƒ±sƒ±nƒ± girin:", initialvalue="15"))
    # Zero or negative epochs would train nothing; reject them the same way
    # an out-of-range data fraction is rejected above.
    if epochs < 1:
        raise ValueError("epoch count must be at least 1")
except (TypeError, ValueError):
    messagebox.showerror("Hata", "Ge√ßerli bir epoch sayƒ±sƒ± girilmedi. Program sonlandƒ±rƒ±lƒ±yor.")
    exit()

# -------------------- FILE PATHS --------------------
telemetry_path = r"C:\Users\FEYYAZ\Desktop\PdM_telemetry.csv"
failures_path = r"C:\Users\FEYYAZ\Desktop\PdM_failures.csv"

# A timestamp in the file name keeps each run's report from overwriting the last.
timestamp = f"{datetime.datetime.now():%Y%m%d_%H%M%S}"
report_filename = "model_comparison_report_" + timestamp + ".pdf"

# -------------------- DATA LOADING --------------------
print("Veri y√ºkleniyor...")
try:
    telemetry = pd.read_csv(telemetry_path)
    failures = pd.read_csv(failures_path)
    # Both tables are later matched on their 'datetime' column, so parse both.
    for frame in (telemetry, failures):
        frame['datetime'] = pd.to_datetime(frame['datetime'])
except Exception as load_error:
    messagebox.showerror("Hata", f"Veri dosyalarƒ± y√ºklenemedi: {load_error}")
    exit()

# -------------------- DATA PREPROCESSING --------------------
print("Veri √∂n i≈üleniyor...")
# Draw a reproducible random subset of the raw rows, then average the sensor
# readings per machine and per hour.
telemetry_sampled = telemetry.sample(frac=data_fraction, random_state=42)
hourly_buckets = ['machineID', pd.Grouper(key='datetime', freq='h')]
telemetry_hourly = telemetry_sampled.groupby(hourly_buckets).mean().reset_index()
telemetry_hourly['failure'] = 0

# Flag every hourly row whose machine and timestamp match a recorded failure.
for failed_machine, failed_at in zip(failures['machineID'], failures['datetime']):
    same_machine = telemetry_hourly['machineID'] == failed_machine
    same_hour = telemetry_hourly['datetime'] == failed_at
    telemetry_hourly.loc[same_machine & same_hour, 'failure'] = 1

# -------------------- TIME-SERIES FEATURE ENGINEERING --------------------
print("Zaman serisi √∂zellikleri ekleniyor...")
def add_time_series_features(df, features, lag_steps=2, window_size=3):
    """Return a sorted copy of ``df`` extended with per-machine history columns.

    For every column named in ``features`` the following columns are added,
    each computed separately within a ``machineID`` group after sorting by
    ``machineID`` and ``datetime``:

    * ``<feature>_lag_<k>`` for ``k = 1 .. lag_steps`` (value ``k`` rows back),
    * ``<feature>_ma_<window_size>`` (rolling mean),
    * ``<feature>_std_<window_size>`` (rolling standard deviation),
    * ``<feature>_diff`` (difference to the previous row).

    Every missing value in the result (including the ones produced at the
    start of each group) is replaced by 0. The input frame is not modified.
    """
    enriched = df.sort_values(['machineID', 'datetime']).copy()
    lags = range(1, lag_steps + 1)
    for column in features:
        for step in lags:
            enriched[f'{column}_lag_{step}'] = enriched.groupby('machineID')[column].shift(step)
        window = enriched.groupby('machineID')[column].rolling(window=window_size)
        rolling_stats = {'ma': window.mean(), 'std': window.std()}
        for label, stat in rolling_stats.items():
            # The rolling result is indexed by (machineID, original index);
            # dropping the group level lets it align with ``enriched`` again.
            enriched[f'{column}_{label}_{window_size}'] = stat.reset_index(level=0, drop=True)
        enriched[f'{column}_diff'] = enriched.groupby('machineID')[column].diff()
    return enriched.fillna(0)

base_features = ['volt', 'rotate', 'pressure', 'vibration']
telemetry_hourly = add_time_series_features(telemetry_hourly, base_features, lag_steps=2, window_size=3)

# Full candidate list: raw sensors first, then the derived columns grouped by
# kind (lags, moving averages, rolling std, differences).
features = list(base_features)
features.extend(f'{feat}_lag_{lag}' for feat in base_features for lag in range(1, 3))
for suffix in ('ma_3', 'std_3', 'diff'):
    features.extend(f'{feat}_{suffix}' for feat in base_features)

# -------------------- FEATURE SELECTION (CORRELATION ANALYSIS) --------------------
print("Y√ºksek korelasyonlu √∂zellikler eleniyor...")
X_temp = telemetry_hourly[features]
corr_matrix = X_temp.corr().abs()
# Keep only the strict upper triangle so each pair is inspected once and a
# column is never compared with itself.
strict_upper_mask = np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)
upper = corr_matrix.where(strict_upper_mask)
# A column is dropped when it correlates above 0.8 with any earlier column.
to_drop = [column for column in upper.columns if (upper[column] > 0.8).any()]
print(f"Elenen √∂zellikler: {to_drop}")
features = [name for name in features if name not in to_drop]
print(f"Kalan √∂zellikler: {features}")

# -------------------- MODEL DATA --------------------
print("Model hazƒ±rlanƒ±yor...")
X = telemetry_hourly[features]
y = telemetry_hourly['failure']
# NOTE(review): the scaler is still fitted on all rows (train and test alike)
# because X_scaled is reused later in the script; fit it on the training rows
# only if strictly leak-free scaling is required.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Cap the number of rows that go into model training/evaluation.
max_samples = min(len(X_scaled), 30000)
X_scaled_limited = X_scaled[:max_samples]
y_limited = y[:max_samples]

# Split BEFORE oversampling. Running SMOTE on the full data first lets
# synthetic points interpolated from test rows end up in the training set and
# makes the test set partly synthetic, which inflates every reported metric.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X_scaled_limited, y_limited, test_size=0.2, stratify=y_limited, random_state=42
)

print(f"SMOTE uygulanƒ±yor... (Veri boyutu: {len(X_train_raw)})")
# The training split holds fewer minority rows than the full data did, so cap
# k_neighbors at (minority count - 1) to keep SMOTE usable on small samples.
minority_count = int(y_train_raw.value_counts().min())
smote = SMOTE(random_state=42, k_neighbors=max(1, min(5, minority_count - 1)))
X_resampled, y_resampled = smote.fit_resample(X_train_raw, y_train_raw)

# Models are fitted on the balanced training data and scored on the untouched,
# real-distribution test split.
X_train, y_train = X_resampled, y_resampled

X_test_df = pd.DataFrame(X_test, columns=features)

# -------------------- MODEL DEFINITIONS --------------------
print("Logistic Regression, Random Forest, CatBoost ve XGBoost modelleri eƒüitiliyor...")

# All four candidates are deliberately small / strongly regularised.
# NOTE: the 'XGBoost' entry is never fitted by the training loop in this
# script (that loop trains a Booster through xgb.train with its own parameter
# dict); it only provides the dictionary key that the loop iterates over.
models = {
    # n_jobs is intentionally not passed: scikit-learn ignores it for the
    # 'liblinear' solver and emits a warning on every fit when it is set.
    'Logistic Regression': LogisticRegression(
        max_iter=1000,
        class_weight='balanced',
        C=0.01,
        solver='liblinear'
    ),
    'Random Forest': RandomForestClassifier(
        n_estimators=200,
        max_depth=3,
        min_samples_split=30,
        min_samples_leaf=15,
        max_features='sqrt',
        max_samples=0.7,
        class_weight='balanced_subsample',
        random_state=42,
        n_jobs=-1
    ),
    'CatBoost': CatBoostClassifier(
        iterations=50,
        depth=2,
        l2_leaf_reg=50,
        subsample=0.6,
        bagging_temperature=1.5,
        random_state=42,
        verbose=0
    ),
    'XGBoost': XGBClassifier(
        n_estimators=80,
        max_depth=2,
        learning_rate=0.01,
        subsample=0.5,
        colsample_bytree=0.5,
        min_child_weight=20,
        reg_alpha=2.0,
        objective='binary:logistic',
        eval_metric='logloss',
        random_state=42,
        n_jobs=-1
    )
}

# Per-model outputs collected by the training loop below.
results = {}
shap_values_dict = {}
y_pred_dict = {}
y_proba_dict = {}
xgboost_model = None
xgboost_best_iteration = 100


def _positive_class_shap(raw_values):
    """Return the SHAP matrix for class 1 with shape (n_samples, n_features).

    Depending on the SHAP version and explainer, a binary classifier yields
    either a list ``[class0, class1]``, a 3-D array
    ``(samples, features, classes)`` or an already 2-D array. Indexing the 3-D
    form with ``[1]`` would silently select sample 1 instead of class 1, so
    every layout is normalised here.
    """
    if isinstance(raw_values, list):
        return raw_values[1]
    raw_values = np.asarray(raw_values)
    if raw_values.ndim == 3:
        return raw_values[:, :, 1]
    return raw_values


# The same leading test rows are explained for every model.
sample_size = min(100, len(X_test_df))
X_test_sample = X_test_df.iloc[:sample_size]

for name, model in models.items():
    print(f"\n‚û°Ô∏è {name} modeli eƒüitiliyor...")
    if name == 'XGBoost':
        # XGBoost is trained through the native API to get early stopping on
        # the evaluation set; the XGBClassifier stored in ``models`` is not used.
        params = {
            'max_depth': 2,
            'learning_rate': 0.007,
            'subsample': 0.7,
            'colsample_bytree': 0.7,
            'min_child_weight': 20,
            'reg_lambda': 100,
            'reg_alpha': 10.0,
            'gamma': 10,
            'objective': 'binary:logistic',
            'eval_metric': 'logloss',
            'random_state': 42
        }
        dtrain = xgb.DMatrix(X_train, label=y_train, feature_names=features)
        dtest = xgb.DMatrix(X_test, label=y_test, feature_names=features)
        evals = [(dtrain, 'train'), (dtest, 'test')]
        bst = xgb.train(
            params,
            dtrain,
            num_boost_round=300,
            evals=evals,
            early_stopping_rounds=30,
            verbose_eval=False
        )
        xgboost_model = bst
        xgboost_best_iteration = bst.best_iteration + 1
        # Predict once with the trees up to the best iteration; the hard
        # labels are the probabilities thresholded at 0.5.
        y_proba = bst.predict(dtest, iteration_range=(0, xgboost_best_iteration))
        y_pred = (y_proba > 0.5).astype(int)
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_proba = model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_proba)
    roc_auc = auc(fpr, tpr)
    results[name] = {
        'Precision': precision_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'F1 Score': f1_score(y_test, y_pred),
        'AUC': roc_auc,
        'fpr': fpr,
        'tpr': tpr
    }
    y_pred_dict[name] = y_pred
    y_proba_dict[name] = y_proba
    print(f"üîé {name} i√ßin SHAP deƒüerleri hesaplanƒ±yor...")
    try:
        if name == "XGBoost":
            explainer = shap.TreeExplainer(xgboost_model)
        elif name in ("Random Forest", "CatBoost"):
            explainer = shap.TreeExplainer(model)
        else:
            # Model-agnostic explainer; the training data is summarised into
            # 10 k-means centroids to keep it tractable.
            background = shap.kmeans(X_train, 10)
            explainer = shap.KernelExplainer(model.predict_proba, background)
        shap_values_dict[name] = _positive_class_shap(explainer.shap_values(X_test_sample))
        print(f"‚úÖ SHAP ba≈üarƒ±yla tamamlandƒ± - {name}")
    except Exception as e:
        # SHAP is best-effort: a failure for one model must not stop the run.
        print(f"‚ö†Ô∏è SHAP hesaplanamadƒ± - {name}: {str(e)}")
        shap_values_dict[name] = None

# -------------------- LEARNING CURVE PLOT --------------------
def plot_learning_curve(model, X, y, model_name, cv=5, scoring='f1'):
    """Plot a cross-validated learning curve for ``model`` and save it as a PNG.

    Args:
        model: Unfitted estimator passed on to ``learning_curve``.
        X: Feature matrix.
        y: Target labels.
        model_name: Display name; also used (lower-cased, spaces replaced by
            underscores) to build the output file name.
        cv: Number of cross-validation folds.
        scoring: Scoring name passed on to ``learning_curve``.

    Returns:
        The name of the written PNG file, or ``None`` when the curve could not
        be computed or saved (the error is printed, not raised).
    """
    fig = None
    try:
        train_sizes, train_scores, test_scores = learning_curve(
            model, X, y, cv=cv, scoring=scoring, n_jobs=-1,
            train_sizes=np.linspace(0.1, 1.0, 10), shuffle=True, random_state=42
        )
        # Mean and spread across the CV folds for every training-set size.
        train_mean = np.mean(train_scores, axis=1)
        train_std = np.std(train_scores, axis=1)
        test_mean = np.mean(test_scores, axis=1)
        test_std = np.std(test_scores, axis=1)
        fig = plt.figure(figsize=(10, 6))
        plt.plot(train_sizes, train_mean, 'o-', color='r', label='Eƒüitim Skoru')
        plt.plot(train_sizes, test_mean, 'o-', color='g', label='Doƒürulama Skoru')
        plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, alpha=0.1, color='r')
        plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, alpha=0.1, color='g')
        plt.title(f"{model_name} √ñƒürenme Eƒürisi")
        plt.xlabel("Eƒüitim √ñrnek Sayƒ±sƒ±")
        # Only call the axis "F1" when F1 is what was actually scored.
        plt.ylabel("F1 Skoru" if scoring == 'f1' else str(scoring))
        plt.legend(loc='lower right')
        plt.grid(alpha=0.3)
        plt.tight_layout()
        filename = f"learning_curve_{model_name.replace(' ', '_').lower()}.png"
        plt.savefig(filename, dpi=300)
        return filename
    except Exception as e:
        print(f"√ñƒürenme eƒürisi olu≈üturulamadƒ± - {model_name}: {str(e)}")
        return None
    finally:
        # Close the figure on success and on failure so it is never leaked.
        if fig is not None:
            plt.close(fig)

# Learning curves for the tree-based models on the resampled data.
rf_path = plot_learning_curve(
    models['Random Forest'], X_resampled, y_resampled, "Random Forest"
)
cb_path = plot_learning_curve(
    models['CatBoost'], X_resampled, y_resampled, "CatBoost"
)
# XGBoost uses a fresh scikit-learn wrapper here.
# NOTE(review): it is created with library defaults, not with the tuned
# settings used elsewhere in this script - confirm that this is intended.
xgb_temp = XGBClassifier()
xgb_path = plot_learning_curve(
    xgb_temp, X_resampled, y_resampled, "XGBoost"
)
print(f"√ñƒürenme eƒürisi g√∂rselleri olu≈üturuldu:\n- {rf_path}\n- {cb_path}\n- {xgb_path}")

# -------------------- COMPARISON VISUALISATIONS --------------------
# One row per metric, one column per model.
metric_rows = ['Precision', 'Recall', 'F1 Score', 'AUC']
table_columns = {'Metric': metric_rows}
for model_label in ('Logistic Regression', 'Random Forest', 'CatBoost', 'XGBoost'):
    table_columns[model_label] = [results[model_label][metric] for metric in metric_rows]
metrics_table = pd.DataFrame(table_columns)

print("\nüìã Model Performans Kar≈üƒ±la≈ütƒ±rmasƒ±:")
print(metrics_table)

# ROC curves of all models on one chart; a failure only skips this image.
try:
    plt.figure(figsize=(10, 8))
    for model_label, scores in results.items():
        curve_label = f"{model_label} (AUC = {scores['AUC']:.2f})"
        plt.plot(scores['fpr'], scores['tpr'], lw=2, label=curve_label)
    # Diagonal reference line of a random classifier.
    plt.plot([0, 1], [0, 1], linestyle='--', color='gray')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Eƒürisi Kar≈üƒ±la≈ütƒ±rmasƒ±')
    plt.legend(loc='lower right')
    plt.tight_layout()
    plt.savefig("roc_comparison.png", dpi=300)
    plt.close()
except Exception as roc_error:
    print(f"ROC eƒürisi olu≈üturulamadƒ±: {roc_error}")

# One confusion matrix per model, laid out on a 2x2 grid.
try:
    fig, axes = plt.subplots(2, 2, figsize=(12, 12))
    axes = axes.flatten()
    for position, model_label in enumerate(y_pred_dict):
        panel = axes[position]
        cm = confusion_matrix(y_test, y_pred_dict[model_label])
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=panel)
        panel.set_title(f'Confusion Matrix - {model_label}')
        panel.set_xlabel('Predicted')
        panel.set_ylabel('Actual')
    plt.tight_layout()
    plt.savefig("confusion_matrix_comparison.png", dpi=300)
    plt.close()
except Exception as cm_error:
    print(f"Confusion matrix olu≈üturulamadƒ±: {cm_error}")

def _shap_base_value(model_name):
    """Return the explainer's expected value for the positive class.

    The explainer is rebuilt the same way the training loop builds it, because
    only the SHAP values (not the explainers) are kept in ``shap_values_dict``.
    ``expected_value`` may be a scalar or one value per class; the last entry
    is the positive class in both cases.
    """
    if model_name == "XGBoost":
        explainer = shap.TreeExplainer(xgboost_model)
    elif model_name in ("Random Forest", "CatBoost"):
        explainer = shap.TreeExplainer(models[model_name])
    else:
        explainer = shap.KernelExplainer(models[model_name].predict_proba, shap.kmeans(X_train, 10))
    return float(np.ravel(explainer.expected_value)[-1])


# SHAP summary plot per model.
for name, shap_values in shap_values_dict.items():
    if shap_values is not None:
        try:
            plt.figure(figsize=(10, 6))
            # Use exactly the test rows the SHAP values were computed for
            # instead of relying on a variable left over from an earlier loop.
            explained_rows = X_test_df.iloc[:len(shap_values)]
            shap.summary_plot(shap_values, explained_rows, feature_names=features, show=False, plot_size=(8, 5))
            plt.title(f"SHAP Summary - {name}")
            plt.tight_layout()
            plt.savefig(f"shap_summary_{name.replace(' ', '_').lower()}.png", dpi=300)
            plt.close()
        except Exception as e:
            print(f"SHAP summary plot olu≈üturulamadƒ± - {name}: {str(e)}")
    else:
        print(f"SHAP summary plot atlandƒ± - {name} i√ßin SHAP deƒüerleri bulunamadƒ±")

# Waterfall plot for the first explained test row, force plot as a fallback.
for name, shap_values in shap_values_dict.items():
    if shap_values is None:
        continue
    try:
        base_value = _shap_base_value(name)
        row_values = np.asarray(shap_values)[0]
        row_data = X_test_df.iloc[0].to_numpy()
    except Exception as e:
        print(f"ƒ∞lk waterfall plot olu≈üturulamadƒ± - {name}: {str(e)}")
        continue
    try:
        # shap.plots.waterfall only accepts an Explanation object, so the raw
        # SHAP row is wrapped together with its base value and feature data.
        explanation = shap.Explanation(
            values=row_values,
            base_values=base_value,
            data=row_data,
            feature_names=features,
        )
        plt.figure(figsize=(10, 6))
        shap.plots.waterfall(explanation, max_display=10, show=False)
        plt.title(f"SHAP Waterfall Plot - {name}")
        plt.tight_layout()
        plt.savefig(f"shap_waterfall_{name.replace(' ', '_').lower()}.png", dpi=300)
        plt.close()
    except Exception as e:
        print(f"ƒ∞lk waterfall plot olu≈üturulamadƒ± - {name}: {str(e)}")
        plt.close()  # drop the half-drawn waterfall figure
        try:
            # matplotlib=True makes force() draw a regular figure that can be
            # saved with savefig; the base value must be the first argument.
            shap.plots.force(
                base_value,
                shap_values=row_values,
                features=row_data,
                feature_names=features,
                matplotlib=True,
                show=False,
            )
            plt.title(f"SHAP Force Plot - {name}")
            plt.tight_layout()
            plt.savefig(f"shap_force_{name.replace(' ', '_').lower()}.png", dpi=300)
            plt.close()
            print(f"Alternatif olarak Force Plot olu≈üturuldu - {name}")
        except Exception as e:
            print(f"Force plot da olu≈üturulamadƒ± - {name}: {str(e)}")

def _ranked_feature_importance(model_name):
    """Return ``(importances, feature names)`` sorted by descending importance.

    XGBoost importances are the 'gain' scores of the trained Booster (features
    missing from the score dict count as 0); the other models expose
    ``feature_importances_``.
    """
    if model_name == 'XGBoost':
        gain_by_feature = xgboost_model.get_score(importance_type='gain')
        scores = [gain_by_feature.get(feature, 0) for feature in features]
    else:
        scores = models[model_name].feature_importances_
    ranking = np.argsort(scores)[::-1]
    return [scores[pos] for pos in ranking], [features[pos] for pos in ranking]


tree_model_names = ['Random Forest', 'CatBoost', 'XGBoost']

# Combined figure: one stacked panel per tree-based model.
try:
    plt.figure(figsize=(10, 8))
    for panel, model_name in enumerate(tree_model_names, start=1):
        plt.subplot(3, 1, panel)
        sorted_importances, sorted_features = _ranked_feature_importance(model_name)
        sns.barplot(x=sorted_importances, y=sorted_features)
        plt.title(f"{model_name} √ñznitelik √ñnemi")
        plt.tight_layout()
    plt.savefig("tree_models_feature_importance.png", dpi=300)
    plt.close()
except Exception as e:
    print(f"√ñznitelik √∂nemi kar≈üƒ±la≈ütƒ±rmasƒ± olu≈üturulamadƒ±: {str(e)}")

# Stand-alone figure per model; one failing model does not block the others.
for model_name in tree_model_names:
    try:
        plt.figure(figsize=(8, 5))
        sorted_importances, sorted_features = _ranked_feature_importance(model_name)
        sns.barplot(x=sorted_importances, y=sorted_features)
        plt.title(f"{model_name} √ñznitelik √ñnemi")
        plt.tight_layout()
        plt.savefig(f"{model_name.lower().replace(' ', '_')}_feature_importance.png", dpi=300)
        plt.close()
    except Exception as e:
        print(f"√ñznitelik √∂nemi plotu olu≈üturulamadƒ± - {model_name}: {str(e)}")

print("t-SNE g√∂rselle≈ütirmesi hazƒ±rlanƒ±yor...")
try:
    # Only the first rows are considered; both classes are then subsampled.
    tsne_size = min(5000, len(X_scaled))
    failure_indices = np.where(y[:tsne_size] == 1)[0]
    non_failure_indices = np.where(y[:tsne_size] == 0)[0]
    min_samples_per_class = min(len(failure_indices), len(non_failure_indices))
    sample_size = max(min(300, min_samples_per_class), 50)

    if len(failure_indices) > 0 and len(non_failure_indices) > 0:
        # Seeded generator so the picture is reproducible like the rest of the
        # pipeline. The requested size never exceeds the pool, so sampling
        # without replacement is always valid and avoids duplicated points.
        sampler = np.random.RandomState(42)
        sampled_failure_indices = sampler.choice(
            failure_indices,
            size=min(sample_size, len(failure_indices)),
            replace=False
        )
        sampled_non_failure_indices = sampler.choice(
            non_failure_indices,
            size=min(sample_size, len(non_failure_indices)),
            replace=False
        )
        combined_indices = np.concatenate([sampled_failure_indices, sampled_non_failure_indices])
        X_for_tsne = X_scaled[combined_indices]
        y_for_tsne = y.iloc[combined_indices]
        # Perplexity has to stay below the number of samples.
        perplexity_value = min(30, len(X_for_tsne) - 1)
        perplexity_value = max(2, perplexity_value)
        print(f"t-SNE kullanƒ±lan √∂rnek sayƒ±sƒ±: {len(X_for_tsne)}, perplexity: {perplexity_value}")
        # The iteration count is left at the library default (1000): the
        # keyword was renamed from n_iter to max_iter in recent scikit-learn
        # releases, so passing either name breaks on some versions.
        tsne = TSNE(
            n_components=2,
            perplexity=perplexity_value,
            random_state=42,
            learning_rate='auto'
        )
        X_embedded = tsne.fit_transform(X_for_tsne)
        plt.figure(figsize=(10, 8))
        scatter = plt.scatter(X_embedded[:, 0], X_embedded[:, 1],
                              c=y_for_tsne, cmap='coolwarm',
                              s=20,
                              alpha=0.7)
        plt.title("t-SNE G√∂rselle≈ütirmesi", fontsize=14)
        plt.xlabel("TSNE-1", fontsize=12)
        plt.ylabel("TSNE-2", fontsize=12)
        plt.colorbar(scatter, label='Failure')
        plt.grid(alpha=0.3)
        plt.tight_layout()
        plt.savefig("tsne_plot.png", dpi=300)
        plt.close()
        print("t-SNE g√∂rselle≈ütirmesi ba≈üarƒ±yla olu≈üturuldu.")
    else:
        raise ValueError("Her iki sƒ±nƒ±ftan da yeterli √∂rnek bulunamadƒ±")
except Exception as e:
    # Write a placeholder image so the report still has a page for t-SNE.
    print(f"t-SNE g√∂rselle≈ütirmesi sƒ±rasƒ±nda hata olu≈ütu: {str(e)}")
    plt.figure(figsize=(10, 8))
    plt.text(0.5, 0.5, f"t-SNE g√∂rselle≈ütirmesi olu≈üturulamadƒ±\nHata: {str(e)}",
             ha='center', va='center', fontsize=12)
    plt.axis('off')
    plt.savefig("tsne_plot.png", dpi=300)
    plt.close()

# -------------------- LSTM AUTOENCODER ANOMALY DETECTION --------------------
# Trains a sequence autoencoder on non-failure rows only and derives an
# anomaly threshold from its reconstruction error.
print("LSTM autoencoder hazƒ±rlanƒ±yor...")
normal_data = telemetry_hourly[telemetry_hourly['failure'] == 0][features].values
# Failure rows are extracted here but not used anywhere else in this section.
abnormal_data = telemetry_hourly[telemetry_hourly['failure'] == 1][features].values
# Cap the training data at the first 8000 non-failure rows.
max_normal = min(8000, len(normal_data))
normal_data = normal_data[:max_normal]
# Separate scaler, fitted on the non-failure rows only.
scaler_lstm = MinMaxScaler()
normal_scaled = scaler_lstm.fit_transform(normal_data)
# Build overlapping windows of seq_length consecutive rows (stride 1).
# NOTE(review): windows follow the row order of telemetry_hourly, so a window
# can presumably span rows of two different machines - confirm this is fine.
seq_length = 10
X_seq = []
for i in range(len(normal_scaled) - seq_length):
    X_seq.append(normal_scaled[i:i+seq_length])
X_seq = np.array(X_seq)
print(f"LSTM autoencoder eƒüitiliyor... (Epochs: {epochs})")
# NOTE: with seq_length or fewer rows no window is built, X_seq is then a 1-D
# empty array and X_seq.shape[2] below raises IndexError.
input_layer = Input(shape=(seq_length, X_seq.shape[2]))
# Encoder: two LSTM layers; the second returns only its final state (4 units).
x = LSTM(8, activation='relu', return_sequences=True, kernel_regularizer=l2(0.01))(input_layer)
x = Dropout(0.5)(x)
x = LSTM(4, activation='relu', return_sequences=False, kernel_regularizer=l2(0.01))(x)
# Repeat the encoded vector once per time step so the decoder can unroll it.
x = RepeatVector(seq_length)(x)
# Decoder: mirrors the encoder and emits one feature vector per time step.
x = LSTM(4, activation='relu', return_sequences=True, kernel_regularizer=l2(0.01))(x)
x = Dropout(0.5)(x)
x = LSTM(8, activation='relu', return_sequences=True, kernel_regularizer=l2(0.01))(x)
output_layer = TimeDistributed(Dense(X_seq.shape[2]))(x)
model_lstm = tf.keras.Model(inputs=input_layer, outputs=output_layer)
model_lstm.compile(optimizer='adam', loss='mse')
# Stop after one epoch without val_loss improvement and restore the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=1, restore_best_weights=True)
# Autoencoder training: the input windows are also the targets.
history = model_lstm.fit(
    X_seq, X_seq, 
    epochs=epochs, 
    batch_size=64, 
    validation_split=0.1, 
    verbose=1,
    callbacks=[early_stopping]
)
# Reconstruction error per window, averaged over time steps and features.
X_pred = model_lstm.predict(X_seq)
mse = np.mean(np.power(X_seq - X_pred, 2), axis=(1, 2))
# The threshold is the 95th percentile of the error on the same windows the
# model was fitted on.
threshold = np.percentile(mse, 95)
print("üî∫ Anomali e≈üik deƒüeri:", threshold)
# Two stacked panels: reconstruction error with threshold, then loss history.
try:
    plt.figure(figsize=(14, 8))

    plt.subplot(2, 1, 1)
    plt.plot(mse, label='Reconstruction Error')
    plt.axhline(y=threshold, color='red', linestyle='--', label=f'Threshold ({threshold:.4f})')
    plt.title("LSTM Autoencoder Anomali Tespiti", fontsize=14)
    plt.xlabel("Dizi ƒ∞ndeksi", fontsize=12)
    plt.ylabel("Hata (MSE)", fontsize=12)
    plt.legend()
    plt.grid(alpha=0.3)

    plt.subplot(2, 1, 2)
    for history_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
        plt.plot(history.history[history_key], label=curve_label)
    plt.title("LSTM Autoencoder Eƒüitim Ge√ßmi≈üi", fontsize=14)
    plt.xlabel("Epoch", fontsize=12)
    plt.ylabel("Loss", fontsize=12)
    plt.legend()
    plt.grid(alpha=0.3)

    plt.tight_layout()
    plt.savefig("lstm_anomaly_plot.png", dpi=300)
    plt.close()
except Exception as plot_error:
    print(f"LSTM anomali plotu olu≈üturulamadƒ±: {plot_error}")

# Histogram of the reconstruction errors with the anomaly threshold marked.
try:
    plt.figure(figsize=(10, 6))
    plt.hist(mse, bins=50, alpha=0.7, color='blue')
    threshold_label = f'Anomali E≈üiƒüi ({threshold:.4f})'
    plt.axvline(x=threshold, color='red', linestyle='--', label=threshold_label)
    plt.title("MSE Daƒüƒ±lƒ±mƒ± ve Anomali E≈üiƒüi", fontsize=14)
    plt.xlabel("MSE Deƒüeri", fontsize=12)
    plt.ylabel("Frekans", fontsize=12)
    plt.legend()
    plt.grid(alpha=0.3)
    plt.tight_layout()
    plt.savefig("mse_histogram.png", dpi=300)
    plt.close()
except Exception as hist_error:
    print(f"MSE histogramƒ± olu≈üturulamadƒ±: {hist_error}")

# -------------------- PDF REPORT --------------------
print(f"üìÑ PDF raporu olu≈üturuluyor: {report_filename}")

# Re-train every model once purely to measure how long training takes.
training_times = {}
for name, model in models.items():
    # perf_counter is monotonic and has a higher resolution than time.time().
    start = time.perf_counter()
    if name == 'XGBoost':
        # Time the same configuration that was evaluated earlier (native API,
        # same hyperparameters, boosting rounds and early stopping); timing a
        # different setup would make the comparison chart misleading.
        timing_params = {
            'max_depth': 2,
            'learning_rate': 0.007,
            'subsample': 0.7,
            'colsample_bytree': 0.7,
            'min_child_weight': 20,
            'reg_lambda': 100,
            'reg_alpha': 10.0,
            'gamma': 10,
            'objective': 'binary:logistic',
            'eval_metric': 'logloss',
            'random_state': 42
        }
        timing_dtrain = xgb.DMatrix(X_train, label=y_train, feature_names=features)
        timing_dtest = xgb.DMatrix(X_test, label=y_test, feature_names=features)
        xgb.train(
            timing_params,
            timing_dtrain,
            num_boost_round=300,
            evals=[(timing_dtrain, 'train'), (timing_dtest, 'test')],
            early_stopping_rounds=30,
            verbose_eval=False
        )
    else:
        model.fit(X_train, y_train)
    training_times[name] = time.perf_counter() - start

# Bar chart of the measured durations, fastest model first.
try:
    plt.figure(figsize=(10, 6))
    names = list(training_times.keys())
    times = list(training_times.values())
    sorted_indices = np.argsort(times)
    sorted_names = [names[i] for i in sorted_indices]
    sorted_times = [times[i] for i in sorted_indices]
    bars = plt.bar(sorted_names, sorted_times, color=['blue', 'green', 'red', 'purple'])
    plt.title('Model Eƒüitim S√ºreleri Kar≈üƒ±la≈ütƒ±rmasƒ±', fontsize=14)
    plt.ylabel('S√ºre (saniye)', fontsize=12)
    plt.grid(axis='y', alpha=0.3)
    # Write the duration above each bar.
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width()/2., height + 0.1,
                f'{height:.2f}s',
                ha='center', va='bottom', fontsize=10)
    plt.tight_layout()
    plt.savefig('training_times_comparison.png', dpi=300)
    plt.close()
except Exception as e:
    print(f"Eƒüitim s√ºreleri plotu olu≈üturulamadƒ±: {str(e)}")

def _append_image_page(pdf, img_path, title, figsize=(10, 8)):
    """Embed the image at ``img_path`` as one titled PDF page.

    Missing files are skipped silently; any error while reading or drawing
    the image is printed and does not interrupt the report.
    """
    if not os.path.exists(img_path):
        return
    try:
        img = plt.imread(img_path)
        plt.figure(figsize=figsize)
        plt.imshow(img)
        plt.axis('off')
        plt.title(title)
        pdf.savefig()
        plt.close()
    except Exception as e:
        print(f"PDF'ye {img_path} eklenemedi: {str(e)}")


with PdfPages(report_filename) as pdf:
    # Cover page with the run parameters.
    try:
        plt.figure(figsize=(12, 8))
        created_at = datetime.datetime.now().strftime('%d-%m-%Y %H:%M')
        plt.text(0.5, 0.8, f"Model Kar≈üƒ±la≈ütƒ±rma Raporu",
                 fontsize=24, ha='center', fontweight='bold')
        plt.text(0.5, 0.6, f"Olu≈üturulma Tarihi: {created_at}",
                 fontsize=16, ha='center')
        plt.text(0.5, 0.5, f"Veri Y√ºzdesi: %{data_fraction*100:.2f}",
                 fontsize=14, ha='center')
        plt.text(0.5, 0.4, f"LSTM Epoch Sayƒ±sƒ±: {epochs}",
                 fontsize=14, ha='center')
        plt.axis('off')
        pdf.savefig()
        plt.close()
    except Exception as e:
        print(f"PDF kapak sayfasƒ± olu≈üturulamadƒ±: {str(e)}")

    # Grouped bar chart of all metrics for all models.
    try:
        plt.figure(figsize=(12, 6))
        metrics_data = metrics_table.melt(id_vars='Metric', var_name='Model', value_name='Value')
        ax = sns.barplot(x='Metric', y='Value', hue='Model', data=metrics_data)
        plt.title("Model Performans Kar≈üƒ±la≈ütƒ±rmasƒ±", fontsize=14)
        plt.ylim(0, 1)
        plt.legend(loc='lower right')
        for container in ax.containers:
            ax.bar_label(container, fmt='%.2f', fontsize=8)
        plt.tight_layout()
        pdf.savefig()
        plt.close()
    except Exception as e:
        print(f"Performans kar≈üƒ±la≈ütƒ±rmasƒ± plotu olu≈üturulamadƒ±: {str(e)}")

    # Shared comparison images, in report order.
    overview_pages = [
        ("roc_comparison.png", "ROC Eƒürisi Kar≈üƒ±la≈ütƒ±rmasƒ±"),
        ("confusion_matrix_comparison.png", "Confusion Matrix Kar≈üƒ±la≈ütƒ±rmasƒ±"),
        ("tree_models_feature_importance.png", "Aƒüa√ß Tabanlƒ± Modeller √ñznitelik √ñnemi Kar≈üƒ±la≈ütƒ±rmasƒ±"),
        ("tsne_plot.png", "t-SNE G√∂rselle≈ütirmesi"),
        ("lstm_anomaly_plot.png", "LSTM Anomali Tespiti"),
        ("mse_histogram.png", "MSE Daƒüƒ±lƒ±mƒ± ve Anomali E≈üiƒüi"),
        ("training_times_comparison.png", "Model Eƒüitim S√ºreleri Kar≈üƒ±la≈ütƒ±rmasƒ±")
    ]
    for img_path, title in overview_pages:
        # The confusion-matrix grid gets a wider page than the other images.
        page_width = 12 if "confusion_matrix" in img_path else 10
        _append_image_page(pdf, img_path, title, figsize=(page_width, 8))

    # Per-model SHAP and feature-importance images (whichever exist).
    for name in models.keys():
        model_name_lower = name.replace(' ', '_').lower()
        model_pages = [
            (f"shap_summary_{model_name_lower}.png", f"SHAP Summary - {name}"),
            (f"shap_waterfall_{model_name_lower}.png", f"SHAP Waterfall - {name}"),
            (f"shap_force_{model_name_lower}.png", f"SHAP Force Plot - {name}"),
            (f"{model_name_lower}_feature_importance.png", f"{name} √ñznitelik √ñnemi")
        ]
        for img_path, title in model_pages:
            _append_image_page(pdf, img_path, title)

    # Learning curves of the tree-based models.
    curve_pages = [
        ('learning_curve_random_forest.png', 'Random Forest'),
        ('learning_curve_catboost.png', 'CatBoost'),
        ('learning_curve_xgboost.png', 'XGBoost'),
    ]
    for path, name in curve_pages:
        _append_image_page(pdf, path, f"{name} √ñƒürenme Eƒürisi")

# -------------------- RESULT --------------------
end_time = time.time()
execution_time = end_time - start_time
print(f"\n‚úÖ ƒ∞≈ülem tamamlandƒ±! Toplam s√ºre: {execution_time:.2f} saniye ({execution_time/60:.2f} dakika)")

# Pick the best model separately for F1 and for AUC.
model_names = list(results)
best_model_by_f1 = max(model_names, key=lambda label: results[label]['F1 Score'])
best_model_by_auc = max(model_names, key=lambda label: results[label]['AUC'])

summary_lines = [
    f"\nüìä √ñZET Bƒ∞LGƒ∞LER:",
    f"‚òëÔ∏è En Y√ºksek F1 Skoru: {best_model_by_f1} ({results[best_model_by_f1]['F1 Score']:.4f})",
    f"‚òëÔ∏è En Y√ºksek AUC: {best_model_by_auc} ({results[best_model_by_auc]['AUC']:.4f})",
    f"‚òëÔ∏è Anomali E≈üiƒüi: {threshold:.4f}",
    f"‚òëÔ∏è Kullanƒ±lan veri y√ºzdesi: %{data_fraction*100:.2f}",
    f"‚òëÔ∏è LSTM epoch sayƒ±sƒ±: {epochs}",
]
for line in summary_lines:
    print(line)

# Open the report with the platform's default PDF viewer (best effort).
try:
    system_name = platform.system()
    if system_name == 'Windows':
        os.startfile(report_filename)
    else:
        opener = 'open' if system_name == 'Darwin' else 'xdg-open'
        subprocess.call([opener, report_filename])
    print(f"üìÅ PDF raporu a√ßƒ±ldƒ±: {report_filename}")
except Exception as open_error:
    print(f"PDF a√ßƒ±lƒ±rken hata: {open_error}")
    print(f"Raporun konumu: {os.path.abspath(report_filename)}")

# Ask whether the intermediate PNG files should be removed now that the PDF
# report has been written.
cleanup = messagebox.askyesno("Temizleme", "Ge√ßici g√∂rsel dosyalarƒ± silinsin mi?")
if cleanup:
    temp_files = [
        "roc_comparison.png", "confusion_matrix_comparison.png",
        "tsne_plot.png", "lstm_anomaly_plot.png", "mse_histogram.png",
        "training_times_comparison.png", "tree_models_feature_importance.png"
    ]
    # Per-model images; not every one of them necessarily exists.
    per_model_patterns = (
        "shap_summary_{}.png",
        "shap_waterfall_{}.png",
        "shap_force_{}.png",
        "{}_feature_importance.png",
    )
    for name in models:
        model_name_lower = name.replace(' ', '_').lower()
        temp_files.extend(pattern.format(model_name_lower) for pattern in per_model_patterns)
    temp_files.extend([
        'learning_curve_random_forest.png',
        'learning_curve_catboost.png',
        'learning_curve_xgboost.png',
    ])

    for file in temp_files:
        if not os.path.exists(file):
            continue
        try:
            os.remove(file)
        except Exception as e:
            print(f"Dosya silinemedi: {file} - {str(e)}")

    print("üßπ Ge√ßici dosyalar temizlendi.")

Veri y√ºkleniyor...
Veri √∂n i≈üleniyor...
Zaman serisi √∂zellikleri ekleniyor...
Y√ºksek korelasyonlu √∂zellikler eleniyor...
Elenen √∂zellikler: []
Kalan √∂zellikler: ['volt', 'rotate', 'pressure', 'vibration', 'volt_lag_1', 'volt_lag_2', 'rotate_lag_1', 'rotate_lag_2', 'pressure_lag_1', 'pressure_lag_2', 'vibration_lag_1', 'vibration_lag_2', 'volt_ma_3', 'rotate_ma_3', 'pressure_ma_3', 'vibration_ma_3', 'volt_std_3', 'rotate_std_3', 'pressure_std_3', 'vibration_std_3', 'volt_diff', 'rotate_diff', 'pressure_diff', 'vibration_diff']
Model hazƒ±rlanƒ±yor...
SMOTE uygulanƒ±yor... (Veri boyutu: 30000)
Logistic Regression, Random Forest, CatBoost ve XGBoost modelleri eƒüitiliyor...

‚û°Ô∏è Logistic Regression modeli eƒüitiliyor...
üîé Logistic Regression i√ßin SHAP deƒüerleri hesaplanƒ±yor...




  0%|          | 0/100 [00:00<?, ?it/s]

‚úÖ SHAP ba≈üarƒ±yla tamamlandƒ± - Logistic Regression

‚û°Ô∏è Random Forest modeli eƒüitiliyor...
üîé Random Forest i√ßin SHAP deƒüerleri hesaplanƒ±yor...
‚úÖ SHAP ba≈üarƒ±yla tamamlandƒ± - Random Forest

‚û°Ô∏è CatBoost modeli eƒüitiliyor...
üîé CatBoost i√ßin SHAP deƒüerleri hesaplanƒ±yor...
‚úÖ SHAP ba≈üarƒ±yla tamamlandƒ± - CatBoost

‚û°Ô∏è XGBoost modeli eƒüitiliyor...
üîé XGBoost i√ßin SHAP deƒüerleri hesaplanƒ±yor...
‚úÖ SHAP ba≈üarƒ±yla tamamlandƒ± - XGBoost
