# In [None]:
import pandas as pd
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# 📌 Load and filter the data
veri_yolu = "data/akciger_kanseri_veriseti.csv"
veriler = pd.read_csv(veri_yolu)

# Columns kept for modelling; 'Nüks' is the target label, the rest are features.
parametreler = [
    'Yaş', 'DLCO%', 'FEV1/FVC%', 'Ext', 'Major komplikasyon',
    'Vasküler inv', 'Nekroz', 'STAS', 'T (9)', 'N (9)', 'M (9)', 'Nüks'
]

veriler = veriler[parametreler]

# Turn decimal commas into dots and coerce everything to numeric
# (values that cannot be parsed become NaN).
veriler = veriler.replace(",", ".", regex=True).apply(pd.to_numeric, errors='coerce')

# A missing label must never be imputed: KNN would replace it with an average
# of neighbouring labels, which is not a valid class. Drop those rows instead.
veriler = veriler.dropna(subset=['Nüks']).reset_index(drop=True)

# 📌 Separate features and labels
X_raw = veriler.drop('Nüks', axis=1)
y = veriler['Nüks']

# Build the train and test sets BEFORE imputing so that the imputer never
# sees test rows (avoids leaking test information into training).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, random_state=42, stratify=y
)

# Fill missing feature values: fit on the training features only, then apply
# the same fitted imputer to the test features.
imputer = KNNImputer(n_neighbors=5)
X_train = pd.DataFrame(
    imputer.fit_transform(X_train_raw), columns=X_raw.columns, index=X_train_raw.index
)
X_test = pd.DataFrame(
    imputer.transform(X_test_raw), columns=X_raw.columns, index=X_test_raw.index
)

# Keep `X` and `veriler` available as fully imputed frames (using the
# train-fitted imputer) for any later code that relies on them.
X = pd.DataFrame(imputer.transform(X_raw), columns=X_raw.columns, index=X_raw.index)
veriler = pd.concat([X, y], axis=1)

# 📌 Gradient Boosting Classifier
# Train on the training split and report accuracy on the held-out test split.
model_gb = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)
gb_pred = model_gb.predict(X_test)
gb_accuracy_pct = accuracy_score(y_test, gb_pred) * 100
print(f"Gradient Boosting doğruluk: %{gb_accuracy_pct:.2f}")

# Print the importance the fitted model assigns to each feature, in column order.
importance_by_feature = dict(zip(X.columns, model_gb.feature_importances_))
for name, score in importance_by_feature.items():
    print(f"{name}: {score:.3f}")

# 📌 Decision Tree Classifier
# Train on the training split and report accuracy on the held-out test split.
model_tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
tree_pred = model_tree.predict(X_test)
tree_accuracy_pct = accuracy_score(y_test, tree_pred) * 100
print(f"Decision Tree doğruluk oranı: {tree_accuracy_pct:.2f}%")

# 📌 Scaling for the neural network
# Seed TensorFlow so the run is more reproducible, in line with the
# random_state=42 used for the split and the scikit-learn models.
tf.random.set_seed(42)

# Fit the scaler on the training features only, then reuse it for the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 📌 Neural network model
# Small fully connected binary classifier: sigmoid output with binary
# cross-entropy loss. The input width is taken from the scaled training matrix.
model_nn = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

model_nn.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])

# 📌 Model training
# Stop once val_loss has not improved for 10 epochs and roll the weights back
# to the best epoch; without restore_best_weights the model would keep the
# weights from the last (already degraded) epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True
)
model_nn.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=1,
             validation_split=0.1, callbacks=[early_stopping])

# 📌 Model evaluation
loss, accuracy = model_nn.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Sinir Ağı doğruluk oranı: {accuracy*100:.2f}%")

# 📌 Save the model
# NOTE(review): the fitted `scaler` (and the imputer) are not persisted here;
# whoever loads this file must apply the same preprocessing before predicting.
model_nn.save("lung_cancer_recurrence_model.h5")