<a href="https://colab.research.google.com/github/damlakaynarca/Makale-Uygulama-veri-madencili-i/blob/main/Veri_Madencili%C4%9Fi_Dersi_Makale_Uygulama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Gerekli Kütüphanelerin Yüklenmesi
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, cohen_kappa_score
from boruta import BorutaPy
import zipfile
import io
import requests

# 1. Veri Setinin Yüklenmesi (Bank Marketing veri seti)
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank-additional.zip'
response = requests.get(url)
with zipfile.ZipFile(io.BytesIO(response.content)) as z:
    with z.open('bank-additional/bank-additional.csv') as f:
        df = pd.read_csv(f, sep=';', header=0)

# Veriyi Hazırlama
X = df.drop('y', axis=1)  # Hedef sütunu kaldırma
y = df['y'].apply(lambda x: 1 if x == 'yes' else 0)  # Binary encoding
X = pd.get_dummies(X)  # Kategorik değişkenleri sayısallaştırma

# Eğitim ve Test Kümesi
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Özellik Seçimi - Random Forest ile Feature Importance
print("Random Forest Feature Importance ile Özellik Seçimi")
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
feature_importances = rf.feature_importances_
important_features_rf = X_train.columns[feature_importances > np.mean(feature_importances)]

# 3. Boruta ile Özellik Seçimi
print("Boruta Özellik Seçimi")
boruta = BorutaPy(rf, n_estimators='auto', verbose=0, random_state=42)
boruta.fit(X_train.values, y_train.values)
important_features_boruta = X_train.columns[boruta.support_]

# 4. RFE (Recursive Feature Elimination) ile Özellik Seçimi
print("Recursive Feature Elimination (RFE)")
rfe = RFE(rf, n_features_to_select=10)
rfe.fit(X_train, y_train)
important_features_rfe = X_train.columns[rfe.support_]

# 5. Model Eğitimi ve Değerlendirme
models = {
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVM': SVC(kernel='rbf', C=1, gamma='scale'),
    'KNN': KNeighborsClassifier(n_neighbors=5),
    'LDA': LinearDiscriminantAnalysis()
}

# Fonksiyon: Model Performansını Değerlendirme
def evaluate_model(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    kappa = cohen_kappa_score(y_test, y_pred)
    return accuracy, kappa

# Seçilen Özelliklerle Modellerin Performansı
for method, features in zip(['Random Forest', 'Boruta', 'RFE'],
                            [important_features_rf, important_features_boruta, important_features_rfe]):
    print(f"\n{method} ile Seçilen Özelliklerle Performans:")
    for model_name, model in models.items():
        acc, kappa = evaluate_model(model, X_train[features], X_test[features], y_train, y_test)
        print(f"{model_name}: Accuracy = {acc:.4f}, Kappa = {kappa:.4f}")

Random Forest Feature Importance ile Özellik Seçimi
Boruta Özellik Seçimi
Recursive Feature Elimination (RFE)

Random Forest ile Seçilen Özelliklerle Performans:
Random Forest: Accuracy = 0.9029, Kappa = 0.4757
SVM: Accuracy = 0.8993, Kappa = 0.2753
KNN: Accuracy = 0.8859, Kappa = 0.3776
LDA: Accuracy = 0.9005, Kappa = 0.4276

Boruta ile Seçilen Özelliklerle Performans:
Random Forest: Accuracy = 0.8956, Kappa = 0.4637
SVM: Accuracy = 0.8883, Kappa = 0.0166
KNN: Accuracy = 0.8847, Kappa = 0.3677
LDA: Accuracy = 0.8908, Kappa = 0.4161

RFE ile Seçilen Özelliklerle Performans:
Random Forest: Accuracy = 0.9029, Kappa = 0.4648
SVM: Accuracy = 0.8993, Kappa = 0.2753
KNN: Accuracy = 0.8859, Kappa = 0.3776
LDA: Accuracy = 0.9029, Kappa = 0.4534
