In [1]:
# Gerekli Kütüphaneleri Yükleyin
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
# Location of the input CSV.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists — consider a configurable data directory.
DATASET_CSV = r"C:\Users\hsynt\Downloads\Selcuk Abi\Selcuk Abi\VeriSeti2.csv"
data = pd.read_csv(DATASET_CSV)

In [3]:
# Separate the label column from the features. 'adjclose' is removed from the
# feature matrix together with the target itself.
dropped_columns = ['Target_10', 'adjclose']
y = data['Target_10']
X = data.drop(dropped_columns, axis=1)

# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# class proportions the same in both partitions; the fixed seed makes the
# split repeatable.
# NOTE(review): the column names suggest price data — if rows are
# time-ordered, confirm that a shuffled split is really intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardise the features. The scaler is fitted on the training rows only and
# then applied unchanged to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# PCA projection onto 10 components, fitted on the scaled training rows.
pca = PCA(n_components=10)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# LDA projection. It is supervised, so fitting also takes the training labels.
lda = LDA()
X_train_lda = lda.fit_transform(X_train_scaled, y_train)
X_test_lda = lda.transform(X_test_scaled)

# Classifiers to compare, keyed by the display name used in the results table.
models = {
    "Random Forest": RandomForestClassifier(random_state=42, n_estimators=200),
    # `use_label_encoder` is intentionally not passed: the installed XGBoost
    # ignores it and prints 'Parameters: { "use_label_encoder" } are not used.'
    # on every fit.
    "XGBoost": XGBClassifier(random_state=42, eval_metric='logloss'),
    # probability=True makes SVC expose predict_proba, which the log-loss
    # computation relies on.
    "SVM": SVC(probability=True, random_state=42),
    "k-NN": KNeighborsClassifier(n_neighbors=5),
    "Logistic Regression": LogisticRegression(random_state=42, max_iter=1000)
}

# One result row (a dict) is appended here per reduction/model combination.
overfitting_results = []

# Train/test feature matrices for each dimensionality-reduction variant,
# keyed by the label shown in the "Reduction" column.
reductions = {
    "No Reduction": (X_train_scaled, X_test_scaled),
    "With PCA": (X_train_pca, X_test_pca),
    "With LDA": (X_train_lda, X_test_lda)
}

# Diagnostic thresholds, expressed as accuracy fractions (not percentages).
OVERFIT_ACCURACY_GAP = 0.1   # train minus test accuracy above this => "Overfitting"
UNDERFIT_ACCURACY = 0.7      # both accuracies below this => "Underfitting"

for reduction_name, (X_tr, X_te) in reductions.items():
    for model_name, model in models.items():
        # The same estimator object is refitted for every reduction; all
        # metrics are read immediately after fitting, before the next refit.
        model.fit(X_tr, y_train)

        # Score each split exactly once: every score() call re-runs prediction
        # over the whole split, so the values are computed here and reused.
        train_accuracy = model.score(X_tr, y_train)
        test_accuracy = model.score(X_te, y_test)

        # Log-loss is only meaningful on predicted probabilities. Feeding hard
        # class predictions to log_loss would yield a misleading number, so a
        # model without predict_proba gets NaN instead.
        if hasattr(model, "predict_proba"):
            train_loss = log_loss(y_train, model.predict_proba(X_tr))
            test_loss = log_loss(y_test, model.predict_proba(X_te))
        else:
            train_loss = float("nan")
            test_loss = float("nan")

        # Fit diagnostics derived from the two accuracies.
        overfitting = (train_accuracy - test_accuracy) > OVERFIT_ACCURACY_GAP
        underfitting = (train_accuracy < UNDERFIT_ACCURACY) and (test_accuracy < UNDERFIT_ACCURACY)

        # Record the row; accuracies are reported as percentages.
        overfitting_results.append({
            "Reduction": reduction_name,
            "Model": model_name,
            "Train Accuracy (%)": train_accuracy * 100,
            "Test Accuracy (%)": test_accuracy * 100,
            "Train Loss": train_loss,
            "Test Loss": test_loss,
            "Overfitting": "Yes" if overfitting else "No",
            "Underfitting": "Yes" if underfitting else "No"
        })

# Collect the per-model result rows into a single table.
overfitting_df = pd.DataFrame(overfitting_results)

# Print the table. to_string() renders every column on one line per row;
# a plain print(DataFrame) wraps wide tables into separate column blocks,
# which splits each model's metrics across the output.
print("Performans Sonuçları Tablosu (Overfitting & Underfitting):")
print(overfitting_df.to_string())

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



Performans Sonuçları Tablosu (Overfitting & Underfitting):
       Reduction                Model  Train Accuracy (%)  Test Accuracy (%)  \
0   No Reduction        Random Forest          100.000000          84.730839   
1   No Reduction              XGBoost           91.341872          83.810286   
2   No Reduction                  SVM           82.205934          82.049230   
3   No Reduction                 k-NN           86.223545          80.608365   
4   No Reduction  Logistic Regression           82.013309          81.979188   
5       With PCA        Random Forest          100.000000          80.508305   
6       With PCA              XGBoost           88.382449          79.647789   
7       With PCA                  SVM           79.844399          79.337603   
8       With PCA                 k-NN           84.890179          79.027416   
9       With PCA  Logistic Regression           79.461650          79.357615   
10      With LDA        Random Forest           99.882424    