In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input

In [2]:
data= pd.read_csv(r"C:\Users\hsynt\Downloads\Selcuk Abi\Selcuk Abi\VeriSeti2.csv")

In [3]:
# Özellik ve Hedef Ayrımı
X = data.drop(columns=['Target_10', 'adjclose'])
y = data['Target_10']

# Zaman Serisi Formatına Getirme
def create_sequences(data, target, seq_length):
    sequences, targets = [], []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i + seq_length])
        targets.append(target[i + seq_length])
    return np.array(sequences), np.array(targets)

SEQ_LENGTH = 10
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_seq, y_seq = create_sequences(X_scaled, y.values, SEQ_LENGTH)

# Eğitim ve test kümelerinin oluşturulması
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42, stratify=y_seq)

# PCA ve LDA Veri Hazırlığı
X_flat = X_seq.reshape(X_seq.shape[0], -1)  # Düzleştirilmiş veri

pca = PCA(n_components=10)
X_train_pca = pca.fit_transform(X_train.reshape(X_train.shape[0], -1))
X_test_pca = pca.transform(X_test.reshape(X_test.shape[0], -1))

lda = LDA(n_components=1)
X_train_lda = lda.fit_transform(X_train.reshape(X_train.shape[0], -1), y_train)
X_test_lda = lda.transform(X_test.reshape(X_test.shape[0], -1))

# LSTM Modeli Tanımlama (Özellik Çıkartma İçin)
def build_lstm_feature_extractor(input_shape):
    model = Sequential([
        Input(shape=input_shape),
        LSTM(64, activation='relu', return_sequences=False),
        Dropout(0.2),
        Dense(32, activation='relu')  # Ara katman özellik çıkartıcı
    ])
    return model

# Performans Hesaplama
results = []
threshold = 60  # Underfitting için eşik

for reduction, X_tr, X_te, y_tr, y_te in zip(
    ['No PCA/LDA', 'PCA', 'LDA'],
    [X_train, X_train_pca, X_train_lda],
    [X_test, X_test_pca, X_test_lda],
    [y_train, y_train, y_train],
    [y_test, y_test, y_test]
):
    if reduction == 'No PCA/LDA':
        X_tr = X_tr.reshape(X_tr.shape[0], X_tr.shape[1], X_tr.shape[2])  # LSTM için 3D giriş
        X_te = X_te.reshape(X_te.shape[0], X_te.shape[1], X_te.shape[2])

        # LSTM Özellik Çıkartıcı Model
        lstm_model = build_lstm_feature_extractor(X_tr.shape[1:])
        X_train_features = lstm_model.predict(X_tr)
        X_test_features = lstm_model.predict(X_te)
    else:
        # PCA ve LDA için düzleştirilmiş veriler kullanılır
        X_train_features = X_tr
        X_test_features = X_te

    # Random Forest Modeli
    rf_model = RandomForestClassifier(n_estimators=200, random_state=42)
    rf_model.fit(X_train_features, y_tr)

    # Eğitim Performansı
    y_train_pred_rf = rf_model.predict_proba(X_train_features)[:, 1]
    train_accuracy = accuracy_score(y_tr, y_train_pred_rf.round()) * 100
    train_loss = log_loss(y_tr, y_train_pred_rf)

    # Test Performansı
    y_test_pred_rf = rf_model.predict_proba(X_test_features)[:, 1]
    test_accuracy = accuracy_score(y_te, y_test_pred_rf.round()) * 100
    test_loss = log_loss(y_te, y_test_pred_rf)

    # Aşırı ve Yetersiz Öğrenme Kontrolü
    overfit = train_accuracy - test_accuracy > 5  # %5'ten fazla fark varsa aşırı öğrenme
    underfit = train_accuracy < threshold and test_accuracy < threshold

    results.append({
        'Reduction': reduction,
        'Model': 'LSTM + Random Forest',
        'Train Accuracy (%)': train_accuracy,
        'Test Accuracy (%)': test_accuracy,
        'Train Loss': train_loss,
        'Test Loss': test_loss,
        'Overfitting': overfit,
        'Underfitting': underfit
    })

# Performans Sonuçlarını Görselleştirme
results_df = pd.DataFrame(results)

# Performans Sonuçları Tablosu
print("Performans Sonuçları Tablosu:")
print(results_df)

# Sonuçları Kaydetme
results_df.to_csv("lstm_rf_overfitting_underfitting_results.csv", index=False)
print("\nSonuçlar 'lstm_rf_overfitting_underfitting_results.csv' dosyasına kaydedildi.")

[1m1249/1249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Performans Sonuçları Tablosu:
    Reduction                 Model  Train Accuracy (%)  Test Accuracy (%)  \
0  No PCA/LDA  LSTM + Random Forest          100.000000          84.437550   
1         PCA  LSTM + Random Forest          100.000000          82.676141   
2         LDA  LSTM + Random Forest           99.969974          80.204163   

   Train Loss  Test Loss  Overfitting  Underfitting  
0    0.102095   0.371164         True         False  
1    0.110370   0.402649         True         False  
2    0.082807   1.554757         True         False  

Sonuçlar 'lstm_rf_overfitting_underfitting_results.csv' dosyasına kaydedildi.
