In [1]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Input

In [2]:
data= pd.read_csv(r"C:\Users\hsynt\Downloads\Selcuk Abi\Selcuk Abi\VeriSeti2.csv")

In [3]:
# Özellik ve Hedef Ayrımı
X = data.drop(columns=['Target_10', 'adjclose'])
y = data['Target_10']

# Zaman Serisi Formatına Getirme
def create_sequences(data, target, seq_length):
    sequences, targets = [], []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i + seq_length])
        targets.append(target[i + seq_length])
    return np.array(sequences), np.array(targets)

SEQ_LENGTH = 10
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_seq, y_seq = create_sequences(X_scaled, y.values, SEQ_LENGTH)

# Eğitim ve test kümelerinin oluşturulması
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42, stratify=y_seq)

# PCA ve LDA Veri Hazırlığı
X_flat = X_seq.reshape(X_seq.shape[0], -1)  # Düzleştirilmiş veri

pca = PCA(n_components=10)
X_train_pca = pca.fit_transform(X_train.reshape(X_train.shape[0], -1))
X_test_pca = pca.transform(X_test.reshape(X_test.shape[0], -1))

lda = LDA(n_components=1)
X_train_lda = lda.fit_transform(X_train.reshape(X_train.shape[0], -1), y_train)
X_test_lda = lda.transform(X_test.reshape(X_test.shape[0], -1))

# Performans Metriklerini Hesaplama
def calculate_metrics(y_true, y_pred, y_pred_proba):
    return {
        "Accuracy": accuracy_score(y_true, y_pred) * 100,
        "Precision": precision_score(y_true, y_pred) * 100,
        "Recall": recall_score(y_true, y_pred) * 100,
        "F1-Score": f1_score(y_true, y_pred) * 100,
        "AUC": roc_auc_score(y_true, y_pred_proba) * 100
    }

# LSTM Modeli Tanımlama (Sadece Özellik Çıkartma İçin)
def build_lstm_feature_extractor(input_shape):
    model = Sequential([
        Input(shape=input_shape),
        LSTM(64, activation='relu', return_sequences=False),
        Dropout(0.2)
    ])
    return model

# Performansları Değerlendirme
results_train = []
results_test = []

for reduction, X_tr, X_te, y_tr, y_te in zip(
    ['None', 'PCA', 'LDA'],
    [X_train, X_train_pca, X_train_lda],
    [X_test, X_test_pca, X_test_lda],
    [y_train, y_train, y_train],
    [y_test, y_test, y_test]
):
    if reduction == 'None':
        X_tr = X_tr.reshape(X_tr.shape[0], X_tr.shape[1], X_tr.shape[2])  # LSTM için 3D giriş
        X_te = X_te.reshape(X_te.shape[0], X_te.shape[1], X_te.shape[2])

        # LSTM Özellik Çıkartıcı
        lstm_model = build_lstm_feature_extractor(X_tr.shape[1:])
        X_train_features = lstm_model.predict(X_tr)
        X_test_features = lstm_model.predict(X_te)
    else:
        # PCA veya LDA için düzleştirilmiş veri kullanılır
        X_train_features = X_tr
        X_test_features = X_te

    # Random Forest Modeli
    rf_model = RandomForestClassifier(n_estimators=200, random_state=42)
    rf_model.fit(X_train_features, y_tr)

    # Eğitim Performansı
    y_train_pred_rf = rf_model.predict(X_train_features)
    y_train_proba_rf = rf_model.predict_proba(X_train_features)[:, 1]
    train_metrics = calculate_metrics(y_tr, y_train_pred_rf, y_train_proba_rf)
    train_metrics['Reduction'] = reduction
    train_metrics['Model'] = 'LSTM + Random Forest'
    results_train.append(train_metrics)

    # Test Performansı
    y_test_pred_rf = rf_model.predict(X_test_features)
    y_test_proba_rf = rf_model.predict_proba(X_test_features)[:, 1]
    test_metrics = calculate_metrics(y_te, y_test_pred_rf, y_test_proba_rf)
    test_metrics['Reduction'] = reduction
    test_metrics['Model'] = 'LSTM + Random Forest'
    results_test.append(test_metrics)

# Eğitim ve Test Sonuçlarını Görselleştirme
train_df = pd.DataFrame(results_train).set_index(["Reduction", "Model"])
test_df = pd.DataFrame(results_test).set_index(["Reduction", "Model"])

# Eğitim Sonuçları Tablosu
print("Eğitim Kümesi Performans Sonuçları:")
print(train_df)

# Test Sonuçları Tablosu
print("\nTest Kümesi Performans Sonuçları:")
print(test_df)

# Sonuçları Kaydetme
train_df.to_csv("lstm_rf_pca_lda_train_performance.csv")
test_df.to_csv("lstm_rf_pca_lda_test_performance.csv")
print("\nSonuçlar 'lstm_rf_pca_lda_train_performance.csv' ve 'lstm_rf_pca_lda_test_performance.csv' dosyalarına kaydedildi.")

[1m1249/1249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Eğitim Kümesi Performans Sonuçları:
                                  Accuracy  Precision      Recall   F1-Score  \
Reduction Model                                                                
None      LSTM + Random Forest  100.000000  100.00000  100.000000  100.00000   
PCA       LSTM + Random Forest  100.000000  100.00000  100.000000  100.00000   
LDA       LSTM + Random Forest   99.969974   99.96027   99.986753   99.97351   

                                       AUC  
Reduction Model                             
None      LSTM + Random Forest  100.000000  
PCA       LSTM + Random Forest  100.000000  
LDA       LSTM + Random Forest   99.999829  

Test Kümesi Performans Sonuçları:
                                 Accuracy  Precision     Recall   F1-Score  \
Reduction Model                                                          