In [1]:
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 20 19:44:27 2024

@author: Buse Yener
"""

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the dataset.
# NOTE: the literal below is a raw string, so each doubled backslash stays as
# two backslash characters in the resulting path. It is kept exactly as written.
data_path = r"C:\\Users\\Buse Yener\\big_data\\standardized_data.csv"
data = pd.read_csv(data_path)

# Separate the target from the features; the 'Class' column is the target.
y = data['Class']
X = data.drop(columns='Class')

# Hold out 20% of the rows as the test set (fixed seed for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: the scaler is fitted on the training split only and
# the fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Logistic Regression model.
# max_iter is set to 1000 (original author's note: for more stable training).
lr_model = LogisticRegression(random_state=42, max_iter=1000).fit(X_train_scaled, y_train)
y_pred_lr = lr_model.predict(X_test_scaled)

# Random Forest model; random_state is fixed so results are repeatable.
rf_model = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred_rf = rf_model.predict(X_test_scaled)

# Decision Tree model; random_state is fixed for the same reason.
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred_dt = dt_model.predict(X_test_scaled)

# Compute the performance metrics
def evaluate_model(y_true, y_pred, average="binary"):
    """Score a set of predictions with accuracy, precision, recall and F1.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_true``.
    average : str or None, default "binary"
        Forwarded unchanged to ``precision_score``, ``recall_score`` and
        ``f1_score``. The default reproduces the previous behaviour of this
        helper (two-class targets). Pass e.g. "macro", "micro" or "weighted"
        to score multiclass targets, which the "binary" setting rejects.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)`` in that order.
    """
    accuracy = accuracy_score(y_true, y_pred)
    # The three class-averaged metrics share one signature, so evaluate them
    # uniformly with the same averaging mode.
    precision, recall, f1 = (
        metric(y_true, y_pred, average=average)
        for metric in (precision_score, recall_score, f1_score)
    )
    return accuracy, precision, recall, f1

# Report accuracy, precision, recall and F1 for each model on the test split.
def _print_metrics(title, metrics, blank_after=True):
    """Print one model's metric report.

    Parameters
    ----------
    title : str
        Model name shown in the heading line.
    metrics : sequence of float
        Values in ``(accuracy, precision, recall, f1)`` order, as returned by
        ``evaluate_model``.
    blank_after : bool, default True
        Whether to print an empty separator line after the report.
    """
    print(f"{title} Performansı:")
    labels = ("Accuracy", "Precision", "Recall", "F1 Skoru")
    for label, value in zip(labels, metrics):
        print(f"{label}: {value:.4f}")
    if blank_after:
        print()


# Logistic Regression metrics
lr_metrics = evaluate_model(y_test, y_pred_lr)
_print_metrics("Logistic Regression", lr_metrics)

# Random Forest metrics
rf_metrics = evaluate_model(y_test, y_pred_rf)
_print_metrics("Random Forest", rf_metrics)

# Decision Tree metrics (last report, so no trailing separator line)
dt_metrics = evaluate_model(y_test, y_pred_dt)
_print_metrics("Decision Tree", dt_metrics, blank_after=False)


Logistic Regression Performansı:
Accuracy: 0.8314
Precision: 0.8727
Recall: 0.7737
F1 Skoru: 0.8202

Random Forest Performansı:
Accuracy: 0.9998
Precision: 0.9999
Recall: 0.9997
F1 Skoru: 0.9998

Decision Tree Performansı:
Accuracy: 0.9935
Precision: 0.9923
Recall: 0.9945
F1 Skoru: 0.9934
