In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
import joblib  # Modeli kaydetmek için gerekli kütüphane

# Veri setini yükle
data_path = r"C:\\Users\\Buse Yener\\big_data\\standardized_data.csv"  # CSV dosyasının yolu
data = pd.read_csv(data_path)

# Bağımsız ve bağımlı değişkenler
X = data.drop('Class', axis=1)
y = data['Class']

# Veriyi eğitim ve test setlerine ayır
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Veriyi standardize et
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Random Forest Modeli
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Test seti üzerinde tahminler
y_pred_rf = rf_model.predict(X_test_scaled)

# Performans metriklerini hesapla
def evaluate_model(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    return accuracy, precision, recall, f1

# Random Forest metrikleri
rf_metrics = evaluate_model(y_test, y_pred_rf)
print("Random Forest Performansı:")
print(f"Accuracy: {rf_metrics[0]:.4f}")
print(f"Precision: {rf_metrics[1]:.4f}")
print(f"Recall: {rf_metrics[2]:.4f}")
print(f"F1 Skoru: {rf_metrics[3]:.4f}")

# Modeli kaydet
joblib.dump(rf_model, 'random_forest_model.pkl')
print("Model başarıyla kaydedildi!")


Random Forest Performansı:
Accuracy: 0.9998
Precision: 0.9999
Recall: 0.9997
F1 Skoru: 0.9998
Model başarıyla kaydedildi!
