In [None]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold, cross_val_score, cross_validate, train_test_split
from sklearn.tree import DecisionTreeClassifier

# Load the breast-cancer dataset: feature matrix X and target vector y
data = load_breast_cancer()
X = data.data
y = data.target

# Hold out 30% of the samples as the test set; the remaining 70% form the
# training set (only a train/test split is made here, no separate validation set)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Number of folds and the shuffled, seeded splitter used for cross-validation
N_SPLITS = 10
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

# Classifiers to compare
dt_clf = DecisionTreeClassifier(max_depth=5, random_state=42)
rf_clf = RandomForestClassifier(random_state=42)

# Custom scorer functions with zero_division=1
def precision_scorer(y_true, y_pred):
    """Return the macro-averaged precision of ``y_pred`` against ``y_true``.

    Thin wrapper around ``precision_score`` that fixes ``average='macro'`` and
    ``zero_division=1`` so it can be handed to ``make_scorer``.
    """
    options = {"average": "macro", "zero_division": 1}
    return precision_score(y_true, y_pred, **options)

def recall_scorer(y_true, y_pred):
    """Return the macro-averaged recall of ``y_pred`` against ``y_true``.

    Thin wrapper around ``recall_score`` that fixes ``average='macro'`` and
    ``zero_division=1`` so it can be handed to ``make_scorer``.
    """
    options = {"average": "macro", "zero_division": 1}
    return recall_score(y_true, y_pred, **options)

def f1_scorer(y_true, y_pred):
    """Return the macro-averaged F1 score of ``y_pred`` against ``y_true``.

    Thin wrapper around ``f1_score`` that fixes ``average='macro'`` and
    ``zero_division=1`` so it can be handed to ``make_scorer``.
    """
    options = {"average": "macro", "zero_division": 1}
    return f1_score(y_true, y_pred, **options)

# Cross-validation helper
def evaluate_model(model, X, y, cv=None):
    """Cross-validate ``model`` and summarise four metrics.

    All metrics are computed in a single ``cross_validate`` pass, so the model
    is fitted once per fold and every metric is scored on the same fitted
    estimator and the same split (instead of refitting once per metric).

    Parameters
    ----------
    model : estimator
        Estimator passed to ``cross_validate``.
    X, y : array-like
        Features and targets to cross-validate on.
    cv : optional
        Cross-validation splitter or fold count forwarded to
        ``cross_validate``. Defaults to the module-level ``kf`` splitter.

    Returns
    -------
    dict
        Maps "Accuracy", "Precision", "Recall" and "F1-Score" to a
        ``(mean, std)`` tuple taken over the per-fold test scores.
    """
    if cv is None:
        cv = kf

    # Result key suffix -> scorer; cross_validate reports each as "test_<key>"
    scoring = {
        "accuracy": "accuracy",
        "precision": make_scorer(precision_scorer),
        "recall": make_scorer(recall_scorer),
        "f1": make_scorer(f1_scorer),
    }
    cv_results = cross_validate(model, X, y, cv=cv, scoring=scoring)

    # Display label -> scoring key, in the order the results are reported
    labels = {
        "Accuracy": "accuracy",
        "Precision": "precision",
        "Recall": "recall",
        "F1-Score": "f1",
    }
    summary = {}
    for label, key in labels.items():
        fold_scores = cv_results[f"test_{key}"]
        summary[label] = (fold_scores.mean(), fold_scores.std())
    return summary

# Cross-validate both classifiers on the training split
dt_results = evaluate_model(model=dt_clf, X=X_train, y=y_train)
rf_results = evaluate_model(model=rf_clf, X=X_train, y=y_train)

# Print the results
def print_results(name, results):
    """Print a header for ``name`` followed by one line per metric.

    ``results`` maps a metric label to a ``(mean, std)`` pair; both numbers are
    shown with four decimals. The report ends with two blank lines.
    """
    print(f"Ergebnisse für {name}:")
    for label, stats in results.items():
        mean, std = stats
        print(f"{label}: {mean:.4f} (+/- {std:.4f})")
    # Same output as print("\n"): two consecutive newlines
    print(end="\n\n")

# Report the cross-validation summary of each classifier
print_results(name="Decision Tree", results=dt_results)
print_results(name="Random Forest", results=rf_results)

Ergebnisse für Decision Tree:
Accuracy: 0.9021 (+/- 0.0607)
Precision: 0.8999 (+/- 0.0640)
Recall: 0.8943 (+/- 0.0690)
F1-Score: 0.8911 (+/- 0.0662)


Ergebnisse für Random Forest:
Accuracy: 0.9599 (+/- 0.0300)
Precision: 0.9636 (+/- 0.0292)
Recall: 0.9527 (+/- 0.0323)
F1-Score: 0.9558 (+/- 0.0309)




In [None]:
import numpy as np
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score

# Fit the random forest on the complete training split (X_train, y_train)
rf_clf.fit(X_train, y_train)

# Number of bootstrap resamples drawn from the test set
n_bootstrap_samples = 1000

# Dedicated, seeded generator so the bootstrap statistics are identical on every
# run of this cell, instead of depending on the global np.random state
bootstrap_rng = np.random.default_rng(42)

# The fitted model's prediction for a test row does not depend on which resample
# the row ends up in, so predict once and resample the (label, prediction) pairs
# instead of calling predict() in every iteration
y_test_pred = rf_clf.predict(X_test)
n_test_samples = len(X_test)

# Per-resample metric values
accuracy_scores = []
precision_scores = []
recall_scores = []
f1_scores = []

# Bootstrap loop
for _ in range(n_bootstrap_samples):
    # Draw row indices with replacement; each resample has the size of the test set
    bootstrap_indices = bootstrap_rng.choice(n_test_samples, size=n_test_samples, replace=True)
    y_bootstrap = y_test[bootstrap_indices]
    y_pred = y_test_pred[bootstrap_indices]

    # Metrics of this resample
    accuracy_scores.append(accuracy_score(y_bootstrap, y_pred))
    precision_scores.append(precision_score(y_bootstrap, y_pred, average='macro', zero_division=1))
    recall_scores.append(recall_score(y_bootstrap, y_pred, average='macro', zero_division=1))
    f1_scores.append(f1_score(y_bootstrap, y_pred, average='macro', zero_division=1))

# Mean and standard deviation of each metric over all resamples
accuracy_mean = np.mean(accuracy_scores)
accuracy_std = np.std(accuracy_scores)
precision_mean = np.mean(precision_scores)
precision_std = np.std(precision_scores)
recall_mean = np.mean(recall_scores)
recall_std = np.std(recall_scores)
f1_mean = np.mean(f1_scores)
f1_std = np.std(f1_scores)

print("Ergebnisse für Random Forest auf Testdaten mit Bootstrapping:")
print(f"Accuracy: {accuracy_mean:.4f} (+/- {accuracy_std:.4f})")
print(f"Precision: {precision_mean:.4f} (+/- {precision_std:.4f})")
print(f"Recall: {recall_mean:.4f} (+/- {recall_std:.4f})")
print(f"F1-Score: {f1_mean:.4f} (+/- {f1_std:.4f})")

Ergebnisse für Random Forest auf Testdaten mit Bootstrapping:
Accuracy: 0.9711 (+/- 0.0131)
Precision: 0.9739 (+/- 0.0122)
Recall: 0.9640 (+/- 0.0165)
F1-Score: 0.9683 (+/- 0.0143)
