In [434]:
#1.Load breast cancer dataset (**structured data**)

from sklearn.datasets import load_breast_cancer

jeris = load_breast_cancer()


In [435]:
#2.Visualize the data

from sklearn.manifold import TSNE
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [436]:
#3.train_test_split

from sklearn.model_selection import train_test_split

X = np.random.rand(50, 3)
y = np.random.randint(0, 2, 50)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.225,
    train_size=0.775,
    random_state=None,
    shuffle=True,
    stratify=None
)


print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)


X_train shape: (38, 3)
X_test shape: (12, 3)
y_train shape: (38,)
y_test shape: (12,)


In [437]:
# 4.Train **model_decision_tree**

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

jeris = load_breast_cancer()
X = jeris.data
y = jeris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.225, random_state=42, stratify=y)

model_decision_tree = DecisionTreeClassifier(
   criterion='entropy',
    splitter='best',
    max_depth=15,
    min_samples_split=3,
    min_samples_leaf=3,
    max_leaf_nodes=4,
    class_weight='balanced',
    min_impurity_decrease=0.01,
    ccp_alpha=0.01,
    random_state=42,
    monotonic_cst=None

)


model_decision_tree.fit(X_train, y_train)
y_pred = model_decision_tree.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='binary')
recall = recall_score(y_test, y_pred, average='binary')
f1 = f1_score(y_test, y_pred, average='binary')
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")
print("Confusion Matrix:")
print(conf_matrix)



Accuracy: 0.91
Precision: 0.95
Recall: 0.91
F1-Score: 0.93
Confusion Matrix:
[[44  4]
 [ 7 74]]


In [484]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

jeris = load_breast_cancer()
X = jeris.data
y = jeris.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.225, random_state=42, stratify=y
)

model = RandomForestClassifier(
    n_estimators=100,
    criterion='entropy',
    max_depth=30,
    min_samples_split=10,
    min_samples_leaf=20,
    max_features='sqrt',
    bootstrap=True,
    class_weight='balanced',
    random_state=42,
    ccp_alpha=0.01,
    warm_start=True
)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='binary')
recall = recall_score(y_test, y_pred, average='binary')
f1 = f1_score(y_test, y_pred, average='binary')
conf_matrix = confusion_matrix(y_test, y_pred)

print("Random Forest Results:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")
print("Confusion Matrix:")
print(conf_matrix)


  warn(


Random Forest Results:
Accuracy: 0.95
Precision: 0.97
Recall: 0.94
F1-Score: 0.96
Confusion Matrix:
[[46  2]
 [ 5 76]]


In [489]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix


base_estimator = DecisionTreeClassifier(max_depth=4)

model_adaboost = AdaBoostClassifier(
    estimator=base_estimator,
    n_estimators=300,
    learning_rate=0.11,
    random_state=42

)

model_adaboost.fit(X_train, y_train)

y_pred = model_adaboost.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1-Score: {f1:.3f}")
print("Confusion Matrix:")



Accuracy: 0.961
Precision: 0.963
Recall: 0.975
F1-Score: 0.969
Confusion Matrix:


In [488]:
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score


# מודל 1: AdaBoost
ada_model = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=4),
    n_estimators=300,
    learning_rate=0.11,
    random_state=42
)

# מודל 2: RandomForest
rf_model = RandomForestClassifier(
    n_estimators=100,
    criterion='entropy',
    max_depth=30,
    min_samples_split=10,
    min_samples_leaf=20,
    max_features='sqrt',
    bootstrap=True,
    class_weight='balanced',
    random_state=42,
    ccp_alpha=0.01,
    warm_start=True
)

# מודל 3: DecisionTreeClassifier
dt_model = DecisionTreeClassifier(
    criterion='entropy',
    splitter='best',
    max_depth=15,
    min_samples_split=3,
    min_samples_leaf=3,
    max_leaf_nodes=4,
    class_weight='balanced',
    min_impurity_decrease=0.01,
    ccp_alpha=0.01,
    random_state=42
)

models = {
    "AdaBoost": ada_model,
    "RandomForest": rf_model,
    "DecisionTreeClassifier": dt_model
}

results = []
best_model = None
best_f1_score = 0

for model_name, model in models.items():
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='binary')
    recall = recall_score(y_test, y_pred, average='binary')
    f1 = f1_score(y_test, y_pred, average='binary')
    conf_matrix = confusion_matrix(y_test, y_pred)

    results.append({
        "Model": model_name,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1-Score": f1,
        "Confusion Matrix": conf_matrix
    })

    if f1 > best_f1_score:
        best_f1_score = f1
        best_model = model_name

print("Performance Comparison Between Models:")
for result in results:
    print(f"\nModel: {result['Model']}")
    print(f"Accuracy: {result['Accuracy']:.3f}")
    print(f"Precision: {result['Precision']:.3f}")
    print(f"Recall: {result['Recall']:.3f}")
    print(f"F1-Score: {result['F1-Score']:.3f}")
    print("Confusion Matrix:")
    print(result["Confusion Matrix"])

print("\nBest Model:")
print(f"The best model is '{best_model}' with an F1-Score of {best_f1_score:.3f}")

  warn(


Performance Comparison Between Models:

Model: AdaBoost
Accuracy: 0.961
Precision: 0.963
Recall: 0.975
F1-Score: 0.969
Confusion Matrix:
[[45  3]
 [ 2 79]]

Model: RandomForest
Accuracy: 0.946
Precision: 0.974
Recall: 0.938
F1-Score: 0.956
Confusion Matrix:
[[46  2]
 [ 5 76]]

Model: DecisionTreeClassifier
Accuracy: 0.915
Precision: 0.949
Recall: 0.914
F1-Score: 0.931
Confusion Matrix:
[[44  4]
 [ 7 74]]

Best Model:
The best model is 'AdaBoost' with an F1-Score of 0.969
