In [1]:
from sklearn.datasets import load_breast_cancer

In [2]:
import pandas as pd
import numpy as np

In [3]:
data = load_breast_cancer()

In [4]:
X = data.data
y = data.target

In [5]:
df = pd.DataFrame(X, columns=data.feature_names)

In [6]:
df['target'] = y

In [7]:
from sklearn.preprocessing import StandardScaler

In [8]:
scaler = StandardScaler()

In [9]:
X_scaled = scaler.fit_transform(X)

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [12]:
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
log_reg_score = log_reg.score(X_test, y_test)

In [13]:
from sklearn.tree import DecisionTreeClassifier

In [14]:
dt_classifier = DecisionTreeClassifier()
dt_classifier.fit(X_train, y_train)
dt_score = dt_classifier.score(X_test, y_test)

In [15]:
from sklearn.ensemble import RandomForestClassifier

In [16]:
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train, y_train)
rf_score = rf_classifier.score(X_test, y_test)

In [17]:
from sklearn.svm import SVC

In [18]:
svm_classifier = SVC()
svm_classifier.fit(X_train, y_train)
svm_score = svm_classifier.score(X_test, y_test)

In [19]:
from sklearn.neighbors import KNeighborsClassifier

In [20]:
knn_classifier = KNeighborsClassifier()
knn_classifier.fit(X_train, y_train)
knn_score = knn_classifier.score(X_test, y_test)

In [21]:
scores = {
    'Model': ['Logistic Regression', 'Decision Tree', 'Random Forest', 'SVM', 'k-NN'],
    'Accuracy': [log_reg_score, dt_score, rf_score, svm_score, knn_score]
}

In [22]:
scores_df = pd.DataFrame(scores)

In [23]:
print(scores_df)

                 Model  Accuracy
0  Logistic Regression  0.973684
1        Decision Tree  0.938596
2        Random Forest  0.964912
3                  SVM  0.973684
4                 k-NN  0.947368


In [24]:
best_model = scores_df.loc[scores_df['Accuracy'].idxmax()]

In [25]:
worst_model = scores_df.loc[scores_df['Accuracy'].idxmin()]

In [26]:
print(f"Best Model: {best_model['Model']} with accuracy {best_model['Accuracy']}")
print(f"Worst Model: {worst_model['Model']} with accuracy {worst_model['Accuracy']}")

Best Model: Logistic Regression with accuracy 0.9736842105263158
Worst Model: Decision Tree with accuracy 0.9385964912280702
