In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn import preprocessing
# Load the breast cancer dataset bundled with scikit-learn and announce
# what the notebook is about to report.
breast_cancer = load_breast_cancer()
print('Performance of methods on breast cancer data')

# Feature matrix and class labels.
X = breast_cancer.data
y = breast_cancer.target

# Standardise every feature column with StandardScaler. From here on `X`
# refers to the scaled features; the raw values remain available as
# `breast_cancer.data`.
# NOTE(review): the scaler is fitted on all rows, i.e. before any train/test
# split is made, so statistics of rows that are later held out influence the
# scaled features -- confirm this is acceptable for the evaluation.
scaler = preprocessing.StandardScaler()
X_transformed = scaler.fit_transform(X)
X = X_transformed

Performance of methods on breast cancer data


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier


def _fit_and_score(model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` on the training data and score it on held-out data.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``
        Fitted in place.
    X_fit, y_fit : training features and labels passed to ``model.fit``.
    X_eval, y_eval : held-out features and labels used for scoring.

    Returns
    -------
    tuple
        ``(predictions, accuracy)`` where ``predictions`` is
        ``model.predict(X_eval)`` and ``accuracy`` is
        ``accuracy_score(y_eval, predictions)``.
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    return predictions, accuracy_score(y_eval, predictions)


# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise using statistics of the training rows only, so that nothing
# about the test rows leaks into preprocessing. `X` was already scaled with a
# scaler fitted on every row; because standardisation is an affine map,
# re-fitting on the training rows gives exactly the features that scaling the
# raw data with training-only mean/std would have produced.
split_scaler = preprocessing.StandardScaler().fit(X_train)
X_train = split_scaler.transform(X_train)
X_test = split_scaler.transform(X_test)

# Train a linear SVM
svm_model = SVC(kernel='linear')
y_pred_svm, accuracy_svm = _fit_and_score(svm_model, X_train, y_train, X_test, y_test)

# Train an SVM with RBF kernel
rbf_svm_model = SVC(kernel='rbf')
y_pred_rbf_svm, accuracy_rbf_svm = _fit_and_score(rbf_svm_model, X_train, y_train, X_test, y_test)

# Train logistic regression
logreg_model = LogisticRegression()
y_pred_logreg, accuracy_logreg = _fit_and_score(logreg_model, X_train, y_train, X_test, y_test)

# Train Decision Tree classifier
# random_state is fixed so the tree (and its accuracy) is the same on every
# run, consistent with the other seeded models in this cell.
tree_model = DecisionTreeClassifier(random_state=42)
y_pred_tree, accuracy_tree = _fit_and_score(tree_model, X_train, y_train, X_test, y_test)

# Train a Random Forest classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
y_pred_rf, accuracy_rf = _fit_and_score(rf_model, X_train, y_train, X_test, y_test)

# Train a K-Nearest Neighbors classifier
knn_model = KNeighborsClassifier(n_neighbors=5)
y_pred_knn, accuracy_knn = _fit_and_score(knn_model, X_train, y_train, X_test, y_test)

# Train a Gradient Boosting classifier
gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
y_pred_gb, accuracy_gb = _fit_and_score(gb_model, X_train, y_train, X_test, y_test)



# Report the test-set accuracy of every model, one line per model.
# All labels share the same "<model> Accuracy:" format.
accuracy_report = [
    ("Linear SVM Accuracy:", accuracy_svm),
    ("RBF SVM Accuracy:", accuracy_rbf_svm),
    ("Logistic Regression Accuracy:", accuracy_logreg),
    ("Decision Tree Accuracy:", accuracy_tree),
    ("Random Forest Accuracy:", accuracy_rf),
    ("KNN Accuracy:", accuracy_knn),
    ("Gradient Boosting Accuracy:", accuracy_gb),
]
for label, value in accuracy_report:
    print(label, value)


# Visual separator between the accuracy summary and the confusion matrices.
print("#####################")

# Confusion matrix of each model's test-set predictions against y_test.
svm_cm = confusion_matrix(y_test, y_pred_svm)
svm_rbf_cm = confusion_matrix(y_test, y_pred_rbf_svm)
logreg_cm = confusion_matrix(y_test, y_pred_logreg)
tree_cm = confusion_matrix(y_test, y_pred_tree)
rf_cm = confusion_matrix(y_test, y_pred_rf)
knn_cm = confusion_matrix(y_test, y_pred_knn)
gb_cm = confusion_matrix(y_test, y_pred_gb)

# Print every matrix under its model heading ("SVM" is the linear-kernel SVM).
for title, matrix in (
    ("SVM", svm_cm),
    ("SVM RBF", svm_rbf_cm),
    ("Logistic Regression", logreg_cm),
    ("Decision Tree", tree_cm),
    ("Random Forest", rf_cm),
    ("KNN", knn_cm),
    ("Gradient Boosting", gb_cm),
):
    print(f'{title} Confusion matrix \n {matrix}')

  



Linear SVM Accuracy: 0.956140350877193
RBF SVM Accracy 0.9736842105263158
Logistic Regression Accuracy: 0.9736842105263158
Decision Tree Acuracy: 0.9298245614035088
Random Forest Accuracy: 0.9649122807017544
KNN Accuracy: 0.9473684210526315
Gradient Boosting Accuracy: 0.956140350877193
#####################
SVM Confusion matrix 
 [[41  2]
 [ 3 68]]
SVM RBF Confusion matrix 
 [[41  2]
 [ 1 70]]
Logistic Regression Confusion matrix 
 [[41  2]
 [ 1 70]]
Decision Tree Confusion matrix 
 [[39  4]
 [ 4 67]]
Random Forest Confusion matrix 
 [[40  3]
 [ 1 70]]
KNN Confusion matrix 
 [[40  3]
 [ 3 68]]
Gradient Boosting Confusion matrix 
 [[40  3]
 [ 2 69]]
