<a href="https://www.kaggle.com/code/kelvinmwathi/build-and-evaluate-classification-models?scriptVersionId=243319111" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
# Importing libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, classification_report



# Load dataset

In [2]:

# Load the breast-cancer dataset that ships with scikit-learn.
data = load_breast_cancer()

# Feature matrix and label vector.
X, y = data.data, data.target

# Names of the feature columns and of the label values.
feature_names, target_names = data.feature_names, data.target_names

# Print the dataset's own description text.
print(data.DESCR)



.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

# Split dataset into train and test sets

In [3]:

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)



# Feature scaling

In [4]:

# Fit the scaler on the training split only, then apply that same fitted
# scaler to both splits so no test-set statistics are used for scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



# Logistic Regression Model

In [5]:

# Train a logistic regression classifier with default settings on the scaled
# training data (fit() returns the fitted estimator itself).
log_model = LogisticRegression().fit(X_train_scaled, y_train)

# Predicted labels for the held-out test split.
log_preds = log_model.predict(X_test_scaled)



# SVM Model

In [6]:

# Train a support vector classifier with a linear kernel on the scaled
# training data (fit() returns the fitted estimator itself).
svm_model = SVC(kernel='linear').fit(X_train_scaled, y_train)

# Predicted labels for the held-out test split.
svm_preds = svm_model.predict(X_test_scaled)



# Evaluation Function

In [7]:

def evaluate_model(y_true, y_pred, model_name, pos_label=1):
    """Print a confusion matrix, accuracy, precision and recall for one model.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Labels predicted by the model, aligned with ``y_true``.
    model_name : str
        Name shown in the report banner.
    pos_label : int or str, default=1
        Class treated as "positive" when computing precision and recall.
        The default of 1 is scikit-learn's own default, so existing
        three-argument calls print exactly the same numbers as before.
        Per the scikit-learn dataset documentation, label 1 in
        ``load_breast_cancer`` is "benign"; pass ``pos_label=0`` to report
        precision/recall for the malignant class instead.

    Returns
    -------
    None
        The report is written to standard output only.
    """
    print(f"--- {model_name} ---")
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    print("Accuracy:", accuracy_score(y_true, y_pred))
    # Precision and recall are per-class metrics: make the class explicit
    # instead of silently relying on the library default.
    print("Precision:", precision_score(y_true, y_pred, pos_label=pos_label))
    print("Recall:", recall_score(y_true, y_pred, pos_label=pos_label))
    print()



In [8]:

# Report the same set of metrics for each trained model on the test split.
for preds, label in (
    (log_preds, "Logistic Regression"),
    (svm_preds, "SVM (Linear Kernel)"),
):
    evaluate_model(y_test, preds, label)


--- Logistic Regression ---
Confusion Matrix:
[[41  2]
 [ 1 70]]
Accuracy: 0.9736842105263158
Precision: 0.9722222222222222
Recall: 0.9859154929577465

--- SVM (Linear Kernel) ---
Confusion Matrix:
[[41  2]
 [ 3 68]]
Accuracy: 0.956140350877193
Precision: 0.9714285714285714
Recall: 0.9577464788732394

