In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler


In [6]:
# Load the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"

# Column labels are supplied explicitly via `names`. After the two leading
# columns, the layout is ten measurements repeated for three statistics
# (Mean, SE, Worst), so the labels are generated rather than typed out.
column_names = ["ID", "Diagnosis"] + [
    f"{statistic}{measurement}"
    for statistic in ("Mean", "SE", "Worst")
    for measurement in (
        "Radius",
        "Texture",
        "Perimeter",
        "Area",
        "Smoothness",
        "Compactness",
        "Concavity",
        "ConcavePoints",
        "Symmetry",
        "FractalDimension",
    )
]
breast_cancer_data = pd.read_csv(url, names=column_names)

# Display the first few rows of the dataset
preview = breast_cancer_data.head()
print(preview)

         ID Diagnosis  MeanRadius  MeanTexture  MeanPerimeter  MeanArea  \
0    842302         M       17.99        10.38         122.80    1001.0   
1    842517         M       20.57        17.77         132.90    1326.0   
2  84300903         M       19.69        21.25         130.00    1203.0   
3  84348301         M       11.42        20.38          77.58     386.1   
4  84358402         M       20.29        14.34         135.10    1297.0   

   MeanSmoothness  MeanCompactness  MeanConcavity  MeanConcavePoints  ...  \
0         0.11840          0.27760         0.3001            0.14710  ...   
1         0.08474          0.07864         0.0869            0.07017  ...   
2         0.10960          0.15990         0.1974            0.12790  ...   
3         0.14250          0.28390         0.2414            0.10520  ...   
4         0.10030          0.13280         0.1980            0.10430  ...   

   WorstRadius  WorstTexture  WorstPerimeter  WorstArea  WorstSmoothness  \
0        2

In [7]:
# Separate the features and the target variable
X = breast_cancer_data.drop(["ID", "Diagnosis"], axis=1)
y = breast_cancer_data["Diagnosis"]

# Split the dataset into training and testing sets BEFORE scaling.
# Fitting the scaler on the full dataset would let the test rows influence
# the scaling statistics (data leakage) and make the evaluation optimistic.
# Same test_size / random_state as before, so the same rows land in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale the features: learn the scaling parameters from the training rows
# only, then apply that same fitted transform to the held-out test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any later cell that still references `X_scaled` keeps working;
# it is the full feature matrix transformed with the training-fitted scaler.
X_scaled = scaler.transform(X)


In [8]:
# Build the SVM classifier and train it on the training split.
# `fit` returns the estimator itself, so construction and training are chained.
model = SVC(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred = model.predict(X_test)

# Report performance: per-class precision/recall/F1 first, then overall accuracy
print("Classification Report:")
report_text = classification_report(y_test, y_pred)
print(report_text)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)


Classification Report:
              precision    recall  f1-score   support

           B       0.97      0.99      0.98        71
           M       0.98      0.95      0.96        43

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

Accuracy: 0.9736842105263158
