In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

In [10]:
# 1. Load the dataset from the CSV file
data = pd.read_csv("Breast_Cancer_File.csv")

# Display dataset preview and information
print("Dataset Preview:")
print(data.head())
print("\nDataset Info:")
print(data.info())

Dataset Preview:
         id diagnosis  radius_mean  texture_mean  perimeter_mean  area_mean  \
0    842302         M        17.99         10.38          122.80     1001.0   
1    842517         M        20.57         17.77          132.90     1326.0   
2  84300903         M        19.69         21.25          130.00     1203.0   
3  84348301         M        11.42         20.38           77.58      386.1   
4  84358402         M        20.29         14.34          135.10     1297.0   

   smoothness_mean  compactness_mean  concavity_mean  concave points_mean  \
0          0.11840           0.27760          0.3001              0.14710   
1          0.08474           0.07864          0.0869              0.07017   
2          0.10960           0.15990          0.1974              0.12790   
3          0.14250           0.28390          0.2414              0.10520   
4          0.10030           0.13280          0.1980              0.10430   

   ...  texture_worst  perimeter_worst  area_

In [11]:
# 2. Define the target column using 'diagnosis' and convert to numeric:
#    Mapping: 'B' -> 0 (benign), 'M' -> 1 (malignant)
if 'diagnosis' not in data.columns:
    raise ValueError("The CSV file does not contain a 'diagnosis' column. Please update the column name as needed.")

# Map diagnosis values to numeric
data['target'] = data['diagnosis'].map({'B': 0, 'M': 1})

In [12]:
# 3. Define features and target
# Drop columns that are not features: 'id', 'diagnosis', 'Unnamed: 32'
drop_cols = ['id', 'diagnosis', 'Unnamed: 32']
X = data.drop(columns=drop_cols + ['target'])
y = data['target']

print("\nFeature Names:", X.columns.tolist())
print("\nFirst 5 rows of features:")
print(X.head())
print("\nFirst 5 target values:")
print(y.head())


Feature Names: ['radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean', 'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean', 'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se', 'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se', 'fractal_dimension_se', 'radius_worst', 'texture_worst', 'perimeter_worst', 'area_worst', 'smoothness_worst', 'compactness_worst', 'concavity_worst', 'concave points_worst', 'symmetry_worst', 'fractal_dimension_worst']

First 5 rows of features:
   radius_mean  texture_mean  perimeter_mean  area_mean  smoothness_mean  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34   

In [13]:
# 4. Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Create and train the SVM classifier
svm_model = SVC(kernel='rbf', C=1.0, random_state=42)
svm_model.fit(X_train, y_train)

# 6. Make predictions on the test set
y_pred = svm_model.predict(X_test)

# 7. Evaluate the model using Accuracy, Precision, and Recall
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

In [14]:
print("\nBreast Cancer Prediction using SVM")
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Breast Cancer Prediction using SVM
Accuracy: 0.9473684210526315
Precision: 1.0
Recall: 0.8604651162790697

Classification Report:
              precision    recall  f1-score   support

           0       0.92      1.00      0.96        71
           1       1.00      0.86      0.93        43

    accuracy                           0.95       114
   macro avg       0.96      0.93      0.94       114
weighted avg       0.95      0.95      0.95       114



In [15]:
# 8. Calculate Operation Charges based on predictions
# Define the operation charge for a malignant diagnosis
operation_charge = 20000  # $20,000 for malignant cases

# For each prediction, if the prediction is 1 (malignant), assign the operation charge; otherwise, assign 0.
charges = [operation_charge if pred == 1 else 0 for pred in y_pred]

# Print a summary of operation charges
num_malignant = sum(y_pred)
total_charges = sum(charges)
print("\nOperation Charges Summary:")
print("Number of Malignant Predictions:", num_malignant)
print("Total Operation Charges: $", total_charges)
if num_malignant > 0:
    print("Average Operation Charge per Malignant Case: $", operation_charge)
else:
    print("No malignant cases predicted; no operation charges applied.")


Operation Charges Summary:
Number of Malignant Predictions: 37
Total Operation Charges: $ 740000
Average Operation Charge per Malignant Case: $ 20000
