In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
# Make the dataset available in the working directory.
#
# The upload widget is interactive, so it is only used when the file is not
# already present; this keeps "Restart & Run All" non-blocking after the first
# upload and lets the notebook run outside Colab when the file sits next to it.
import os

try:
    from google.colab import files  # only importable inside Google Colab
except ImportError:
    files = None

if os.path.exists("breast-cancer-wisconsin.data"):
    # Nothing to upload; keep `uploaded` defined as an empty mapping, the same
    # kind of object files.upload() returns (filename -> file contents).
    uploaded = {}
elif files is not None:
    uploaded = files.upload()
else:
    raise FileNotFoundError(
        "breast-cancer-wisconsin.data was not found in the working directory "
        "and google.colab is not available to upload it."
    )

Saving breast-cancer-wisconsin.data to breast-cancer-wisconsin.data


In [3]:
# Column names for the headerless data file, in file order
column_names = [
    "Sample_code_number", "Clump_Thickness", "Uniformity_of_Cell_Size",
    "Uniformity_of_Cell_Shape", "Marginal_Adhesion", "Single_Epithelial_Cell_Size",
    "Bare_Nuclei", "Bland_Chromatin", "Normal_Nucleoli", "Mitoses", "Class"
]

# One seed shared by the split and the model so the reported metrics are
# reproducible under "Restart & Run All".
RANDOM_STATE = 42

# Load the dataset; "?" entries are parsed as missing values
df = pd.read_csv("breast-cancer-wisconsin.data", names=column_names, header=None, na_values="?")

# Clean in one chained, re-runnable step (no in-place mutation):
#   1. drop rows with missing values
#   2. drop the identifier column, which is not a predictive feature
#   3. recode the target from {2, 4} to {0, 1}
#      (presumably 2 = benign, 4 = malignant -- confirm against the dataset description)
#   4. make sure every remaining column is numeric
df = (
    df.dropna()
    .drop(columns=["Sample_code_number"], errors="ignore")
    .assign(Class=lambda frame: frame["Class"].map({2: 0, 4: 1}))
    .apply(pd.to_numeric)
)

# .map() turns any label other than 2 or 4 into NaN; fail loudly here rather
# than with an obscure error during training.
assert df["Class"].notna().all(), "Unexpected value in 'Class' column (expected only 2 or 4)"

# Splitting the data (stratified so both sets keep the class balance)
X = df.drop(columns=["Class"])
y = df["Class"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=RANDOM_STATE, stratify=y
)

# Train and evaluate Decision Tree model.
# random_state is required for determinism: the tree randomly permutes the
# features at each split, so an unseeded model can give different results
# from run to run on the very same train/test split.
model = DecisionTreeClassifier(random_state=RANDOM_STATE)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Output results
print("Decision Tree Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)

# Save results to a file (only the accuracy is persisted)
results_df = pd.DataFrame({"Accuracy": [accuracy]})
results_df.to_csv("decision_tree_results.csv", index=False)
print("\nResults saved to decision_tree_results.csv")


Decision Tree Accuracy: 0.9590643274853801
Confusion Matrix:
 [[106   5]
 [  2  58]]
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.95      0.97       111
           1       0.92      0.97      0.94        60

    accuracy                           0.96       171
   macro avg       0.95      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171


Results saved to decision_tree_results.csv
