In [9]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (accuracy_score, confusion_matrix, classification_report, roc_auc_score, f1_score,
                             recall_score, precision_score, balanced_accuracy_score, matthews_corrcoef)
import pandas as pd


# Location of the breast-cancer CSV.
# NOTE(review): this is an absolute path on one machine; point it at a local
# copy of the file before running the notebook anywhere else.
DATA_PATH = "/Users/alyssajames/BMEN415/Classification Models/breast-cancer.csv"

# Load data from CSV file
data = pd.read_csv(DATA_PATH)

# Fill missing values with the mean of the respective column.
# `numeric_only=True` restricts the means to numeric columns. The 'diagnosis'
# column holds string labels, and pandas >= 2.0 raises TypeError when asked for
# the mean of such a column; older pandas silently skipped it, so the imputed
# values are the same as before. Non-numeric columns are left untouched.
# NOTE(review): the means are taken over every row, including rows that end up
# in the test split further down, so imputed values carry test-set information.
# Consider imputing after the split using training-set statistics only.
data = data.fillna(data.mean(numeric_only=True))

# Define the features and target variables: the target is the 'diagnosis'
# column and the features are every other column of the frame.
# NOTE(review): if the CSV carries an identifier column (e.g. an id), it is fed
# to the model as a feature here — confirm and drop it if so.
X = data.drop(columns='diagnosis')
y = data['diagnosis']

# Hold out 10% of the rows for testing; the seed is fixed at 42.
# NOTE(review): no `stratify` argument is passed, so the class ratio of the
# hold-out set is left to the random draw.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

# Build the decision tree and fit it on the training rows in one expression
# (`fit` returns the estimator itself, so `classifier` is the trained model).
classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Hard class predictions for the held-out rows
y_pred = classifier.predict(X_test)

# Second column of the probability matrix, which this notebook treats as the
# positive-class score for the ROC computation.
y_pred_proba = classifier.predict_proba(X_test)[:, 1]


# Score the hold-out predictions.

# Whole-test-set summaries computed from the hard predictions
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
balanced_accuracy = balanced_accuracy_score(y_true=y_test, y_pred=y_pred)
matthews_corr = matthews_corrcoef(y_true=y_test, y_pred=y_pred)

# Tabular breakdowns
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
class_report = classification_report(y_true=y_test, y_pred=y_pred)

# Ranking quality, computed from the predicted probabilities rather than the labels
auc_roc = roc_auc_score(y_true=y_test, y_score=y_pred_proba)

# Per-class scores combined with average="weighted".
# NOTE(review): in the recorded output the weighted recall is identical to the
# plain accuracy, so it adds no information; a per-class recall may be more
# useful here — confirm what the analysis needs.
precision = precision_score(y_true=y_test, y_pred=y_pred, average="weighted")
recall = recall_score(y_true=y_test, y_pred=y_pred, average="weighted")
f1 = f1_score(y_true=y_test, y_pred=y_pred, average="weighted")

# Report every metric, each preceded by a blank line.
# Label and value are passed to print() as two arguments, so the multi-line
# entries (confusion matrix, classification report) begin with a single space
# after the line break that ends their label.
for _label, _value in (
    ("\nAccuracy:", accuracy),
    ("\nConfusion Matrix:\n", conf_matrix),
    ("\nClassification Report:\n", class_report),
    ("\nAUC-ROC:", auc_roc),
    ("\nF1 Score:", f1),
    ("\nRecall:", recall),
    ("\nPrecision:", precision),
    ("\nBalanced Accuracy:", balanced_accuracy),
    ("\nMatthews Correlation Coefficient:", matthews_corr),
):
    print(_label, _value)


Accuracy: 0.9473684210526315

Confusion Matrix:
 [[38  2]
 [ 1 16]]

Classification Report:
               precision    recall  f1-score   support

           B       0.97      0.95      0.96        40
           M       0.89      0.94      0.91        17

    accuracy                           0.95        57
   macro avg       0.93      0.95      0.94        57
weighted avg       0.95      0.95      0.95        57


AUC-ROC: 0.9455882352941176

F1 Score: 0.9477871894927191

Recall: 0.9473684210526315

Precision: 0.9488678962363172

Balanced Accuracy: 0.9455882352941176

Matthews Correlation Coefficient: 0.877101011293492
