In [6]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report
from IPython.display import display, Markdown

# Introductory banner describing the demo. The text is assembled from
# explicit fragments so the trailing spaces that are part of the original
# message stay visible; empty first/last entries give the leading and
# trailing newline.
print("\n".join((
    "",
    "This program demonstrates the use of a Decision Tree model for breast cancer classification. ",
    "The model is trained on the Breast Cancer dataset, a well-known dataset for binary classification tasks.",
    "The dataset contains features calculated from fine needle aspirates (FNA) of breast masses, ",
    "and the task is to predict whether a mass is malignant or benign.",
    "",
)))

# Load the bundled dataset and build one DataFrame holding every feature
# column plus the label column ('target') appended at the end.
data = load_breast_cancer()
df = pd.DataFrame(
    data.data,
    columns=data.feature_names,
).assign(target=data.target)

# Human-readable description for each feature column, keyed by column name.
# Every base measurement appears three times in the dataset: as 'mean <m>',
# '<m> error' and 'worst <m>'. The table is generated in that order.
_MEASUREMENTS = (
    'radius',
    'texture',
    'perimeter',
    'area',
    'smoothness',
    'compactness',
    'concavity',
    'concave points',
    'symmetry',
    'fractal dimension',
)


def _describe(prefix, measurement):
    """Return the description for a 'mean'/'worst' column of *measurement*."""
    # 'concave points' uses its own wording for the mean/worst columns.
    if measurement == 'concave points':
        return f'{prefix} number of concave portions of the contour'
    return f'{prefix} {measurement} of the cell nuclei'


feature_descriptions = {
    **{f'mean {m}': _describe('Mean', m) for m in _MEASUREMENTS},
    **{
        f'{m} error': f'Standard error of the {m} of the cell nuclei'
        for m in _MEASUREMENTS
    },
    **{f'worst {m}': _describe('Worst (largest)', m) for m in _MEASUREMENTS},
}


# Separate the label column from the predictor columns.
y = df['target']
X = df.drop(columns='target')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training chain.
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Render the learned decision rules as text and show them in a fenced
# Markdown block under a heading.
tree_rules = export_text(model, feature_names=X.columns.tolist())
tree_markdown = f"## Decision Tree\n```\n{tree_rules}\n```"
display(Markdown(tree_markdown))

def _prompt_float(prompt):
    """Ask the user for a number until a finite float is entered.

    Re-prompts instead of raising, so one typo does not discard the values
    already entered for earlier features.

    Args:
        prompt: Text shown to the user by ``input``.

    Returns:
        The entered value as a finite ``float``.
    """
    import math  # local import: keeps this cell runnable on its own

    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print(f"'{raw.strip()}' is not a valid number, please try again.")
            continue
        # float() accepts 'nan' and 'inf'; neither is a meaningful measurement.
        if math.isfinite(value):
            return value
        print("Please enter a finite number (not NaN or infinity).")


print("""
Enter features for prediction. These features represent characteristics of a breast mass.
Please enter numerical values for the following features:
""")

# Collect one value per feature, in the same column order the model was
# trained on.
user_input_values = []
for feature in X.columns:
    print(f"{feature}: {feature_descriptions.get(feature, 'Description not available')}")
    user_input_values.append(_prompt_float(f"Enter {feature}: "))

# A single-row frame with the training column names, as expected by predict().
user_input_df = pd.DataFrame([user_input_values], columns=X.columns)
print("\nUser Input:")
display(user_input_df)

user_prediction = model.predict(user_input_df)[0]
# The numeric class is an index into the dataset's target_names, so show the
# readable label next to the raw code.
predicted_label = data.target_names[user_prediction]
print(f"\nPrediction for the user input: {user_prediction} ({predicted_label})\n")

# Evaluate the fitted tree on the held-out test split.
y_pred = model.predict(X_test)
# ROC AUC ranks samples by score, so it is computed from the predicted
# probability of the class with the greater label (second predict_proba
# column) rather than from hard 0/1 predictions.
y_score = model.predict_proba(X_test)[:, 1]

accuracy = accuracy_score(y_test, y_pred)
# Support-weighted averages over both classes. Note that support-weighted
# recall is mathematically equal to accuracy.
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
roc_auc = roc_auc_score(y_test, y_score)

# Use the model's own sorted class list for both the matrix computation and
# its row/column headers, so the headers cannot drift from the cell order.
# (Series.unique() returns values in order of appearance, which only happens
# to be sorted for this dataset.)
class_labels = model.classes_
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)

print("## Model Evaluation Metrics")
print(f"\n**Accuracy**: {accuracy:.2%}")
print(f"**Precision**: {precision:.2%}")
print(f"**Recall**: {recall:.2%}")
print(f"**F1 Score**: {f1:.2%}")
print(f"**ROC AUC Score**: {roc_auc:.2%}")

print("\n**Confusion Matrix:**")
# Rows are the true classes, columns the predicted classes.
display(
    pd.DataFrame(
        conf_matrix,
        index=pd.Index(class_labels, name='actual'),
        columns=pd.Index(class_labels, name='predicted'),
    )
)

print("\n**Classification Report:**")
print(classification_report(y_test, y_pred))



This program demonstrates the use of a Decision Tree model for breast cancer classification. 
The model is trained on the Breast Cancer dataset, a well-known dataset for binary classification tasks.
The dataset contains features calculated from fine needle aspirates (FNA) of breast masses, 
and the task is to predict whether a mass is malignant or benign.



## Decision Tree
```
|--- mean concave points <= 0.05
|   |--- worst radius <= 16.83
|   |   |--- area error <= 48.70
|   |   |   |--- worst smoothness <= 0.18
|   |   |   |   |--- smoothness error <= 0.00
|   |   |   |   |   |--- worst texture <= 27.76
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- worst texture >  27.76
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |--- smoothness error >  0.00
|   |   |   |   |   |--- worst texture <= 33.35
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- worst texture >  33.35
|   |   |   |   |   |   |--- worst texture <= 33.56
|   |   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |   |--- worst texture >  33.56
|   |   |   |   |   |   |   |--- class: 1
|   |   |   |--- worst smoothness >  0.18
|   |   |   |   |--- class: 0
|   |   |--- area error >  48.70
|   |   |   |--- concavity error <= 0.02
|   |   |   |   |--- class: 0
|   |   |   |--- concavity error >  0.02
|   |   |   |   |--- class: 1
|   |--- worst radius >  16.83
|   |   |--- mean texture <= 16.19
|   |   |   |--- class: 1
|   |   |--- mean texture >  16.19
|   |   |   |--- concave points error <= 0.01
|   |   |   |   |--- class: 0
|   |   |   |--- concave points error >  0.01
|   |   |   |   |--- class: 1
|--- mean concave points >  0.05
|   |--- worst concave points <= 0.15
|   |   |--- worst perimeter <= 115.25
|   |   |   |--- mean texture <= 21.06
|   |   |   |   |--- class: 1
|   |   |   |--- mean texture >  21.06
|   |   |   |   |--- class: 0
|   |   |--- worst perimeter >  115.25
|   |   |   |--- class: 0
|   |--- worst concave points >  0.15
|   |   |--- fractal dimension error <= 0.01
|   |   |   |--- class: 0
|   |   |--- fractal dimension error >  0.01
|   |   |   |--- class: 1

```


Enter features for prediction. These features represent characteristics of a breast mass.
Please enter numerical values for the following features:

mean radius: Mean radius of the cell nuclei


Enter mean radius:  45


mean texture: Mean texture of the cell nuclei


Enter mean texture:  43


mean perimeter: Mean perimeter of the cell nuclei


Enter mean perimeter:  67


mean area: Mean area of the cell nuclei


Enter mean area:  43


mean smoothness: Mean smoothness of the cell nuclei


Enter mean smoothness:  78


mean compactness: Mean compactness of the cell nuclei


Enter mean compactness:  43


mean concavity: Mean concavity of the cell nuclei


Enter mean concavity:  89


mean concave points: Mean number of concave portions of the contour


Enter mean concave points:  43


mean symmetry: Mean symmetry of the cell nuclei


Enter mean symmetry:  2


mean fractal dimension: Mean fractal dimension of the cell nuclei


Enter mean fractal dimension:  1


radius error: Standard error of the radius of the cell nuclei


Enter radius error:  3


texture error: Standard error of the texture of the cell nuclei


Enter texture error:  67


perimeter error: Standard error of the perimeter of the cell nuclei


Enter perimeter error:  87


area error: Standard error of the area of the cell nuclei


Enter area error:  99


smoothness error: Standard error of the smoothness of the cell nuclei


Enter smoothness error:  123


compactness error: Standard error of the compactness of the cell nuclei


Enter compactness error:  12345


concavity error: Standard error of the concavity of the cell nuclei


Enter concavity error:  765


concave points error: Standard error of the concave points of the cell nuclei


Enter concave points error:  456


symmetry error: Standard error of the symmetry of the cell nuclei


Enter symmetry error:  654


fractal dimension error: Standard error of the fractal dimension of the cell nuclei


Enter fractal dimension error:  456


worst radius: Worst (largest) radius of the cell nuclei


Enter worst radius:  876


worst texture: Worst (largest) texture of the cell nuclei


Enter worst texture:  897


worst perimeter: Worst (largest) perimeter of the cell nuclei


Enter worst perimeter:  456


worst area: Worst (largest) area of the cell nuclei


Enter worst area:  987


worst smoothness: Worst (largest) smoothness of the cell nuclei


Enter worst smoothness:  65


worst compactness: Worst (largest) compactness of the cell nuclei


Enter worst compactness:  876


worst concavity: Worst (largest) concavity of the cell nuclei


Enter worst concavity:  987


worst concave points: Worst (largest) number of concave portions of the contour


Enter worst concave points:  456


worst symmetry: Worst (largest) symmetry of the cell nuclei


Enter worst symmetry:  987


worst fractal dimension: Worst (largest) fractal dimension of the cell nuclei


Enter worst fractal dimension:  54



User Input:


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,45.0,43.0,67.0,43.0,78.0,43.0,89.0,43.0,2.0,1.0,...,876.0,897.0,456.0,987.0,65.0,876.0,987.0,456.0,987.0,54.0



Prediction for the user input: 1

## Model Evaluation Metrics

**Accuracy**: 94.74%
**Precision**: 94.74%
**Recall**: 94.74%
**F1 Score**: 94.74%
**ROC AUC Score**: 94.40%

**Confusion Matrix:**


Unnamed: 0,0,1
0,40,3
1,3,68



**Classification Report:**
              precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

