In [2]:
# To check the metrics
# Breast Cancer Dataset 

In [1]:
# 📦 Step 1: Import libraries
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 📂 Step 2: Load dataset
data = load_breast_cancer()
X = data.data
y = data.target

# 🧪 Step 3: Train-test split
# 30% of the rows are held out for evaluation; random_state pins the shuffle
# so the reported numbers are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 🌳 Step 4: Train Decision Tree
dt_model = DecisionTreeClassifier(max_depth=4, random_state=42)
dt_model.fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

# 🌲 Step 5: Train Random Forest
rf_model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

# 📊 Step 6: Evaluate both models
# Both models are scored against the same held-out labels; the report is
# produced by one loop so the two sections cannot drift apart.
evaluations = (
    ("🎯 Decision Tree Results", y_pred_dt),
    ("\n🎯 Random Forest Results", y_pred_rf),
)
for header, y_pred in evaluations:
    print(header)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))


🎯 Decision Tree Results
Accuracy: 0.9532163742690059
              precision    recall  f1-score   support

           0       0.94      0.94      0.94        63
           1       0.96      0.96      0.96       108

    accuracy                           0.95       171
   macro avg       0.95      0.95      0.95       171
weighted avg       0.95      0.95      0.95       171


🎯 Random Forest Results
Accuracy: 0.9649122807017544
              precision    recall  f1-score   support

           0       0.97      0.94      0.95        63
           1       0.96      0.98      0.97       108

    accuracy                           0.96       171
   macro avg       0.97      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171



In [13]:
# Ask the user for the input features (entered manually or auto-generated), then which model to use for the prediction

In [16]:
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import numpy as np

# Load data
data = load_breast_cancer()
X = data.data
y = data.target
feature_names = data.feature_names
# Number of values a single sample must contain; derived from the data
# instead of hard-coding 30 in the prompts and the validation.
n_features = X.shape[1]

# Train models
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# NOTE(review): the metrics cell earlier in this notebook trains the tree with
# max_depth=4, while this cell uses max_depth=5, so those metrics do not
# describe this exact model -- confirm which depth is intended.
dt_model = DecisionTreeClassifier(max_depth=5, random_state=42)
dt_model.fit(X_train, y_train)

rf_model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
rf_model.fit(X_train, y_train)

# Show features
print("\n Breast Cancer Input Features:")
for idx, feature in enumerate(feature_names, 1):
    # {idx:2d}: right-align the 1-based index as an integer in a 2-character field.
    print(f"{idx:2d}. {feature}")

# Ask for input type
print("\n Choose input method:")
print(f"1 - Manually enter the {n_features} feature values")
print("2 - Generate random valid values for testing")
input_method = input("Enter choice (1 or 2): ").strip()

# Prepare input
# new_sample stays None when the input is unusable. The cell then skips the
# prediction step instead of calling exit(): in a notebook exit() asks the
# kernel to shut down rather than merely stopping the cell, and in plain Python
# the SystemExit it raises would have been swallowed by a bare `except:`.
new_sample = None

if input_method == '1':
    print(f"\n Enter {n_features} feature values separated by commas:")
    input_data = input("Enter values: ")
    try:
        # float() tolerates surrounding whitespace, so "1.0, 2.0" is accepted.
        input_list = [float(token) for token in input_data.strip().split(',')]
    except ValueError:
        # Only a parse failure is an "invalid format"; nothing else is caught.
        print("Invalid input format.")
    else:
        if len(input_list) != n_features:
            print(f"You must enter exactly {n_features} values.")
        else:
            # Shape (1, n_features): one row, as predict() expects a 2-D array.
            new_sample = np.array([input_list])

elif input_method == '2':
    print("Generating random input based on real data ranges...")
    mins = X.min(axis=0)
    maxs = X.max(axis=0)
    # Each feature is drawn independently and uniformly between its observed
    # minimum and maximum, so the combination need not resemble a real record.
    # The draw is unseeded, so every run produces a different sample.
    random_input = np.random.uniform(mins, maxs)
    new_sample = np.array([random_input])
    print("\n Random Input Generated:")
    for name, val in zip(feature_names, random_input):
        # {val:.4f}: fixed-point notation with 4 decimal places.
        print(f"{name}: {val:.4f}")
else:
    print("Invalid choice.")

if new_sample is not None:
    # Ask for model choice
    print("\n Choose a model to predict:")
    print("1 - Decision Tree")
    print("2 - Random Forest")
    model_choice = input("Enter choice (1 or 2): ").strip()

    # Make prediction
    # Menu key -> (display name, fitted estimator); one code path for both models.
    models = {
        '1': ("Decision Tree", dt_model),
        '2': ("Random Forest", rf_model),
    }
    if model_choice in models:
        model_name, model = models[model_choice]
        pred = model.predict(new_sample)
        # Class 0 is reported as malignant, any other class as benign.
        label = "Malignant (Cancerous)" if pred[0] == 0 else "Benign (Non-Cancerous)"
        print(f"\n Prediction ({model_name}):", label)
    else:
        print("Invalid model choice.")



 Breast Cancer Input Features:
 1. mean radius
 2. mean texture
 3. mean perimeter
 4. mean area
 5. mean smoothness
 6. mean compactness
 7. mean concavity
 8. mean concave points
 9. mean symmetry
10. mean fractal dimension
11. radius error
12. texture error
13. perimeter error
14. area error
15. smoothness error
16. compactness error
17. concavity error
18. concave points error
19. symmetry error
20. fractal dimension error
21. worst radius
22. worst texture
23. worst perimeter
24. worst area
25. worst smoothness
26. worst compactness
27. worst concavity
28. worst concave points
29. worst symmetry
30. worst fractal dimension

 Choose input method:
1 - Manually enter the 30 feature values
2 - Generate random valid values for testing


Enter choice (1 or 2):  2


Generating random input based on real data ranges...

 Random Input Generated:
mean radius: 22.0309
mean texture: 23.5988
mean perimeter: 125.5088
mean area: 976.8491
mean smoothness: 0.0946
mean compactness: 0.0970
mean concavity: 0.3193
mean concave points: 0.0712
mean symmetry: 0.2211
mean fractal dimension: 0.0771
radius error: 1.6021
texture error: 4.5265
perimeter error: 18.6446
area error: 253.8946
smoothness error: 0.0223
compactness error: 0.0616
concavity error: 0.3582
concave points error: 0.0297
symmetry error: 0.0637
fractal dimension error: 0.0283
worst radius: 33.2224
worst texture: 13.3271
worst perimeter: 234.9503
worst area: 1004.8202
worst smoothness: 0.1094
worst compactness: 0.8021
worst concavity: 0.7832
worst concave points: 0.0423
worst symmetry: 0.6365
worst fractal dimension: 0.1500

 Choose a model to predict:
1 - Decision Tree
2 - Random Forest


Enter choice (1 or 2):  1



 Prediction (Decision Tree): Malignant (Cancerous)


In [None]:
# Sample values for the manual-entry option (choice 1) of the prediction cell:
# 30 comma-separated numbers, laid out here as three lines of ten.
# NOTE(review): input() reads a single line, so these lines presumably need to
# be pasted as one line -- confirm in your notebook front end.
''' input for manual entry sample
12.45,15.70,82.57,477.1,0.1278,0.1709,0.1578,0.08089,0.2087,0.07613,
0.3345,1.281,2.077,27.94,0.005731,0.03502,0.04946,0.01352,0.01957,0.003619,
13.50,22.23,87.6,546.3,0.165,0.8681,0.8843,0.3269,0.6221,0.1244
'''