##### CLASSIFICATION MODELS & EVALUATION METRICS
###### Breast Cancer Dataset

In [None]:
import sklearn

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import pandas as pd
import numpy as np

In [None]:
data=load_breast_cancer()
x=data.data
y=data.target
feature_names= data.feature_names
target_names= data.target_names
#print(feature_names)
#print(target_names)


In [None]:
df=pd.DataFrame(x,columns=feature_names)
df['target']=y
print(df.head())


In [None]:
x_train,x_test,y_train,y_test= train_test_split(x,y,test_size=0.2 , random_state=42)
scaler=StandardScaler()
x_train_scaled= scaler.fit_transform(x_train)
x_test_scaled= scaler.fit_transform(x_test)
model=LogisticRegression(max_iter =1000)
model.fit(x_train_scaled, y_train)
y_pred=model.predict(x_test_scaled)
print("Accuracy:", accuracy_score(y_test,y_pred))
print("/nConfusion Matrix", confusion_matrix(y_test, y_pred))
print("/nClassificarion report",classification_report(y_test,y_pred,target_names=target_names))

In [None]:
# Example new tumor data (dummy values, 30 features)
new_sample = np.array([[14.2, 23.5, 95.0, 650.0, 0.090, 0.1, 0.08, 0.05, 0.18, 0.065,
                        0.45, 1.2, 3.2, 45.0, 0.005, 0.02, 0.03, 0.01, 0.02, 0.003,
                        16.0, 30.0, 110.0, 800.0, 0.13, 0.25, 0.20, 0.10, 0.25, 0.08]])

# Predict class: 0 = malignant, 1 = benign
prediction = model.predict(new_sample)
prediction_proba = model.predict_proba(new_sample)

# Show prediction
if prediction[0] == 0:
    print("Prediction: Malignant (Cancerous)")
else:
    print("Prediction: Benign (Non-cancerous)")

# Optional: Show probability
print("Probability of being benign:", prediction_proba[0][1])
print("Probability of being malignant:", prediction_proba[0][0])


##### From only 5 important features of breast cancer PREDICTING THE MODEL

In [26]:

from sklearn.ensemble import RandomForestClassifier

# Step 1: Load the data
data = load_breast_cancer()
X_full = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# Step 2: Get top 10 features using Random Forest
rf = RandomForestClassifier()
rf.fit(X_full, y)

# Get top 10 features
feature_importances = pd.Series(rf.feature_importances_, index=X_full.columns)
top_features = feature_importances.sort_values(ascending=False).head(5).index.tolist()
print("Top 5 features:", top_features)

# Step 3: Use only top 5 features
X = X_full[top_features]

# Step 4: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# Step 5: Train final classifier (e.g., logistic regression or Random Forest)
final_model = RandomForestClassifier()
final_model.fit(X_train, y_train)

# Step 6: Evaluate
y_pred = final_model.predict(X_test)
print("\nModel evaluation:\n")
print(classification_report(y_test, y_pred))


Top 5 features: ['worst concave points', 'worst perimeter', 'mean concave points', 'worst radius', 'worst area']

Model evaluation:

              precision    recall  f1-score   support

           0       0.89      0.93      0.91        42
           1       0.96      0.93      0.94        72

    accuracy                           0.93       114
   macro avg       0.92      0.93      0.93       114
weighted avg       0.93      0.93      0.93       114



In [40]:
print("Enter values FOR THE FEATURES:")
A=float(input("enter worst concave points"))
B=float(input("enter worst perimeter"))
C=float(input("enter mean concave points"))
D=float(input("enter worst radius"))
E=float(input("enter worst area"))
new_input = pd.DataFrame({
    'worst concave points': [A],
    'worst perimeter': [B],
    'mean concave points': [C],
    'worst radius': [D],
    'worst area': [E]
})
pred = final_model.predict(new_input)
pred_proba = final_model.predict_proba(new_input)
# Output
if pred[0] == 0:
    print("Prediction: Malignant (Cancerous)")
else:
    print("Prediction: Benign (Non-Cancerous)")

print("Probability of being benign:", pred_proba[0][1])
print("Probability of being malignant:", pred_proba[0][0])



enter worst concave points 0.08
enter worst perimeter 95
enter mean concave points 0.03
enter worst radius 15
enter worst area 500


Prediction: Benign (Non-Cancerous)
Probability of being benign: 0.88
Probability of being malignant: 0.12


In [41]:
""" SAMPLE INPUT
For Benign (Non-Cancerous) sample:
enter worst concave points: 0.05
enter worst perimeter: 90.2
enter mean concave points: 0.02
enter worst radius: 13.5
enter worst area: 550.0"""

""" 
For Malignant (Cancerous) sample:
enter worst concave points: 0.2
enter worst perimeter: 115.0
enter mean concave points: 0.09
enter worst radius: 18.0
enter worst area: 950.0 """

' \nFor Malignant (Cancerous) sample:\nenter worst concave points: 0.2\nenter worst perimeter: 115.0\nenter mean concave points: 0.09\nenter worst radius: 18.0\nenter worst area: 950.0 '