In [1]:
import kagglehub
import shutil
import os
import zipfile

# Download the dataset
dataset_path = kagglehub.dataset_download("francismon/curated-colon-dataset-for-deep-learning")
print("Downloaded to:", dataset_path)

# Manually copy from kagglehub directory to /content
source_dir = dataset_path
target_dir = "/content/colon_dataset"

# Recursively copy the directory to /content
shutil.copytree(source_dir, target_dir, dirs_exist_ok=True)

# List contents of the copied folder
for root, dirs, files in os.walk(target_dir):
    print(f"Directory: {root}")
    for f in files:
        print(f" - {f}")
    break  # Just show the top-level


Downloading from https://www.kaggle.com/api/v1/datasets/download/francismon/curated-colon-dataset-for-deep-learning?dataset_version_number=1...


100%|██████████| 1.41G/1.41G [00:14<00:00, 104MB/s]

Extracting files...





Downloaded to: /root/.cache/kagglehub/datasets/francismon/curated-colon-dataset-for-deep-learning/versions/1
Directory: /content/colon_dataset


In [2]:
import os

# List all files and directories inside the dataset
for root, dirs, files in os.walk("/content/colon_dataset"):
    print(f"Directory: {root}")
    for dir_name in dirs:
        print(f"  └── {dir_name}")
    break  # only show the top-level structure


Directory: /content/colon_dataset
  └── test
  └── train
  └── val


In [3]:
import os

# List all files and directories inside the dataset
for root, dirs, files in os.walk("/content/colon_dataset/val"):
    print(f"Directory: {root}")
    for dir_name in dirs:
        print(f"  └── {dir_name}")
    break  # only show the top-level structure


Directory: /content/colon_dataset/val
  └── 3_esophagitis
  └── 1_ulcerative_colitis
  └── 2_polyps
  └── 0_normal


In [4]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score, f1_score, precision_score, recall_score, average_precision_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import seaborn as sns


In [5]:
import os
import shutil
import random

# Base original data directory
base_dir = "/content/colon_dataset"
train_dir = os.path.join(base_dir, "train")
val_dir = os.path.join(base_dir, "val")
test_dir = os.path.join(base_dir, "test")

# New root for fixed structure
fixed_base_dir = "/content/colon_data_fixed"
combined_dir = os.path.join(fixed_base_dir, "all_data")
new_train_dir = os.path.join(fixed_base_dir, "train")
new_test_dir = os.path.join(fixed_base_dir, "test")
new_val_dir = os.path.join(fixed_base_dir, "val")

# Create fixed base directory
os.makedirs(combined_dir, exist_ok=True)

# Combine train + test data into one "all_data" folder
for source_folder in [train_dir, test_dir]:
    for class_name in os.listdir(source_folder):
        src_path = os.path.join(source_folder, class_name)
        dst_path = os.path.join(combined_dir, class_name)
        os.makedirs(dst_path, exist_ok=True)
        for file in os.listdir(src_path):
            shutil.copy(os.path.join(src_path, file), os.path.join(dst_path, file))

# Function to split into 80% train / 20% test
def split_data(source_dir, train_dir, test_dir, ratio=0.8):
    for class_name in os.listdir(source_dir):
        class_path = os.path.join(source_dir, class_name)
        files = os.listdir(class_path)
        random.shuffle(files)
        split_idx = int(len(files) * ratio)

        train_files = files[:split_idx]
        test_files = files[split_idx:]

        for out_dir, file_list in [(train_dir, train_files), (test_dir, test_files)]:
            class_out = os.path.join(out_dir, class_name)
            os.makedirs(class_out, exist_ok=True)
            for f in file_list:
                shutil.copy(os.path.join(class_path, f), os.path.join(class_out, f))

# Apply the split
split_data(combined_dir, new_train_dir, new_test_dir)

# Copy val data unchanged
shutil.copytree(val_dir, new_val_dir, dirs_exist_ok=True)

print("✅ Split complete!")
print("Train path:", new_train_dir)
print("Test path:", new_test_dir)
print("Validation path:", new_val_dir)


✅ Split complete!
Train path: /content/colon_data_fixed/train
Test path: /content/colon_data_fixed/test
Validation path: /content/colon_data_fixed/val


In [6]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Original train folder
original_train_dir = '/content/colon_dataset/train'
train_split_dir = '/content/colon_dataset/train_split'
test_split_dir = '/content/colon_dataset/test_split'

# Make new directories
os.makedirs(train_split_dir, exist_ok=True)
os.makedirs(test_split_dir, exist_ok=True)

for class_name in os.listdir(original_train_dir):
    class_dir = os.path.join(original_train_dir, class_name)
    if os.path.isdir(class_dir):
        images = os.listdir(class_dir)
        train_imgs, test_imgs = train_test_split(images, test_size=0.2, random_state=42)

        os.makedirs(os.path.join(train_split_dir, class_name), exist_ok=True)
        os.makedirs(os.path.join(test_split_dir, class_name), exist_ok=True)

        # Move images
        for img in train_imgs:
            shutil.copy2(os.path.join(class_dir, img), os.path.join(train_split_dir, class_name, img))
        for img in test_imgs:
            shutil.copy2(os.path.join(class_dir, img), os.path.join(test_split_dir, class_name, img))

print("✅ Train/Test split (80/20) completed.")


✅ Train/Test split (80/20) completed.


In [7]:
train_dir = '/content/colon_dataset/train_split'
test_dir = '/content/colon_dataset/test_split'
val_dir = '/content/colon_dataset/val'

# [Keep the rest of the code unchanged]


In [8]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

img_size = (224, 224)
batch_size = 32

train_dir = '/content/colon_dataset/train_split'
test_dir = '/content/colon_dataset/test_split'
val_dir = '/content/colon_dataset/val'

train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)


Found 2560 images belonging to 4 classes.
Found 2000 images belonging to 4 classes.
Found 640 images belonging to 4 classes.


In [9]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_size = (224, 224)
batch_size = 32

# Training and augmentation
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    '/content/colon_dataset/train_split',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    '/content/colon_dataset/test_split',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

val_generator = val_datagen.flow_from_directory(
    '/content/colon_dataset/val',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)


Found 2560 images belonging to 4 classes.
Found 640 images belonging to 4 classes.
Found 2000 images belonging to 4 classes.


In [10]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam

base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze base model
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [11]:
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)


  self._warn_if_super_not_called()


Epoch 1/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 762ms/step - accuracy: 0.2643 - loss: 2.0035 - val_accuracy: 0.4400 - val_loss: 1.2484
Epoch 2/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 431ms/step - accuracy: 0.3801 - loss: 1.5998 - val_accuracy: 0.5835 - val_loss: 1.0564
Epoch 3/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 422ms/step - accuracy: 0.4855 - loss: 1.3047 - val_accuracy: 0.6625 - val_loss: 0.9043
Epoch 4/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 434ms/step - accuracy: 0.5355 - loss: 1.1251 - val_accuracy: 0.6825 - val_loss: 0.8161
Epoch 5/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 430ms/step - accuracy: 0.5999 - loss: 0.9937 - val_accuracy: 0.7350 - val_loss: 0.7277
Epoch 6/10
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 429ms/step - accuracy: 0.6423 - loss: 0.8761 - val_accuracy: 0.7540 - val_loss: 0.6782
Epoch 7/10
[1m80/80[

In [12]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, f1_score, precision_score, recall_score
import numpy as np

# Predict on test set
y_true = test_generator.classes
y_pred_probs = model.predict(test_generator)
y_pred = np.argmax(y_pred_probs, axis=1)

# Basic metrics
accuracy = np.mean(y_true == y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
roc_auc = roc_auc_score(y_true, y_pred_probs, multi_class='ovr')
aupr = average_precision_score(y_true, y_pred_probs, average='weighted')
conf_matrix = confusion_matrix(y_true, y_pred)
report = classification_report(y_true, y_pred)

# Print results
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")
print(f"\n✅ Confusion Matrix:\n{conf_matrix}")
print(f"\n✅ Classification Report:\n{report}")


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 214ms/step
✅ Accuracy: 0.9328
✅ ROC AUC Score: 0.9919
✅ AUPR Score: 0.9792
✅ Precision: 0.9326
✅ Recall: 0.9328
✅ F1 Score: 0.9323

✅ Confusion Matrix:
[[157   2   1   0]
 [  8 140  11   1]
 [  4  10 143   3]
 [  1   1   1 157]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.98      0.95       160
           1       0.92      0.88      0.89       160
           2       0.92      0.89      0.91       160
           3       0.98      0.98      0.98       160

    accuracy                           0.93       640
   macro avg       0.93      0.93      0.93       640
weighted avg       0.93      0.93      0.93       640



In [13]:
import numpy as np
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (accuracy_score, roc_auc_score, average_precision_score,
                             precision_score, recall_score, f1_score, confusion_matrix,
                             classification_report)
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GaussianNoise, Dropout
from tensorflow.keras.applications.densenet import preprocess_input
from tqdm import tqdm

# -----------------------------------
# 1. Load DenseNet121 with Regularization
# -----------------------------------
base_model = DenseNet121(weights='imagenet', include_top=False, pooling='avg', input_shape=(224, 224, 3))

# Add Gaussian Noise and Dropout for regularization
x = GaussianNoise(0.1)(base_model.output)
x = Dropout(0.5)(x)

# Final feature extraction model
feature_model = Model(inputs=base_model.input, outputs=x)

# -----------------------------------
# 2. Feature Extraction Function
# -----------------------------------
def extract_features(generator, model):
    features, labels = [], []
    for i in tqdm(range(len(generator))):
        x_batch, y_batch = generator[i]
        x_batch = preprocess_input(x_batch)
        batch_features = model.predict(x_batch, verbose=0)
        features.append(batch_features)
        labels.append(y_batch)
    return np.vstack(features), np.concatenate(labels)

# -----------------------------------
# 3. Extract Features from Train/Val/Test Sets
# (Assumes `train_generator`, `val_generator`, and `test_generator` are already defined)
# -----------------------------------
train_features, train_labels = extract_features(train_generator, feature_model)
val_features, val_labels = extract_features(val_generator, feature_model)
test_features, test_labels = extract_features(test_generator, feature_model)



100%|██████████| 80/80 [00:41<00:00,  1.93it/s]
100%|██████████| 63/63 [00:36<00:00,  1.74it/s]
100%|██████████| 20/20 [00:08<00:00,  2.36it/s]


In [14]:
from sklearn.decomposition import PCA
pca = PCA(n_components=0.95)  # Keep 95% variance
train_features = pca.fit_transform(train_features)
val_features = pca.transform(val_features)
test_features = pca.transform(test_features)


In [15]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
from sklearn.preprocessing import label_binarize
import numpy as np

# Fix the label shape
train_labels = np.argmax(train_labels, axis=1)
test_labels = np.argmax(test_labels, axis=1)

# Train SVM
svm_clf = make_pipeline(StandardScaler(), SVC(kernel='rbf', probability=True, C=10, gamma='scale', random_state=42))
svm_clf.fit(train_features, train_labels)

# Predict
pred_probs = svm_clf.predict_proba(test_features)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multi-class metrics
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Metrics
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels, target_names=test_generator.class_indices.keys())

# Output
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")
print("\n✅ Confusion Matrix:")
print(cm)
print("\n✅ Classification Report:")
print(report)


✅ Accuracy: 0.9219
✅ ROC AUC Score: 0.9898
✅ AUPR Score: 0.9730
✅ Precision: 0.9222
✅ Recall: 0.9219
✅ F1 Score: 0.9215

✅ Confusion Matrix:
[[159   1   0   0]
 [  1 130  25   4]
 [  0  13 146   1]
 [  0   5   0 155]]

✅ Classification Report:
                      precision    recall  f1-score   support

            0_normal       0.99      0.99      0.99       160
1_ulcerative_colitis       0.87      0.81      0.84       160
            2_polyps       0.85      0.91      0.88       160
       3_esophagitis       0.97      0.97      0.97       160

            accuracy                           0.92       640
           macro avg       0.92      0.92      0.92       640
        weighted avg       0.92      0.92      0.92       640



In [16]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Optional: scale features (important for some classifiers, less so for RF but keeps consistency)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)

# Train Random Forest
rf_clf = RandomForestClassifier(n_estimators=200, max_depth=20, random_state=42, n_jobs=-1)
rf_clf.fit(X_train_scaled, train_labels)

# Predict
pred_probs = rf_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multi-class AUC and AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Evaluation
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Output
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")

print("\n✅ Confusion Matrix:")
print(cm)

print("\n✅ Classification Report:")
print(report)


✅ Accuracy: 0.9031
✅ ROC AUC Score: 0.9817
✅ AUPR Score: 0.9510
✅ Precision: 0.9034
✅ Recall: 0.9031
✅ F1 Score: 0.9031

✅ Confusion Matrix:
[[155   5   0   0]
 [  3 128  25   4]
 [  0  18 141   1]
 [  1   4   1 154]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.97      0.97       160
           1       0.83      0.80      0.81       160
           2       0.84      0.88      0.86       160
           3       0.97      0.96      0.97       160

    accuracy                           0.90       640
   macro avg       0.90      0.90      0.90       640
weighted avg       0.90      0.90      0.90       640



In [17]:
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Scale features (helps even with XGBoost sometimes)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)

# Initialize XGBoost classifier
xgb_clf = XGBClassifier(
    n_estimators=200,
    max_depth=8,
    learning_rate=0.05,
    objective='multi:softprob',
    num_class=4,
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42,
    n_jobs=-1
)

# Train
xgb_clf.fit(X_train_scaled, train_labels)

# Predict probabilities and labels
pred_probs = xgb_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multi-class AUC and AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Evaluation metrics
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Results
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")

print("\n✅ Confusion Matrix:")
print(cm)

print("\n✅ Classification Report:")
print(report)


Parameters: { "use_label_encoder" } are not used.



✅ Accuracy: 0.8906
✅ ROC AUC Score: 0.9798
✅ AUPR Score: 0.9482
✅ Precision: 0.8893
✅ Recall: 0.8906
✅ F1 Score: 0.8892

✅ Confusion Matrix:
[[158   1   0   1]
 [  2 119  31   8]
 [  0  21 137   2]
 [  1   3   0 156]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.98       160
           1       0.83      0.74      0.78       160
           2       0.82      0.86      0.84       160
           3       0.93      0.97      0.95       160

    accuracy                           0.89       640
   macro avg       0.89      0.89      0.89       640
weighted avg       0.89      0.89      0.89       640



In [18]:
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Scale features (very important for SVM)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)

# Define base models
svm_clf = SVC(kernel='rbf', probability=True, C=2, gamma='scale', random_state=42)
rf_clf = RandomForestClassifier(n_estimators=150, max_depth=12, random_state=42, n_jobs=-1)
xgb_clf = XGBClassifier(
    n_estimators=200,
    max_depth=8,
    learning_rate=0.05,
    objective='multi:softprob',
    num_class=4,
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42,
    n_jobs=-1
)

# Voting Classifier (soft voting based on probabilities)
voting_clf = VotingClassifier(
    estimators=[('svm', svm_clf), ('rf', rf_clf), ('xgb', xgb_clf)],
    voting='soft',  # soft = use predicted probabilities
    n_jobs=-1
)

# Train ensemble
voting_clf.fit(X_train_scaled, train_labels)

# Predict
pred_probs = voting_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for AUC/AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Metrics
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Print results
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")

print("\n✅ Confusion Matrix:")
print(cm)

print("\n✅ Classification Report:")
print(report)


✅ Accuracy: 0.9219
✅ ROC AUC Score: 0.9894
✅ AUPR Score: 0.9731
✅ Precision: 0.9229
✅ Recall: 0.9219
✅ F1 Score: 0.9214

✅ Confusion Matrix:
[[159   1   0   0]
 [  1 129  26   4]
 [  0  13 146   1]
 [  1   1   2 156]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       160
           1       0.90      0.81      0.85       160
           2       0.84      0.91      0.87       160
           3       0.97      0.97      0.97       160

    accuracy                           0.92       640
   macro avg       0.92      0.92      0.92       640
weighted avg       0.92      0.92      0.92       640



In [19]:
from sklearn.ensemble import StackingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Feature scaling (essential for SVM)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)


# Meta learner
meta_learner = LogisticRegression(max_iter=1000, multi_class='multinomial', solver='lbfgs')

# Stacking classifier
stacking_clf = StackingClassifier(
    estimators=[
        ('svm', svm_clf),
        ('rf', rf_clf),
        ('xgb', xgb_clf)
    ],
    final_estimator=meta_learner,
    stack_method='predict_proba',  # Important for multiclass classification
    cv=5,
    n_jobs=-1,
    passthrough=False
)

# Train ensemble
stacking_clf.fit(X_train_scaled, train_labels)

# Predict
pred_probs = stacking_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multiclass AUC/AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Evaluation
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Print metrics
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")
print("\n✅ Confusion Matrix:")
print(cm)
print("\n✅ Classification Report:")
print(report)


✅ Accuracy: 0.9297
✅ ROC AUC Score: 0.9922
✅ AUPR Score: 0.9797
✅ Precision: 0.9304
✅ Recall: 0.9297
✅ F1 Score: 0.9298

✅ Confusion Matrix:
[[159   1   0   0]
 [  1 136  21   2]
 [  0  13 146   1]
 [  0   4   2 154]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       160
           1       0.88      0.85      0.87       160
           2       0.86      0.91      0.89       160
           3       0.98      0.96      0.97       160

    accuracy                           0.93       640
   macro avg       0.93      0.93      0.93       640
weighted avg       0.93      0.93      0.93       640





90 10

In [27]:
import os
import shutil
import random

# Base original data directory
base_dir = "/content/colon_dataset"
train_dir = os.path.join(base_dir, "train")
val_dir = os.path.join(base_dir, "val")
test_dir = os.path.join(base_dir, "test")

# New root for fixed structure
fixed_base_dir = "/content/colon_data_fixed"
combined_dir = os.path.join(fixed_base_dir, "all_data")
new_train_dir = os.path.join(fixed_base_dir, "train")
new_test_dir = os.path.join(fixed_base_dir, "test")
new_val_dir = os.path.join(fixed_base_dir, "val")

# Create fixed base directory
os.makedirs(combined_dir, exist_ok=True)

# Combine train + test data into one "all_data" folder
for source_folder in [train_dir, test_dir]:
    for class_name in os.listdir(source_folder):
        src_path = os.path.join(source_folder, class_name)
        dst_path = os.path.join(combined_dir, class_name)
        os.makedirs(dst_path, exist_ok=True)
        for file in os.listdir(src_path):
            shutil.copy(os.path.join(src_path, file), os.path.join(dst_path, file))

# Function to split into 80% train / 20% test
def split_data(source_dir, train_dir, test_dir, ratio=0.9):
    for class_name in os.listdir(source_dir):
        class_path = os.path.join(source_dir, class_name)
        files = os.listdir(class_path)
        random.shuffle(files)
        split_idx = int(len(files) * ratio)

        train_files = files[:split_idx]
        test_files = files[split_idx:]

        for out_dir, file_list in [(train_dir, train_files), (test_dir, test_files)]:
            class_out = os.path.join(out_dir, class_name)
            os.makedirs(class_out, exist_ok=True)
            for f in file_list:
                shutil.copy(os.path.join(class_path, f), os.path.join(class_out, f))

# Apply the split
split_data(combined_dir, new_train_dir, new_test_dir)

# Copy val data unchanged
shutil.copytree(val_dir, new_val_dir, dirs_exist_ok=True)

print("✅ Split complete!")
print("Train path:", new_train_dir)
print("Test path:", new_test_dir)
print("Validation path:", new_val_dir)


✅ Split complete!
Train path: /content/colon_data_fixed/train
Test path: /content/colon_data_fixed/test
Validation path: /content/colon_data_fixed/val


In [28]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Original train folder
original_train_dir = '/content/colon_dataset/train'
train_split_dir = '/content/colon_dataset/train_split'
test_split_dir = '/content/colon_dataset/test_split'

# Make new directories
os.makedirs(train_split_dir, exist_ok=True)
os.makedirs(test_split_dir, exist_ok=True)

for class_name in os.listdir(original_train_dir):
    class_dir = os.path.join(original_train_dir, class_name)
    if os.path.isdir(class_dir):
        images = os.listdir(class_dir)
        train_imgs, test_imgs = train_test_split(images, test_size=0.1, random_state=42)

        os.makedirs(os.path.join(train_split_dir, class_name), exist_ok=True)
        os.makedirs(os.path.join(test_split_dir, class_name), exist_ok=True)

        # Move images
        for img in train_imgs:
            shutil.copy2(os.path.join(class_dir, img), os.path.join(train_split_dir, class_name, img))
        for img in test_imgs:
            shutil.copy2(os.path.join(class_dir, img), os.path.join(test_split_dir, class_name, img))

print("✅ Train/Test split (90/10) completed.")


✅ Train/Test split (90/10) completed.


In [29]:
train_dir = '/content/colon_dataset/train_split'
test_dir = '/content/colon_dataset/test_split'
val_dir = '/content/colon_dataset/val'

# [Keep the rest of the code unchanged]


In [30]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

img_size = (224, 224)
batch_size = 32

train_dir = '/content/colon_dataset/train_split'
test_dir = '/content/colon_dataset/test_split'
val_dir = '/content/colon_dataset/val'

train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)


Found 2880 images belonging to 4 classes.
Found 2000 images belonging to 4 classes.
Found 640 images belonging to 4 classes.


In [31]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_size = (224, 224)
batch_size = 32

# Training and augmentation
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    '/content/colon_dataset/train_split',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    '/content/colon_dataset/test_split',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

val_generator = val_datagen.flow_from_directory(
    '/content/colon_dataset/val',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)


Found 2880 images belonging to 4 classes.
Found 640 images belonging to 4 classes.
Found 2000 images belonging to 4 classes.


In [32]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam

base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze base model
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])


In [33]:
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)


  self._warn_if_super_not_called()


Epoch 1/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 767ms/step - accuracy: 0.2815 - loss: 2.0738 - val_accuracy: 0.5090 - val_loss: 1.1497
Epoch 2/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 528ms/step - accuracy: 0.4336 - loss: 1.4888 - val_accuracy: 0.5865 - val_loss: 0.9875
Epoch 3/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 883ms/step - accuracy: 0.5009 - loss: 1.2275 - val_accuracy: 0.6560 - val_loss: 0.8430
Epoch 4/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 416ms/step - accuracy: 0.5664 - loss: 1.0658 - val_accuracy: 0.6975 - val_loss: 0.7540
Epoch 5/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 419ms/step - accuracy: 0.6401 - loss: 0.8733 - val_accuracy: 0.7310 - val_loss: 0.6830
Epoch 6/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 416ms/step - accuracy: 0.6809 - loss: 0.8066 - val_accuracy: 0.7565 - val_loss: 0.6400
Epoch 7/10
[1m90/90[

In [34]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, f1_score, precision_score, recall_score
import numpy as np

# Predict on test set
y_true = test_generator.classes
y_pred_probs = model.predict(test_generator)
y_pred = np.argmax(y_pred_probs, axis=1)

# Basic metrics
accuracy = np.mean(y_true == y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
roc_auc = roc_auc_score(y_true, y_pred_probs, multi_class='ovr')
aupr = average_precision_score(y_true, y_pred_probs, average='weighted')
conf_matrix = confusion_matrix(y_true, y_pred)
report = classification_report(y_true, y_pred)

# Print results
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")
print(f"\n✅ Confusion Matrix:\n{conf_matrix}")
print(f"\n✅ Classification Report:\n{report}")


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 217ms/step
✅ Accuracy: 0.9437
✅ ROC AUC Score: 0.9921
✅ AUPR Score: 0.9777
✅ Precision: 0.9434
✅ Recall: 0.9437
✅ F1 Score: 0.9434

✅ Confusion Matrix:
[[159   0   1   0]
 [  3 145  10   2]
 [  3  13 143   1]
 [  2   0   1 157]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.99      0.97       160
           1       0.92      0.91      0.91       160
           2       0.92      0.89      0.91       160
           3       0.98      0.98      0.98       160

    accuracy                           0.94       640
   macro avg       0.94      0.94      0.94       640
weighted avg       0.94      0.94      0.94       640



In [35]:
import numpy as np
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (accuracy_score, roc_auc_score, average_precision_score,
                             precision_score, recall_score, f1_score, confusion_matrix,
                             classification_report)
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GaussianNoise, Dropout
from tensorflow.keras.applications.densenet import preprocess_input
from tqdm import tqdm

# -----------------------------------
# 1. Load DenseNet121 with Regularization
# -----------------------------------
base_model = DenseNet121(weights='imagenet', include_top=False, pooling='avg', input_shape=(224, 224, 3))

# Add Gaussian Noise and Dropout for regularization
x = GaussianNoise(0.1)(base_model.output)
x = Dropout(0.5)(x)

# Final feature extraction model
feature_model = Model(inputs=base_model.input, outputs=x)

# -----------------------------------
# 2. Feature Extraction Function
# -----------------------------------
def extract_features(generator, model):
    features, labels = [], []
    for i in tqdm(range(len(generator))):
        x_batch, y_batch = generator[i]
        x_batch = preprocess_input(x_batch)
        batch_features = model.predict(x_batch, verbose=0)
        features.append(batch_features)
        labels.append(y_batch)
    return np.vstack(features), np.concatenate(labels)

# -----------------------------------
# 3. Extract Features from Train/Val/Test Sets
# (Assumes `train_generator`, `val_generator`, and `test_generator` are already defined)
# -----------------------------------
train_features, train_labels = extract_features(train_generator, feature_model)
val_features, val_labels = extract_features(val_generator, feature_model)
test_features, test_labels = extract_features(test_generator, feature_model)



100%|██████████| 90/90 [00:47<00:00,  1.89it/s]
100%|██████████| 63/63 [00:35<00:00,  1.75it/s]
100%|██████████| 20/20 [00:09<00:00,  2.21it/s]


In [36]:
from sklearn.decomposition import PCA
pca = PCA(n_components=0.95)  # Keep 95% variance
train_features = pca.fit_transform(train_features)
val_features = pca.transform(val_features)
test_features = pca.transform(test_features)


In [37]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
from sklearn.preprocessing import label_binarize
import numpy as np

# Fix the label shape
train_labels = np.argmax(train_labels, axis=1)
test_labels = np.argmax(test_labels, axis=1)

# Train SVM
svm_clf = make_pipeline(StandardScaler(), SVC(kernel='rbf', probability=True, C=10, gamma='scale', random_state=42))
svm_clf.fit(train_features, train_labels)

# Predict
pred_probs = svm_clf.predict_proba(test_features)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multi-class metrics
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Metrics
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels, target_names=test_generator.class_indices.keys())

# Output
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")
print("\n✅ Confusion Matrix:")
print(cm)
print("\n✅ Classification Report:")
print(report)


✅ Accuracy: 0.9578
✅ ROC AUC Score: 0.9962
✅ AUPR Score: 0.9905
✅ Precision: 0.9579
✅ Recall: 0.9578
✅ F1 Score: 0.9577

✅ Confusion Matrix:
[[159   1   0   0]
 [  0 144  13   3]
 [  0   8 152   0]
 [  0   2   0 158]]

✅ Classification Report:
                      precision    recall  f1-score   support

            0_normal       1.00      0.99      1.00       160
1_ulcerative_colitis       0.93      0.90      0.91       160
            2_polyps       0.92      0.95      0.94       160
       3_esophagitis       0.98      0.99      0.98       160

            accuracy                           0.96       640
           macro avg       0.96      0.96      0.96       640
        weighted avg       0.96      0.96      0.96       640



In [38]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Optional: scale features (important for some classifiers, less so for RF but keeps consistency)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)

# Train Random Forest
rf_clf = RandomForestClassifier(n_estimators=200, max_depth=20, random_state=42, n_jobs=-1)
rf_clf.fit(X_train_scaled, train_labels)

# Predict
pred_probs = rf_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multi-class AUC and AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Evaluation
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Output
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")

print("\n✅ Confusion Matrix:")
print(cm)

print("\n✅ Classification Report:")
print(report)


✅ Accuracy: 0.9563
✅ ROC AUC Score: 0.9952
✅ AUPR Score: 0.9878
✅ Precision: 0.9571
✅ Recall: 0.9562
✅ F1 Score: 0.9564

✅ Confusion Matrix:
[[158   2   0   0]
 [  0 151   8   1]
 [  0  12 147   1]
 [  1   3   0 156]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       160
           1       0.90      0.94      0.92       160
           2       0.95      0.92      0.93       160
           3       0.99      0.97      0.98       160

    accuracy                           0.96       640
   macro avg       0.96      0.96      0.96       640
weighted avg       0.96      0.96      0.96       640



In [39]:
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Scale features (helps even with XGBoost sometimes)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)

# Initialize XGBoost classifier
xgb_clf = XGBClassifier(
    n_estimators=200,
    max_depth=8,
    learning_rate=0.05,
    objective='multi:softprob',
    num_class=4,
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42,
    n_jobs=-1
)

# Train
xgb_clf.fit(X_train_scaled, train_labels)

# Predict probabilities and labels
pred_probs = xgb_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multi-class AUC and AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Evaluation metrics
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Results
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")

print("\n✅ Confusion Matrix:")
print(cm)

print("\n✅ Classification Report:")
print(report)


Parameters: { "use_label_encoder" } are not used.



✅ Accuracy: 0.9484
✅ ROC AUC Score: 0.9953
✅ AUPR Score: 0.9880
✅ Precision: 0.9483
✅ Recall: 0.9484
✅ F1 Score: 0.9484

✅ Confusion Matrix:
[[159   1   0   0]
 [  0 144  13   3]
 [  0  12 147   1]
 [  1   2   0 157]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       160
           1       0.91      0.90      0.90       160
           2       0.92      0.92      0.92       160
           3       0.98      0.98      0.98       160

    accuracy                           0.95       640
   macro avg       0.95      0.95      0.95       640
weighted avg       0.95      0.95      0.95       640



In [40]:
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Scale features (very important for SVM)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)

# Define base models
svm_clf = SVC(kernel='rbf', probability=True, C=2, gamma='scale', random_state=42)
rf_clf = RandomForestClassifier(n_estimators=150, max_depth=12, random_state=42, n_jobs=-1)
xgb_clf = XGBClassifier(
    n_estimators=200,
    max_depth=8,
    learning_rate=0.05,
    objective='multi:softprob',
    num_class=4,
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42,
    n_jobs=-1
)

# Voting Classifier (soft voting based on probabilities)
voting_clf = VotingClassifier(
    estimators=[('svm', svm_clf), ('rf', rf_clf), ('xgb', xgb_clf)],
    voting='soft',  # soft = use predicted probabilities
    n_jobs=-1
)

# Train ensemble
voting_clf.fit(X_train_scaled, train_labels)

# Predict
pred_probs = voting_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for AUC/AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Metrics
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Print results
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")

print("\n✅ Confusion Matrix:")
print(cm)

print("\n✅ Classification Report:")
print(report)


✅ Accuracy: 0.9641
✅ ROC AUC Score: 0.9969
✅ AUPR Score: 0.9930
✅ Precision: 0.9641
✅ Recall: 0.9641
✅ F1 Score: 0.9640

✅ Confusion Matrix:
[[159   1   0   0]
 [  0 148  10   2]
 [  0   8 152   0]
 [  1   1   0 158]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       160
           1       0.94      0.93      0.93       160
           2       0.94      0.95      0.94       160
           3       0.99      0.99      0.99       160

    accuracy                           0.96       640
   macro avg       0.96      0.96      0.96       640
weighted avg       0.96      0.96      0.96       640



In [41]:
from sklearn.ensemble import StackingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Feature scaling (essential for SVM)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)


# Meta learner
meta_learner = LogisticRegression(max_iter=1000, multi_class='multinomial', solver='lbfgs')

# Stacking classifier
stacking_clf = StackingClassifier(
    estimators=[
        ('svm', svm_clf),
        ('rf', rf_clf),
        ('xgb', xgb_clf)
    ],
    final_estimator=meta_learner,
    stack_method='predict_proba',  # Important for multiclass classification
    cv=5,
    n_jobs=-1,
    passthrough=False
)

# Train ensemble
stacking_clf.fit(X_train_scaled, train_labels)

# Predict
pred_probs = stacking_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multiclass AUC/AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Evaluation
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Print metrics
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")
print("\n✅ Confusion Matrix:")
print(cm)
print("\n✅ Classification Report:")
print(report)


✅ Accuracy: 0.9656
✅ ROC AUC Score: 0.9975
✅ AUPR Score: 0.9937
✅ Precision: 0.9659
✅ Recall: 0.9656
✅ F1 Score: 0.9656

✅ Confusion Matrix:
[[159   0   1   0]
 [  0 147  11   2]
 [  0   6 154   0]
 [  0   2   0 158]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      1.00       160
           1       0.95      0.92      0.93       160
           2       0.93      0.96      0.94       160
           3       0.99      0.99      0.99       160

    accuracy                           0.97       640
   macro avg       0.97      0.97      0.97       640
weighted avg       0.97      0.97      0.97       640





70 30

In [42]:
import os
import shutil
import random

# Base original data directory
base_dir = "/content/colon_dataset"
train_dir = os.path.join(base_dir, "train")
val_dir = os.path.join(base_dir, "val")
test_dir = os.path.join(base_dir, "test")

# New root for fixed structure
fixed_base_dir = "/content/colon_data_fixed"
combined_dir = os.path.join(fixed_base_dir, "all_data")
new_train_dir = os.path.join(fixed_base_dir, "train")
new_test_dir = os.path.join(fixed_base_dir, "test")
new_val_dir = os.path.join(fixed_base_dir, "val")

# Create fixed base directory
os.makedirs(combined_dir, exist_ok=True)

# Combine train + test data into one "all_data" folder
for source_folder in [train_dir, test_dir]:
    for class_name in os.listdir(source_folder):
        src_path = os.path.join(source_folder, class_name)
        dst_path = os.path.join(combined_dir, class_name)
        os.makedirs(dst_path, exist_ok=True)
        for file in os.listdir(src_path):
            shutil.copy(os.path.join(src_path, file), os.path.join(dst_path, file))

# Function to split into 80% train / 20% test
def split_data(source_dir, train_dir, test_dir, ratio=0.7):
    for class_name in os.listdir(source_dir):
        class_path = os.path.join(source_dir, class_name)
        files = os.listdir(class_path)
        random.shuffle(files)
        split_idx = int(len(files) * ratio)

        train_files = files[:split_idx]
        test_files = files[split_idx:]

        for out_dir, file_list in [(train_dir, train_files), (test_dir, test_files)]:
            class_out = os.path.join(out_dir, class_name)
            os.makedirs(class_out, exist_ok=True)
            for f in file_list:
                shutil.copy(os.path.join(class_path, f), os.path.join(class_out, f))

# Apply the split
split_data(combined_dir, new_train_dir, new_test_dir)

# Copy val data unchanged
shutil.copytree(val_dir, new_val_dir, dirs_exist_ok=True)

print("✅ Split complete!")
print("Train path:", new_train_dir)
print("Test path:", new_test_dir)
print("Validation path:", new_val_dir)


✅ Split complete!
Train path: /content/colon_data_fixed/train
Test path: /content/colon_data_fixed/test
Validation path: /content/colon_data_fixed/val


In [43]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Original train folder
original_train_dir = '/content/colon_dataset/train'
train_split_dir = '/content/colon_dataset/train_split'
test_split_dir = '/content/colon_dataset/test_split'

# Make new directories
os.makedirs(train_split_dir, exist_ok=True)
os.makedirs(test_split_dir, exist_ok=True)

for class_name in os.listdir(original_train_dir):
    class_dir = os.path.join(original_train_dir, class_name)
    if os.path.isdir(class_dir):
        images = os.listdir(class_dir)
        train_imgs, test_imgs = train_test_split(images, test_size=0.3, random_state=42)

        os.makedirs(os.path.join(train_split_dir, class_name), exist_ok=True)
        os.makedirs(os.path.join(test_split_dir, class_name), exist_ok=True)

        # Move images
        for img in train_imgs:
            shutil.copy2(os.path.join(class_dir, img), os.path.join(train_split_dir, class_name, img))
        for img in test_imgs:
            shutil.copy2(os.path.join(class_dir, img), os.path.join(test_split_dir, class_name, img))

print("✅ Train/Test split (70/30) completed.")


✅ Train/Test split (70/30) completed.


In [44]:
train_dir = '/content/colon_dataset/train_split'
test_dir = '/content/colon_dataset/test_split'
val_dir = '/content/colon_dataset/val'

# [Keep the rest of the code unchanged]


In [45]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

img_size = (224, 224)
batch_size = 32

train_dir = '/content/colon_dataset/train_split'
test_dir = '/content/colon_dataset/test_split'
val_dir = '/content/colon_dataset/val'

train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)


Found 2880 images belonging to 4 classes.
Found 2000 images belonging to 4 classes.
Found 960 images belonging to 4 classes.


In [46]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_size = (224, 224)
batch_size = 32

# Training and augmentation
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    '/content/colon_dataset/train_split',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    '/content/colon_dataset/test_split',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

val_generator = val_datagen.flow_from_directory(
    '/content/colon_dataset/val',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)


Found 2880 images belonging to 4 classes.
Found 960 images belonging to 4 classes.
Found 2000 images belonging to 4 classes.


In [47]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam

base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze base model
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])


In [48]:
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)


  self._warn_if_super_not_called()


Epoch 1/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 619ms/step - accuracy: 0.2732 - loss: 2.1259 - val_accuracy: 0.3430 - val_loss: 1.3599
Epoch 2/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 427ms/step - accuracy: 0.3644 - loss: 1.5905 - val_accuracy: 0.4600 - val_loss: 1.1475
Epoch 3/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 416ms/step - accuracy: 0.4767 - loss: 1.3316 - val_accuracy: 0.5900 - val_loss: 0.9759
Epoch 4/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 424ms/step - accuracy: 0.5386 - loss: 1.0752 - val_accuracy: 0.5950 - val_loss: 0.9229
Epoch 5/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 425ms/step - accuracy: 0.6043 - loss: 0.9662 - val_accuracy: 0.6495 - val_loss: 0.8279
Epoch 6/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 421ms/step - accuracy: 0.6847 - loss: 0.8023 - val_accuracy: 0.6965 - val_loss: 0.7470
Epoch 7/10
[1m90/90[

In [49]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, f1_score, precision_score, recall_score
import numpy as np

# Predict on test set
y_true = test_generator.classes
y_pred_probs = model.predict(test_generator)
y_pred = np.argmax(y_pred_probs, axis=1)

# Basic metrics
accuracy = np.mean(y_true == y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
roc_auc = roc_auc_score(y_true, y_pred_probs, multi_class='ovr')
aupr = average_precision_score(y_true, y_pred_probs, average='weighted')
conf_matrix = confusion_matrix(y_true, y_pred)
report = classification_report(y_true, y_pred)

# Print results
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")
print(f"\n✅ Confusion Matrix:\n{conf_matrix}")
print(f"\n✅ Classification Report:\n{report}")


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 251ms/step
✅ Accuracy: 0.9396
✅ ROC AUC Score: 0.9924
✅ AUPR Score: 0.9806
✅ Precision: 0.9391
✅ Recall: 0.9396
✅ F1 Score: 0.9392

✅ Confusion Matrix:
[[238   0   2   0]
 [  6 215  16   3]
 [  4  19 214   3]
 [  2   1   2 235]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.99      0.97       240
           1       0.91      0.90      0.91       240
           2       0.91      0.89      0.90       240
           3       0.98      0.98      0.98       240

    accuracy                           0.94       960
   macro avg       0.94      0.94      0.94       960
weighted avg       0.94      0.94      0.94       960



In [50]:
import numpy as np
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (accuracy_score, roc_auc_score, average_precision_score,
                             precision_score, recall_score, f1_score, confusion_matrix,
                             classification_report)
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GaussianNoise, Dropout
from tensorflow.keras.applications.densenet import preprocess_input
from tqdm import tqdm

# -----------------------------------
# 1. Load DenseNet121 with Regularization
# -----------------------------------
base_model = DenseNet121(weights='imagenet', include_top=False, pooling='avg', input_shape=(224, 224, 3))

# Add Gaussian Noise and Dropout for regularization
x = GaussianNoise(0.1)(base_model.output)
x = Dropout(0.5)(x)

# Final feature extraction model
feature_model = Model(inputs=base_model.input, outputs=x)

# -----------------------------------
# 2. Feature Extraction Function
# -----------------------------------
def extract_features(generator, model):
    features, labels = [], []
    for i in tqdm(range(len(generator))):
        x_batch, y_batch = generator[i]
        x_batch = preprocess_input(x_batch)
        batch_features = model.predict(x_batch, verbose=0)
        features.append(batch_features)
        labels.append(y_batch)
    return np.vstack(features), np.concatenate(labels)

# -----------------------------------
# 3. Extract Features from Train/Val/Test Sets
# (Assumes `train_generator`, `val_generator`, and `test_generator` are already defined)
# -----------------------------------
train_features, train_labels = extract_features(train_generator, feature_model)
val_features, val_labels = extract_features(val_generator, feature_model)
test_features, test_labels = extract_features(test_generator, feature_model)



100%|██████████| 90/90 [00:45<00:00,  1.96it/s]
100%|██████████| 63/63 [00:36<00:00,  1.75it/s]
100%|██████████| 30/30 [00:12<00:00,  2.44it/s]


In [51]:
from sklearn.decomposition import PCA
pca = PCA(n_components=0.95)  # Keep 95% variance
train_features = pca.fit_transform(train_features)
val_features = pca.transform(val_features)
test_features = pca.transform(test_features)


In [52]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
from sklearn.preprocessing import label_binarize
import numpy as np

# Fix the label shape
train_labels = np.argmax(train_labels, axis=1)
test_labels = np.argmax(test_labels, axis=1)

# Train SVM
svm_clf = make_pipeline(StandardScaler(), SVC(kernel='rbf', probability=True, C=10, gamma='scale', random_state=42))
svm_clf.fit(train_features, train_labels)

# Predict
pred_probs = svm_clf.predict_proba(test_features)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multi-class metrics
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Metrics
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels, target_names=test_generator.class_indices.keys())

# Output
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")
print("\n✅ Confusion Matrix:")
print(cm)
print("\n✅ Classification Report:")
print(report)


✅ Accuracy: 0.9719
✅ ROC AUC Score: 0.9979
✅ AUPR Score: 0.9950
✅ Precision: 0.9719
✅ Recall: 0.9719
✅ F1 Score: 0.9718

✅ Confusion Matrix:
[[239   1   0   0]
 [  0 224  13   3]
 [  0   8 232   0]
 [  0   2   0 238]]

✅ Classification Report:
                      precision    recall  f1-score   support

            0_normal       1.00      1.00      1.00       240
1_ulcerative_colitis       0.95      0.93      0.94       240
            2_polyps       0.95      0.97      0.96       240
       3_esophagitis       0.99      0.99      0.99       240

            accuracy                           0.97       960
           macro avg       0.97      0.97      0.97       960
        weighted avg       0.97      0.97      0.97       960



In [53]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Optional: scale features (important for some classifiers, less so for RF but keeps consistency)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)

# Train Random Forest
rf_clf = RandomForestClassifier(n_estimators=200, max_depth=20, random_state=42, n_jobs=-1)
rf_clf.fit(X_train_scaled, train_labels)

# Predict
pred_probs = rf_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multi-class AUC and AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Evaluation
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Output
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")

print("\n✅ Confusion Matrix:")
print(cm)

print("\n✅ Classification Report:")
print(report)


✅ Accuracy: 0.9698
✅ ROC AUC Score: 0.9979
✅ AUPR Score: 0.9945
✅ Precision: 0.9700
✅ Recall: 0.9698
✅ F1 Score: 0.9699

✅ Confusion Matrix:
[[238   2   0   0]
 [  0 229  10   1]
 [  0  11 228   1]
 [  1   3   0 236]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       240
           1       0.93      0.95      0.94       240
           2       0.96      0.95      0.95       240
           3       0.99      0.98      0.99       240

    accuracy                           0.97       960
   macro avg       0.97      0.97      0.97       960
weighted avg       0.97      0.97      0.97       960



In [54]:
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Scale features (helps even with XGBoost sometimes)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)

# Initialize XGBoost classifier
xgb_clf = XGBClassifier(
    n_estimators=200,
    max_depth=8,
    learning_rate=0.05,
    objective='multi:softprob',
    num_class=4,
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42,
    n_jobs=-1
)

# Train
xgb_clf.fit(X_train_scaled, train_labels)

# Predict probabilities and labels
pred_probs = xgb_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multi-class AUC and AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Evaluation metrics
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Results
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")

print("\n✅ Confusion Matrix:")
print(cm)

print("\n✅ Classification Report:")
print(report)


Parameters: { "use_label_encoder" } are not used.



✅ Accuracy: 0.9656
✅ ROC AUC Score: 0.9976
✅ AUPR Score: 0.9940
✅ Precision: 0.9657
✅ Recall: 0.9656
✅ F1 Score: 0.9657

✅ Confusion Matrix:
[[238   1   1   0]
 [  0 225  13   2]
 [  0  12 227   1]
 [  1   2   0 237]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       240
           1       0.94      0.94      0.94       240
           2       0.94      0.95      0.94       240
           3       0.99      0.99      0.99       240

    accuracy                           0.97       960
   macro avg       0.97      0.97      0.97       960
weighted avg       0.97      0.97      0.97       960



In [55]:
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Scale features (very important for SVM)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)

# Define base models
svm_clf = SVC(kernel='rbf', probability=True, C=2, gamma='scale', random_state=42)
rf_clf = RandomForestClassifier(n_estimators=150, max_depth=12, random_state=42, n_jobs=-1)
xgb_clf = XGBClassifier(
    n_estimators=200,
    max_depth=8,
    learning_rate=0.05,
    objective='multi:softprob',
    num_class=4,
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42,
    n_jobs=-1
)

# Voting Classifier (soft voting based on probabilities)
voting_clf = VotingClassifier(
    estimators=[('svm', svm_clf), ('rf', rf_clf), ('xgb', xgb_clf)],
    voting='soft',  # soft = use predicted probabilities
    n_jobs=-1
)

# Train ensemble
voting_clf.fit(X_train_scaled, train_labels)

# Predict
pred_probs = voting_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for AUC/AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Metrics
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Print results
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")

print("\n✅ Confusion Matrix:")
print(cm)

print("\n✅ Classification Report:")
print(report)


✅ Accuracy: 0.9760
✅ ROC AUC Score: 0.9985
✅ AUPR Score: 0.9965
✅ Precision: 0.9761
✅ Recall: 0.9760
✅ F1 Score: 0.9761

✅ Confusion Matrix:
[[239   1   0   0]
 [  0 228  11   1]
 [  0   8 232   0]
 [  1   1   0 238]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       240
           1       0.96      0.95      0.95       240
           2       0.95      0.97      0.96       240
           3       1.00      0.99      0.99       240

    accuracy                           0.98       960
   macro avg       0.98      0.98      0.98       960
weighted avg       0.98      0.98      0.98       960



In [56]:
from sklearn.ensemble import StackingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Feature scaling (essential for SVM)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)


# Meta learner
meta_learner = LogisticRegression(max_iter=1000, multi_class='multinomial', solver='lbfgs')

# Stacking classifier
stacking_clf = StackingClassifier(
    estimators=[
        ('svm', svm_clf),
        ('rf', rf_clf),
        ('xgb', xgb_clf)
    ],
    final_estimator=meta_learner,
    stack_method='predict_proba',  # Important for multiclass classification
    cv=5,
    n_jobs=-1,
    passthrough=False
)

# Train ensemble
stacking_clf.fit(X_train_scaled, train_labels)

# Predict
pred_probs = stacking_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multiclass AUC/AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Evaluation
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Print metrics
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")
print("\n✅ Confusion Matrix:")
print(cm)
print("\n✅ Classification Report:")
print(report)


✅ Accuracy: 0.9771
✅ ROC AUC Score: 0.9986
✅ AUPR Score: 0.9965
✅ Precision: 0.9772
✅ Recall: 0.9771
✅ F1 Score: 0.9771

✅ Confusion Matrix:
[[239   0   1   0]
 [  0 228  11   1]
 [  0   7 233   0]
 [  0   2   0 238]]

✅ Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       240
           1       0.96      0.95      0.96       240
           2       0.95      0.97      0.96       240
           3       1.00      0.99      0.99       240

    accuracy                           0.98       960
   macro avg       0.98      0.98      0.98       960
weighted avg       0.98      0.98      0.98       960





60 40

In [57]:
import os
import shutil
import random

# Base original data directory
base_dir = "/content/colon_dataset"
train_dir = os.path.join(base_dir, "train")
val_dir = os.path.join(base_dir, "val")
test_dir = os.path.join(base_dir, "test")

# New root for fixed structure
fixed_base_dir = "/content/colon_data_fixed"
combined_dir = os.path.join(fixed_base_dir, "all_data")
new_train_dir = os.path.join(fixed_base_dir, "train")
new_test_dir = os.path.join(fixed_base_dir, "test")
new_val_dir = os.path.join(fixed_base_dir, "val")

# Create fixed base directory
os.makedirs(combined_dir, exist_ok=True)

# Combine train + test data into one "all_data" folder
for source_folder in [train_dir, test_dir]:
    for class_name in os.listdir(source_folder):
        src_path = os.path.join(source_folder, class_name)
        dst_path = os.path.join(combined_dir, class_name)
        os.makedirs(dst_path, exist_ok=True)
        for file in os.listdir(src_path):
            shutil.copy(os.path.join(src_path, file), os.path.join(dst_path, file))

# Function to split into 80% train / 20% test
def split_data(source_dir, train_dir, test_dir, ratio=0.6):
    for class_name in os.listdir(source_dir):
        class_path = os.path.join(source_dir, class_name)
        files = os.listdir(class_path)
        random.shuffle(files)
        split_idx = int(len(files) * ratio)

        train_files = files[:split_idx]
        test_files = files[split_idx:]

        for out_dir, file_list in [(train_dir, train_files), (test_dir, test_files)]:
            class_out = os.path.join(out_dir, class_name)
            os.makedirs(class_out, exist_ok=True)
            for f in file_list:
                shutil.copy(os.path.join(class_path, f), os.path.join(class_out, f))

# Apply the split
split_data(combined_dir, new_train_dir, new_test_dir)

# Copy val data unchanged
shutil.copytree(val_dir, new_val_dir, dirs_exist_ok=True)

print("✅ Split complete!")
print("Train path:", new_train_dir)
print("Test path:", new_test_dir)
print("Validation path:", new_val_dir)


✅ Split complete!
Train path: /content/colon_data_fixed/train
Test path: /content/colon_data_fixed/test
Validation path: /content/colon_data_fixed/val


In [58]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Original train folder
original_train_dir = '/content/colon_dataset/train'
train_split_dir = '/content/colon_dataset/train_split'
test_split_dir = '/content/colon_dataset/test_split'

# Make new directories
os.makedirs(train_split_dir, exist_ok=True)
os.makedirs(test_split_dir, exist_ok=True)

for class_name in os.listdir(original_train_dir):
    class_dir = os.path.join(original_train_dir, class_name)
    if os.path.isdir(class_dir):
        images = os.listdir(class_dir)
        train_imgs, test_imgs = train_test_split(images, test_size=0.4, random_state=42)

        os.makedirs(os.path.join(train_split_dir, class_name), exist_ok=True)
        os.makedirs(os.path.join(test_split_dir, class_name), exist_ok=True)

        # Move images
        for img in train_imgs:
            shutil.copy2(os.path.join(class_dir, img), os.path.join(train_split_dir, class_name, img))
        for img in test_imgs:
            shutil.copy2(os.path.join(class_dir, img), os.path.join(test_split_dir, class_name, img))

print("✅ Train/Test split (60/40) completed.")


✅ Train/Test split (60/40) completed.


In [59]:
train_dir = '/content/colon_dataset/train_split'
test_dir = '/content/colon_dataset/test_split'
val_dir = '/content/colon_dataset/val'

# [Keep the rest of the code unchanged]


In [60]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

img_size = (224, 224)
batch_size = 32

train_dir = '/content/colon_dataset/train_split'
test_dir = '/content/colon_dataset/test_split'
val_dir = '/content/colon_dataset/val'

train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)


Found 2880 images belonging to 4 classes.
Found 2000 images belonging to 4 classes.
Found 1280 images belonging to 4 classes.


In [61]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_size = (224, 224)
batch_size = 32

# Training and augmentation
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    '/content/colon_dataset/train_split',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    '/content/colon_dataset/test_split',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

val_generator = val_datagen.flow_from_directory(
    '/content/colon_dataset/val',
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical'
)


Found 2880 images belonging to 4 classes.
Found 1280 images belonging to 4 classes.
Found 2000 images belonging to 4 classes.


In [62]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam

base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze base model
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)


  self._warn_if_super_not_called()


Epoch 1/10
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 268ms/step - accuracy: 0.2724 - loss: 2.1404

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, average_precision_score, f1_score, precision_score, recall_score
import numpy as np

# Predict on test set
y_true = test_generator.classes
y_pred_probs = model.predict(test_generator)
y_pred = np.argmax(y_pred_probs, axis=1)

# Basic metrics
accuracy = np.mean(y_true == y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
roc_auc = roc_auc_score(y_true, y_pred_probs, multi_class='ovr')
aupr = average_precision_score(y_true, y_pred_probs, average='weighted')
conf_matrix = confusion_matrix(y_true, y_pred)
report = classification_report(y_true, y_pred)

# Print results
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")
print(f"\n✅ Confusion Matrix:\n{conf_matrix}")
print(f"\n✅ Classification Report:\n{report}")


In [None]:
import numpy as np
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (accuracy_score, roc_auc_score, average_precision_score,
                             precision_score, recall_score, f1_score, confusion_matrix,
                             classification_report)
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GaussianNoise, Dropout
from tensorflow.keras.applications.densenet import preprocess_input
from tqdm import tqdm

# -----------------------------------
# 1. Load DenseNet121 with Regularization
# -----------------------------------
base_model = DenseNet121(weights='imagenet', include_top=False, pooling='avg', input_shape=(224, 224, 3))

# Add Gaussian Noise and Dropout for regularization
x = GaussianNoise(0.1)(base_model.output)
x = Dropout(0.5)(x)

# Final feature extraction model
feature_model = Model(inputs=base_model.input, outputs=x)

# -----------------------------------
# 2. Feature Extraction Function
# -----------------------------------
def extract_features(generator, model):
    features, labels = [], []
    for i in tqdm(range(len(generator))):
        x_batch, y_batch = generator[i]
        x_batch = preprocess_input(x_batch)
        batch_features = model.predict(x_batch, verbose=0)
        features.append(batch_features)
        labels.append(y_batch)
    return np.vstack(features), np.concatenate(labels)

# -----------------------------------
# 3. Extract Features from Train/Val/Test Sets
# (Assumes `train_generator`, `val_generator`, and `test_generator` are already defined)
# -----------------------------------
train_features, train_labels = extract_features(train_generator, feature_model)
val_features, val_labels = extract_features(val_generator, feature_model)
test_features, test_labels = extract_features(test_generator, feature_model)



In [None]:
from sklearn.decomposition import PCA
pca = PCA(n_components=0.95)  # Keep 95% variance
train_features = pca.fit_transform(train_features)
val_features = pca.transform(val_features)
test_features = pca.transform(test_features)


In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
from sklearn.preprocessing import label_binarize
import numpy as np

# Fix the label shape
train_labels = np.argmax(train_labels, axis=1)
test_labels = np.argmax(test_labels, axis=1)

# Train SVM
svm_clf = make_pipeline(StandardScaler(), SVC(kernel='rbf', probability=True, C=10, gamma='scale', random_state=42))
svm_clf.fit(train_features, train_labels)

# Predict
pred_probs = svm_clf.predict_proba(test_features)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multi-class metrics
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Metrics
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels, target_names=test_generator.class_indices.keys())

# Output
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")
print("\n✅ Confusion Matrix:")
print(cm)
print("\n✅ Classification Report:")
print(report)


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Optional: scale features (important for some classifiers, less so for RF but keeps consistency)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)

# Train Random Forest
rf_clf = RandomForestClassifier(n_estimators=200, max_depth=20, random_state=42, n_jobs=-1)
rf_clf.fit(X_train_scaled, train_labels)

# Predict
pred_probs = rf_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multi-class AUC and AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Evaluation
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Output
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")

print("\n✅ Confusion Matrix:")
print(cm)

print("\n✅ Classification Report:")
print(report)


In [None]:
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Scale features (helps even with XGBoost sometimes)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)

# Initialize XGBoost classifier
xgb_clf = XGBClassifier(
    n_estimators=200,
    max_depth=8,
    learning_rate=0.05,
    objective='multi:softprob',
    num_class=4,
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42,
    n_jobs=-1
)

# Train
xgb_clf.fit(X_train_scaled, train_labels)

# Predict probabilities and labels
pred_probs = xgb_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multi-class AUC and AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Evaluation metrics
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Results
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")

print("\n✅ Confusion Matrix:")
print(cm)

print("\n✅ Classification Report:")
print(report)


In [None]:
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Scale features (very important for SVM)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)

# Define base models
svm_clf = SVC(kernel='rbf', probability=True, C=2, gamma='scale', random_state=42)
rf_clf = RandomForestClassifier(n_estimators=150, max_depth=12, random_state=42, n_jobs=-1)
xgb_clf = XGBClassifier(
    n_estimators=200,
    max_depth=8,
    learning_rate=0.05,
    objective='multi:softprob',
    num_class=4,
    use_label_encoder=False,
    eval_metric='mlogloss',
    random_state=42,
    n_jobs=-1
)

# Voting Classifier (soft voting based on probabilities)
voting_clf = VotingClassifier(
    estimators=[('svm', svm_clf), ('rf', rf_clf), ('xgb', xgb_clf)],
    voting='soft',  # soft = use predicted probabilities
    n_jobs=-1
)

# Train ensemble
voting_clf.fit(X_train_scaled, train_labels)

# Predict
pred_probs = voting_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for AUC/AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Metrics
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Print results
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")

print("\n✅ Confusion Matrix:")
print(cm)

print("\n✅ Classification Report:")
print(report)


In [None]:
from sklearn.ensemble import StackingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, label_binarize
from sklearn.metrics import (
    accuracy_score, roc_auc_score, average_precision_score,
    precision_score, recall_score, f1_score, confusion_matrix, classification_report
)
import numpy as np

# Feature scaling (essential for SVM)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(train_features)
X_test_scaled = scaler.transform(test_features)


# Meta learner
meta_learner = LogisticRegression(max_iter=1000, multi_class='multinomial', solver='lbfgs')

# Stacking classifier
stacking_clf = StackingClassifier(
    estimators=[
        ('svm', svm_clf),
        ('rf', rf_clf),
        ('xgb', xgb_clf)
    ],
    final_estimator=meta_learner,
    stack_method='predict_proba',  # Important for multiclass classification
    cv=5,
    n_jobs=-1,
    passthrough=False
)

# Train ensemble
stacking_clf.fit(X_train_scaled, train_labels)

# Predict
pred_probs = stacking_clf.predict_proba(X_test_scaled)
pred_labels = np.argmax(pred_probs, axis=1)

# Binarize for multiclass AUC/AUPR
n_classes = len(np.unique(test_labels))
true_binarized = label_binarize(test_labels, classes=list(range(n_classes)))
pred_binarized = label_binarize(pred_labels, classes=list(range(n_classes)))

# Evaluation
accuracy = accuracy_score(test_labels, pred_labels)
roc_auc = roc_auc_score(true_binarized, pred_probs, average='macro', multi_class='ovr')
aupr = average_precision_score(true_binarized, pred_probs, average='macro')
precision = precision_score(test_labels, pred_labels, average='macro')
recall = recall_score(test_labels, pred_labels, average='macro')
f1 = f1_score(test_labels, pred_labels, average='macro')
cm = confusion_matrix(test_labels, pred_labels)
report = classification_report(test_labels, pred_labels)

# Print metrics
print(f"✅ Accuracy: {accuracy:.4f}")
print(f"✅ ROC AUC Score: {roc_auc:.4f}")
print(f"✅ AUPR Score: {aupr:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall: {recall:.4f}")
print(f"✅ F1 Score: {f1:.4f}")
print("\n✅ Confusion Matrix:")
print(cm)
print("\n✅ Classification Report:")
print(report)
