## 1. Import needed libraries and Set Device

In [1]:
# --- General Libraries ---
import os
import random
import time
import warnings

# --- Data Handling ---
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score,confusion_matrix

# --- Image Handling and Visualization ---
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

# --- PyTorch and Deep Learning ---
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

# --- Utilities ---
from tqdm import tqdm  # For training progress visualization
import glob

# --- Warning Suppression ---
warnings.filterwarnings('ignore')


In [2]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.current_device())
print(torch.cuda.get_device_name(torch.cuda.current_device()))

Using cuda device
True
1
0
NVIDIA GeForce RTX 4060 Ti


In [3]:
root_dir = "lung_colon_image_set"
image_paths = []
binary_labels = []
multi_labels = []

label_mapping = {
    'lung_n': 0, 'lung_aca': 1, 'lung_scc': 2,
    'colon_n': 3, 'colon_aca': 4
}

binary_mapping = {
    'lung_n': 0, 'colon_n': 0,
    'lung_aca': 1, 'lung_scc': 1, 'colon_aca': 1
}

for subfolder in ['lung_image_sets/lung_n', 'lung_image_sets/lung_aca', 'lung_image_sets/lung_scc',
                  'colon_image_sets/colon_n', 'colon_image_sets/colon_aca']:
    class_dir = os.path.join(root_dir, subfolder)
    class_name = subfolder.split('/')[-1]

    for img_file in os.listdir(class_dir):
        img_path = os.path.join(class_dir, img_file)
        image_paths.append(img_path)
        multi_labels.append(label_mapping[class_name])
        binary_labels.append(binary_mapping[class_name])

print(f"Total Image: {len(image_paths)}")

Total Image: 25000


In [4]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

class FeatureDataset(Dataset):
    def __init__(self, image_paths, binary_labels, multi_labels, transform=None):
        self.image_paths = image_paths
        self.binary_labels = binary_labels
        self.multi_labels = multi_labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, self.binary_labels[idx], self.multi_labels[idx]


In [5]:
X_train, X_test, y_bin_train, y_bin_test, y_multi_train, y_multi_test = train_test_split(
    image_paths, binary_labels, multi_labels,
    test_size=0.3, random_state=42, stratify=multi_labels
)

train_dataset = FeatureDataset(X_train, y_bin_train, y_multi_train, transform)
test_dataset = FeatureDataset(X_test, y_bin_test, y_multi_test, transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [6]:
model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
model.classifier = torch.nn.Identity()  # Sadece feature çıkarımı
model = model.to(device)
model.eval()


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
            (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActiv

In [7]:
def extract_features(dataloader):
    all_features = []
    all_bin_labels = []
    all_multi_labels = []

    with torch.no_grad():
        for inputs, bin_labels, multi_labels in dataloader:
            inputs = inputs.to(device)
            features = model(inputs)
            all_features.append(features.cpu().numpy())
            all_bin_labels.extend(bin_labels)
            all_multi_labels.extend(multi_labels)

    return np.concatenate(all_features), np.array(all_bin_labels), np.array(all_multi_labels)

In [8]:
X_train_feat, y_bin_train, y_multi_train = extract_features(train_loader)
X_test_feat, y_bin_test, y_multi_test = extract_features(test_loader)

In [9]:
rf_binary = RandomForestClassifier(n_estimators=100, random_state=42)
rf_binary.fit(X_train_feat, y_bin_train)
y_bin_pred = rf_binary.predict(X_test_feat)

print("🔎 Binary Classification Report:")
print(classification_report(y_bin_test, y_bin_pred, target_names=["Benign", "Malignant"]))


🔎 Binary Classification Report:
              precision    recall  f1-score   support

      Benign       1.00      0.98      0.99      3000
   Malignant       0.99      1.00      0.99      4500

    accuracy                           0.99      7500
   macro avg       0.99      0.99      0.99      7500
weighted avg       0.99      0.99      0.99      7500



In [10]:
rf_multi = RandomForestClassifier(n_estimators=100, random_state=42)
rf_multi.fit(X_train_feat, y_multi_train)
y_multi_pred = rf_multi.predict(X_test_feat)

print("🔎 Multiclass Classification Report:")
print(classification_report(y_multi_test, y_multi_pred, target_names=[
    "Lung Benign", "Lung ACA", "Lung SCC", "Colon Benign", "Colon ACA"
]))


🔎 Multiclass Classification Report:
              precision    recall  f1-score   support

 Lung Benign       1.00      0.99      0.99      1500
    Lung ACA       0.94      0.95      0.95      1500
    Lung SCC       0.95      0.96      0.96      1500
Colon Benign       1.00      0.98      0.99      1500
   Colon ACA       0.97      0.97      0.97      1500

    accuracy                           0.97      7500
   macro avg       0.97      0.97      0.97      7500
weighted avg       0.97      0.97      0.97      7500

