In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive so that the
# later cells can read and write files below /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import zipfile
import os


# Location of the downloaded archive on the mounted Drive.
zip_path = "/content/drive/MyDrive/archive.zip"
# Folder the archive is unpacked into.
extract_path = "/content/drive/MyDrive/breakhis_dataset"

# Unpack every member of the archive below extract_path; the context manager
# closes the archive again once extraction has finished.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)



In [None]:
import os

# NOTE(review): this folder differs from the extraction target used by the
# earlier unzip cell ("/content/drive/MyDrive/breakhis_dataset") — confirm
# which of the two locations actually holds the data.
extract_path = "/content/drive/MyDrive/Yüksek Lisans/ML/breakhis_dataset"

# Show the entries directly below the BreaKHis_v1 folder as a quick sanity check.
breakhis_root = os.path.join(extract_path, "BreaKHis_v1")
breakhis_entries = os.listdir(breakhis_root)
print("BreaKHis_v1 içeriği:", breakhis_entries)


In [None]:
# Gerekli kütüphaneler
import os
import random
from PIL import Image
from collections import Counter

import torch
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms

# ---------------------------
# 1️⃣ Dataset Path
# ---------------------------
# Sub-class folder codes grouped by their top-level class
# (presumably the BreaKHis tumour sub-type abbreviations — confirm).
sub_classes = dict(
    benign=['A', 'F', 'PT', 'TA'],
    malignant=['DC', 'LC', 'MC', 'PC'],
)

# Top-level classes, in the same order as the keys of the mapping above.
classes = list(sub_classes)

# Root folder that is scanned for <main class>/<sub class>/<image> files.
# NOTE(review): this points at a kagglehub cache, while the earlier cells
# unpack the archive onto Google Drive — confirm the data really lives here.
data_path = "/root/.cache/kagglehub/datasets/ambarish/breakhis/versions/4/BreaKHis_v1/breast"

# ---------------------------
# 2️⃣ Image Processing & Transform
# ---------------------------
# Per-channel ImageNet mean / standard deviation used for normalisation.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training pipeline: fixed-size resize, random augmentation, then tensor
# conversion and normalisation.
_train_steps = [
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),  # [0,255] -> [0,1]
    transforms.Normalize(mean=mean, std=std),
]
train_transform = transforms.Compose(_train_steps)

# Evaluation pipeline: the same resize and normalisation, without any
# random augmentation.
_test_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
]
test_transform = transforms.Compose(_test_steps)

# ---------------------------
# 3️⃣ Custom Dataset
# ---------------------------
class BreakHisDataset(Dataset):
    """Image dataset over a ``<root_dir>/<main class>/<sub class>/<image>`` tree.

    Every image file found directly inside a sub-class folder becomes one
    sample, labelled with the integer index of its sub-class.

    Args:
        root_dir: Folder that contains one sub-folder per main class.
        transform: Optional callable applied to the RGB ``PIL.Image`` before it
            is returned from ``__getitem__``.
        class_map: Optional mapping ``main class -> list of sub-class folder
            names``. When omitted, the module-level ``sub_classes`` mapping is
            used, which matches the previous behaviour.

    Attributes:
        samples: List of ``(image path, sub-class name)`` tuples, ordered by
            class-map order and then by file name.
        cls2idx: Mapping ``sub-class name -> integer label``, numbered in the
            order the sub-classes appear in the class map.
    """

    # File extensions treated as images; compared case-insensitively.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None, class_map=None):
        import warnings  # local import so the block stays self-contained

        if class_map is None:
            class_map = sub_classes

        self.transform = transform
        self.samples = []

        for main_cls, sub_list in class_map.items():
            for sub_cls in sub_list:
                cls_path = os.path.join(root_dir, main_cls, sub_cls)
                # Missing (or non-directory) class folders are skipped.
                if not os.path.isdir(cls_path):
                    continue
                # os.listdir returns entries in arbitrary order; sorting keeps
                # sample indices stable across runs and machines.
                for img_name in sorted(os.listdir(cls_path)):
                    if img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                        self.samples.append((os.path.join(cls_path, img_name), sub_cls))

        # Label to integer mapping, covering every configured sub-class even
        # when no image was found for it.
        self.cls2idx = {
            cls: i
            for i, cls in enumerate(sc for subs in class_map.values() for sc in subs)
        }

        if not self.samples:
            # An empty scan almost always means a wrong root folder or an
            # unexpected directory layout, so make it visible.
            warnings.warn(
                f"BreakHisDataset: no images found under {root_dir!r}; "
                "check the path and the <main class>/<sub class> layout.",
                RuntimeWarning,
            )

    def __len__(self):
        """Return the number of collected samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, integer label)``."""
        img_path, label = self.samples[idx]
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied by convert(), instead of leaving it to the GC.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, self.cls2idx[label]

# ---------------------------
# 4️⃣ Dataset & Undersampling / Weighted Sampling
# ---------------------------
full_dataset = BreakHisDataset(data_path, transform=train_transform)

# Fail early with an actionable message: with zero samples the sampler below
# would raise a ValueError about num_samples that hides the real cause
# (wrong data_path or an unexpected folder layout).
if len(full_dataset) == 0:
    raise ValueError(
        f"No images found under {data_path!r}; "
        "check data_path and the <main class>/<sub class> folder layout."
    )

# Class balancing with a weighted sampler: every sample is weighted by the
# inverse frequency of its class, so each class is drawn with roughly equal
# probability.
labels = [label for _, label in full_dataset.samples]
labels_idx = [full_dataset.cls2idx[lbl] for lbl in labels]
class_counts = Counter(labels_idx)
num_samples = sum(class_counts.values())
# Compute the weight once per class instead of once per sample.
class_weight = {cls_idx: num_samples / count for cls_idx, count in class_counts.items()}
weights = [class_weight[l] for l in labels_idx]

# Draw num_samples indices per epoch, with replacement, so minority classes
# can be repeated.
sampler = WeightedRandomSampler(weights, num_samples=num_samples, replacement=True)

# ---------------------------
# 5️⃣ DataLoader
# ---------------------------
# The sampler decides the order, so shuffle must not be passed as well.
train_loader = DataLoader(full_dataset, batch_size=16, sampler=sampler, num_workers=2)


['Folds.csv', 'BreaKHis_v1']
