In [8]:
import torch
from torch.utils.data import Dataset
import h5py
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import models, transforms
from torch.utils.data import DataLoader, WeightedRandomSampler
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix
from tqdm import tqdm
import time
import copy
from collections import Counter

class PCamDataset(Dataset):
    def __init__(self, h5_x_path, h5_y_path, transform=None):
        self.x_h5 = h5py.File(h5_x_path, 'r')
        self.y_h5 = h5py.File(h5_y_path, 'r')
        self.transform = transform
        self.length = len(self.y_h5['y'])

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        image = self.x_h5['x'][idx]  # shape: (96, 96, 3), dtype: uint8
        label = self.y_h5['y'][idx].item()


        # numpy → PIL → transform
        image = image.astype(np.uint8)
        image = transforms.ToPILImage()(image)

        if self.transform:
            image = self.transform(image)

        return image, label

transform_train = transforms.Compose([
    transforms.RandomResizedCrop(96, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(0.2, 0.2, 0.2, 0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# データセットとデータローダーの設定 (あなたのコードをそのまま使用)
train_dataset = PCamDataset('camelyonpatch_level_2_split_train_x.h5',
                           'camelyonpatch_level_2_split_train_y.h5',
                           transform=transform_train)

test_dataset = PCamDataset('camelyonpatch_level_2_split_test_x.h5',
                          'camelyonpatch_level_2_split_test_y.h5',
                          transform=transform_test)

val_dataset = PCamDataset('valid_x_uncompressed.h5', 'valid_y_uncompressed.h5', transform=transform_test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)


In [9]:
import torch
import torch.nn as nn
from torchvision.models import resnet50, ResNet50_Weights

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained ResNet50 & modify final layer
model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, 2)  # PCamは2クラス
model = model.to(device)


In [10]:
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Focal Lossで難しい例を強調（重みも自動調整可）
class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0, weight=None):
        super().__init__()
        self.gamma = gamma
        self.ce = nn.CrossEntropyLoss(weight=weight)

    def forward(self, input, target):
        logpt = -self.ce(input, target)
        pt = torch.exp(logpt)
        loss = -((1 - pt) ** self.gamma) * logpt
        return loss.mean()

# 重みの調整（Class1に重みをかける）
class_weights = torch.tensor([1.0, 2.0]).to(device)  # 正例を重視
criterion = FocalLoss(weight=class_weights)
optimizer = Adam(model.parameters(), lr=1e-4)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)


In [None]:
from tqdm import tqdm
import numpy as np

def evaluate(model, loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            preds = torch.argmax(outputs, dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / total

best_val_acc = 0
for epoch in range(30):
    model.train()
    total_loss = 0
    for imgs, labels in tqdm(train_loader):
        imgs, labels = imgs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    val_acc = evaluate(model, val_loader)
    scheduler.step(val_acc)

    print(f"Epoch {epoch}: Train Loss={total_loss:.4f}, Val Acc={val_acc:.4f}")
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_model.pth")
        print("Best model saved!")


100%|██████████| 4096/4096 [04:25<00:00, 15.40it/s]


Epoch 0: Train Loss=52.7334, Val Acc=0.8726
Best model saved!


100%|██████████| 4096/4096 [04:31<00:00, 15.11it/s]


Epoch 1: Train Loss=28.6109, Val Acc=0.8727
Best model saved!


100%|██████████| 4096/4096 [04:32<00:00, 15.05it/s]


Epoch 2: Train Loss=21.8021, Val Acc=0.8821
Best model saved!


100%|██████████| 4096/4096 [04:32<00:00, 15.03it/s]


Epoch 3: Train Loss=18.1510, Val Acc=0.8835
Best model saved!


100%|██████████| 4096/4096 [04:30<00:00, 15.14it/s]


Epoch 4: Train Loss=16.0733, Val Acc=0.8950
Best model saved!


100%|██████████| 4096/4096 [04:32<00:00, 15.05it/s]


Epoch 5: Train Loss=14.2135, Val Acc=0.8770


100%|██████████| 4096/4096 [04:31<00:00, 15.09it/s]


Epoch 6: Train Loss=12.4149, Val Acc=0.8841


100%|██████████| 4096/4096 [04:33<00:00, 14.96it/s]


Epoch 7: Train Loss=11.2573, Val Acc=0.8805


100%|██████████| 4096/4096 [04:33<00:00, 14.98it/s]


Epoch 8: Train Loss=10.8159, Val Acc=0.8833


100%|██████████| 4096/4096 [04:32<00:00, 15.01it/s]


Epoch 9: Train Loss=7.3442, Val Acc=0.8905


100%|██████████| 4096/4096 [04:33<00:00, 14.99it/s]


Epoch 10: Train Loss=6.2486, Val Acc=0.8889


100%|██████████| 4096/4096 [04:33<00:00, 15.00it/s]


Epoch 11: Train Loss=6.0977, Val Acc=0.8838


100%|██████████| 4096/4096 [04:33<00:00, 14.96it/s]


Epoch 12: Train Loss=5.6346, Val Acc=0.8860


100%|██████████| 4096/4096 [04:33<00:00, 14.98it/s]


Epoch 13: Train Loss=4.3818, Val Acc=0.8891


100%|██████████| 4096/4096 [04:31<00:00, 15.08it/s]


Epoch 14: Train Loss=4.0360, Val Acc=0.8866


100%|██████████| 4096/4096 [04:31<00:00, 15.09it/s]


Epoch 15: Train Loss=3.6962, Val Acc=0.8847


 82%|████████▏ | 3359/4096 [03:44<00:49, 14.98it/s]


KeyboardInterrupt: 

: 