In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import h5py
import numpy as np
from tqdm import tqdm
import pandas as pd
import os
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision.models import resnet50
import numpy as np
from torch.utils.data import Dataset
import time
import copy

In [2]:
# ----------------------------
# Dataset definition (for PCam)
# ----------------------------
class PCamDataset(Dataset):
    """Dataset that serves PCam patches (and optionally labels) from HDF5 files.

    Images are read from the ``'x'`` dataset of ``h5_x_path``. When
    ``h5_y_path`` is given, labels are read from the ``'y'`` dataset of that
    file and each item is an ``(image, label)`` pair; otherwise each item is
    just the image.

    The HDF5 files are opened lazily on first item access and the handles are
    reused afterwards, instead of re-opening both files for every sample.
    Handles are tracked per process and dropped when the dataset is pickled,
    so each DataLoader worker opens its own handles.

    Args:
        h5_x_path: Path to the HDF5 file holding the ``'x'`` image dataset.
        h5_y_path: Optional path to the HDF5 file holding the ``'y'`` labels.
        transform: Optional callable applied to the PIL image.
    """

    def __init__(self, h5_x_path, h5_y_path=None, transform=None):
        self.x_path = h5_x_path
        self.y_path = h5_y_path
        self.transform = transform
        self.has_labels = h5_y_path is not None

        # Built once instead of on every __getitem__ call.
        self._to_pil = transforms.ToPILImage()

        # Lazily opened handles plus the PID of the process that opened them.
        self._x_file = None
        self._y_file = None
        self._owner_pid = None

        # Only the length is needed up front; the file is closed right away so
        # no open handle exists before workers are started.
        with h5py.File(h5_x_path, 'r') as x_file:
            self.length = len(x_file['x'])

    def __len__(self):
        return self.length

    def _ensure_open(self):
        """Open the HDF5 files in the current process if not already open."""
        pid = os.getpid()
        if self._owner_pid != pid:
            # Handles opened by another process (e.g. inherited through fork)
            # are discarded rather than reused.
            self._x_file = None
            self._y_file = None
            self._owner_pid = pid
        if self._x_file is None:
            self._x_file = h5py.File(self.x_path, 'r')
        if self.has_labels and self._y_file is None:
            self._y_file = h5py.File(self.y_path, 'r')

    def close(self):
        """Close any cached HDF5 handles; they are reopened on next access."""
        for attr in ('_x_file', '_y_file'):
            handle = getattr(self, attr, None)
            if handle is not None:
                try:
                    handle.close()
                except Exception:
                    # Best effort: the handle may already be invalid.
                    pass
                setattr(self, attr, None)

    def __getstate__(self):
        # Open file handles must not travel to worker processes via pickling.
        state = self.__dict__.copy()
        state['_x_file'] = None
        state['_y_file'] = None
        state['_owner_pid'] = None
        return state

    def __del__(self):
        try:
            self.close()
        except Exception:
            # Never raise from a finalizer (e.g. during interpreter shutdown).
            pass

    def __getitem__(self, idx):
        self._ensure_open()

        image = self._x_file['x'][idx]
        image = self._to_pil(image.astype(np.uint8))

        if self.transform:
            image = self.transform(image)

        if self.has_labels:
            label = self._y_file['y'][idx].astype(np.float32)
            return image, label
        return image


In [3]:
# ----------------------------
# Image transforms
# ----------------------------
# Per-channel mean / std used for normalisation in both pipelines.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _tensor_and_normalize():
    """Return the tensor-conversion + normalisation tail shared by all pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]


# Training pipeline: random geometric and colour augmentation, then the shared tail.
_train_augmentations = [
    transforms.RandomResizedCrop(96, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
]
transform_train = transforms.Compose(_train_augmentations + _tensor_and_normalize())

# Validation / test pipeline: no augmentation, only the shared tail.
transform_val_test = transforms.Compose(_tensor_and_normalize())


In [4]:
# ----------------------------
# Datasets and data loaders
# ----------------------------
train_dataset = PCamDataset(
    h5_x_path='camelyonpatch_level_2_split_train_x.h5',
    h5_y_path='camelyonpatch_level_2_split_train_y.h5',
    transform=transform_train,
)

val_dataset = PCamDataset(
    h5_x_path='valid_x_uncompressed.h5',
    h5_y_path='valid_y_uncompressed.h5',
    transform=transform_val_test,
)

# The test split is loaded together with its labels.
test_dataset = PCamDataset(
    h5_x_path='camelyonpatch_level_2_split_test_x.h5',
    h5_y_path='camelyonpatch_level_2_split_test_y.h5',
    transform=transform_val_test,
)

# Settings common to all three loaders; only the training loader shuffles.
_loader_kwargs = dict(batch_size=64, num_workers=4, pin_memory=True)

train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

In [None]:

# ----------------------------
# Device selection
# ----------------------------
# Use the first CUDA device when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# ----------------------------
# Model definition (ResNet50)
# ----------------------------
def create_model(num_classes=2, dropout=0.5, pretrained=True):
    """Build a ResNet-50 whose final layer is replaced by dropout + linear.

    Args:
        num_classes: Number of output units of the new linear layer
            (default 2, i.e. two-class classification).
        dropout: Dropout probability placed before the linear layer
            (default 0.5), used to reduce overfitting.
        pretrained: When True (default) start from ImageNet-pretrained
            weights; when False the backbone is randomly initialised.

    Returns:
        The ``resnet50`` model with ``model.fc`` replaced by
        ``nn.Sequential(nn.Dropout(dropout), nn.Linear(in_features, num_classes))``.
    """
    try:
        # Newer torchvision exposes a weights enum; the boolean `pretrained`
        # flag is deprecated there. IMAGENET1K_V1 is what `pretrained=True`
        # used to load.
        from torchvision.models import ResNet50_Weights
    except ImportError:
        # Older torchvision: only the boolean flag exists.
        model = resnet50(pretrained=pretrained)
    else:
        weights = ResNet50_Weights.IMAGENET1K_V1 if pretrained else None
        model = resnet50(weights=weights)

    # Replace the final fully connected layer with the task-specific head.
    num_ftrs = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Dropout(dropout),
        nn.Linear(num_ftrs, num_classes),
    )
    return model

# ----------------------------
# Training function
# ----------------------------
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, dataloaders=None):
    """Train ``model`` and return it loaded with the best-validation weights.

    Each epoch runs a ``'train'`` phase followed by a ``'val'`` phase. The
    weights with the highest validation accuracy seen so far are kept and
    loaded back into the model before it is returned.

    Inputs and labels are moved to the module-level ``device``.

    Args:
        model: Network to train; its output is passed to ``criterion``
            together with 1-D integer class labels.
        criterion: Loss callable ``criterion(outputs, labels)``.
        optimizer: Optimizer updating ``model``'s parameters.
        scheduler: Learning-rate scheduler. A ``ReduceLROnPlateau`` is stepped
            once per epoch with the validation loss; any other scheduler is
            stepped once per epoch after the training phase.
        num_epochs: Number of epochs to run.
        dataloaders: Optional dict with ``'train'`` and ``'val'`` loaders.
            Defaults to the module-level ``train_loader`` / ``val_loader``.

    Returns:
        ``model`` with the best validation-accuracy weights loaded.
    """
    if dataloaders is None:
        dataloaders = {'train': train_loader, 'val': val_loader}

    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    steps_on_metric = isinstance(scheduler, ReduceLROnPlateau)

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training phase and a validation phase.
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()
            dataloader = dataloaders[phase]

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloader:
                inputs = inputs.to(device)
                # reshape(-1) flattens the labels to one entry per sample while
                # keeping the batch dimension even when the batch holds a
                # single sample (squeeze() would collapse it to a 0-dim tensor).
                labels = labels.to(device).long().reshape(-1)

                optimizer.zero_grad()

                # Track gradients only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by batch size so the epoch loss is a
                # per-sample average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            num_samples = len(dataloader.dataset)
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples

            if steps_on_metric:
                # ReduceLROnPlateau needs the monitored metric: validation loss.
                if not is_train:
                    scheduler.step(epoch_loss)
            elif is_train:
                scheduler.step()

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Keep a copy of the weights with the best validation accuracy.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:.4f}')

    # Load the best weights before returning.
    model.load_state_dict(best_model_wts)
    return model

# ----------------------------
# Evaluation function
# ----------------------------
def evaluate_model(model, dataloader):
    """Compute, print and return the classification accuracy of ``model``.

    The model is put in eval mode and run without gradient tracking over
    every batch of ``dataloader``; inputs and labels are moved to the
    module-level ``device``. The prediction is the arg-max over dimension 1 of
    the model output.

    Args:
        model: Network to evaluate.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        A 0-dim float64 tensor holding ``correct / total``.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()
    corrects = torch.zeros((), dtype=torch.long, device=device)
    total = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            # reshape(-1) keeps the batch dimension even for a single-sample
            # batch, where squeeze() would produce a 0-dim tensor and
            # labels.size(0) would fail.
            labels = labels.to(device).long().reshape(-1)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            corrects += torch.sum(preds == labels)
            total += labels.size(0)

    if total == 0:
        raise ValueError('evaluate_model received a dataloader that yielded no samples')

    accuracy = corrects.double() / total
    print(f'Test Accuracy: {accuracy:.4f}')
    return accuracy

# ----------------------------
# Main entry point
# ----------------------------
if __name__ == '__main__':
    # Build the network and move it to the selected device.
    model = create_model().to(device)

    # Loss function and optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

    # Step scheduler: multiply the learning rate by 0.1 every 7 epochs.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    # Train the model.
    model = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=25)

    # Evaluate on the test split.
    test_acc = evaluate_model(model, test_loader)

    # The weights are saved only when test accuracy reaches 95% or more.
    reached_target = bool(test_acc >= 0.95)
    if not reached_target:
        print("Did not reach target accuracy. Consider tuning hyperparameters.")
    else:
        torch.save(model.state_dict(), 'best_model.pth')
        print("Successfully achieved 95%+ accuracy!")




Epoch 0/24
----------
train Loss: 0.3481 Acc: 0.8496
val Loss: 0.3421 Acc: 0.8500

Epoch 1/24
----------
train Loss: 0.2936 Acc: 0.8777
val Loss: 0.3827 Acc: 0.8467

Epoch 2/24
----------
train Loss: 0.2714 Acc: 0.8878
val Loss: 0.3403 Acc: 0.8591

Epoch 3/24
----------
train Loss: 0.2594 Acc: 0.8945
val Loss: 0.3280 Acc: 0.8695

Epoch 4/24
----------
train Loss: 0.2503 Acc: 0.8990
val Loss: 0.3044 Acc: 0.8713

Epoch 5/24
----------
train Loss: 0.2429 Acc: 0.9020
val Loss: 0.3549 Acc: 0.8488

Epoch 6/24
----------
train Loss: 0.2395 Acc: 0.9040
val Loss: 0.4228 Acc: 0.8341

Epoch 7/24
----------
train Loss: 0.2028 Acc: 0.9210
val Loss: 0.3147 Acc: 0.8733

Epoch 8/24
----------
train Loss: 0.1938 Acc: 0.9252
val Loss: 0.3175 Acc: 0.8782

Epoch 9/24
----------
train Loss: 0.1878 Acc: 0.9278
val Loss: 0.3315 Acc: 0.8729

Epoch 10/24
----------
train Loss: 0.1842 Acc: 0.9295
val Loss: 0.3647 Acc: 0.8631

Epoch 11/24
----------
train Loss: 0.1809 Acc: 0.9308
val Loss: 0.3186 Acc: 0.8800

Ep

KeyboardInterrupt: 

: 