<a href="https://colab.research.google.com/github/daichinakahata/kadai-app/blob/main/Untitled6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
!pip install torchmetrics[image] torch-fidelity opencv-python-headless



In [31]:
!pip install torchmetrics



In [32]:
import os
import datetime
import pytz
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from pathlib import Path
import kagglehub
import cv2
import gc # ガベージコレクション用

# FID計算用
from torchmetrics.image.fid import FrechetInceptionDistance

# --- 1. Settings and hyperparameters ---
IMAGE_SIZE = 128  # side length (pixels) used by the transforms and by every network
batch_size = 8  # changed from 16 to 8 to avoid running out of memory
epochs = 20
nz = 100  # number of latent (noise) channels fed to the generators
lrG = 0.0002  # Adam learning rate for both generators
lrD = 0.0001  # Adam learning rate for both discriminators
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print(f"Using device: {device} | Resolution: {IMAGE_SIZE}x{IMAGE_SIZE}")

# --- 2. Dataset preparation ---
path = kagglehub.dataset_download("badasstechie/celebahq-resized-256x256")
_half = (0.5, 0.5, 0.5)
_preprocess_steps = [
    transforms.Resize(IMAGE_SIZE),
    transforms.CenterCrop(IMAGE_SIZE),
    transforms.ToTensor(),
    # Per-channel mean 0.5 / std 0.5; later code undoes this with ``x * 0.5 + 0.5``.
    transforms.Normalize(_half, _half),
]
transform = transforms.Compose(_preprocess_steps)
dataset = datasets.ImageFolder(root=Path(path), transform=transform)
train_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# --- 3. Output directory ---
# The run directory is stamped with the current Asia/Tokyo time (minute resolution).
_run_stamp = datetime.datetime.now(pytz.timezone("Asia/Tokyo")).strftime("%Y%m%d_%H%M")
BASE_OUT_PATH = f'./2026/GAN_Comp_128_Fix_{_run_stamp}'
os.makedirs(BASE_OUT_PATH, exist_ok=True)

# --- 4. Model definitions ---

# A. DCGAN
class DC_Generator(nn.Module):
    """Transposed-convolution generator of the DCGAN branch.

    The first layer takes ``nz`` input channels.  For a 1x1 spatial input the
    first block produces a 4x4 map and each of the five following stride-2
    blocks doubles the resolution, giving a 3-channel 128x128 output.  The
    final ``Tanh`` keeps the output in [-1, 1].
    """

    def __init__(self):
        super().__init__()
        widths = (1024, 512, 256, 128, 64)

        # Project the latent input to the first (widest) feature map.
        blocks = [
            nn.ConvTranspose2d(nz, widths[0], 4, 1, 0, bias=False),
            nn.BatchNorm2d(widths[0]),
            nn.ReLU(True),
        ]
        # Stride-2 upsampling blocks: halve the channel count each time.
        for c_in, c_out in zip(widths, widths[1:]):
            blocks.extend([
                nn.ConvTranspose2d(c_in, c_out, 4, 2, 1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ])
        # Output block: RGB image squashed by Tanh (no batch norm here).
        blocks.extend([
            nn.ConvTranspose2d(widths[-1], 3, 4, 2, 1, bias=False),
            nn.Tanh(),
        ])
        self.main = nn.Sequential(*blocks)

    def forward(self, x):
        """Return the image batch generated from the latent tensor ``x``."""
        return self.main(x)

class DC_Discriminator(nn.Module):
    """Convolutional discriminator of the DCGAN branch.

    Five stride-2 convolutions reduce a 3-channel 128x128 input to a 4x4 map;
    the last 4x4 convolution collapses it to one value per image, which the
    ``Sigmoid`` maps into (0, 1).  Output shape for such an input is
    ``(N, 1, 1, 1)``.

    All ``LeakyReLU`` layers are created with ``inplace=False``.
    """

    def __init__(self):
        super().__init__()
        # First stage has no batch norm.
        stages = [
            nn.Conv2d(3, 32, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, False),
        ]
        in_ch = 32
        for out_ch in (64, 128, 256, 512):
            stages.append(nn.Conv2d(in_ch, out_ch, 4, 2, 1, bias=False))
            stages.append(nn.BatchNorm2d(out_ch))
            stages.append(nn.LeakyReLU(0.2, False))
            in_ch = out_ch
        # Final 4x4 convolution without padding + sigmoid score.
        stages.append(nn.Conv2d(in_ch, 1, 4, 1, 0, bias=False))
        stages.append(nn.Sigmoid())
        self.main = nn.Sequential(*stages)

    def forward(self, x):
        """Return the discriminator score tensor for the image batch ``x``."""
        return self.main(x)

# B. Plain GAN (MLP-based)
class Simple_Generator(nn.Module):
    """Fully connected generator of the plain-GAN branch.

    The latent input is flattened to ``(N, -1)``, passed through three linear
    layers and reshaped to ``(N, 3, IMAGE_SIZE, IMAGE_SIZE)``.  ``Tanh`` keeps
    the output in [-1, 1].

    NOTE(review): with IMAGE_SIZE = 128 the last layer holds 2048 x 49152
    weights (about 384 MiB if stored as float32), which matches the size of
    the allocation that failed in the recorded run.
    """

    def __init__(self):
        super().__init__()
        hidden_dims = (nz, 1024, 2048)
        pixel_count = 3 * IMAGE_SIZE * IMAGE_SIZE

        layers = []
        for d_in, d_out in zip(hidden_dims, hidden_dims[1:]):
            layers.append(nn.Linear(d_in, d_out))
            layers.append(nn.LeakyReLU(0.2, True))
        layers.append(nn.Linear(hidden_dims[-1], pixel_count))
        layers.append(nn.Tanh())
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Return generated images for the latent batch ``x``."""
        flat_latent = x.view(x.size(0), -1)
        flat_image = self.main(flat_latent)
        return flat_image.view(-1, 3, IMAGE_SIZE, IMAGE_SIZE)

class Simple_Discriminator(nn.Module):
    """Fully connected discriminator of the plain-GAN branch.

    Each image is flattened to ``3 * IMAGE_SIZE * IMAGE_SIZE`` values and
    mapped through three linear layers to a single sigmoid score, so the
    output shape is ``(N, 1)``.
    """

    def __init__(self):
        super().__init__()
        layer_dims = (3 * IMAGE_SIZE * IMAGE_SIZE, 1024, 512)

        layers = []
        for d_in, d_out in zip(layer_dims, layer_dims[1:]):
            layers.append(nn.Linear(d_in, d_out))
            layers.append(nn.LeakyReLU(0.2, True))
        layers.append(nn.Linear(layer_dims[-1], 1))
        layers.append(nn.Sigmoid())
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Return one score per image in the batch ``x``."""
        flat_image = x.view(x.size(0), -1)
        return self.main(flat_image)

# --- 5. Grad-CAM (for the DCGAN discriminator) ---
class GradCAM:
    """Grad-CAM heat-map helper bound to one layer of a model.

    A forward hook records the target layer's output and a full backward hook
    records the gradient flowing into that output.  ``generate`` combines the
    two into a normalised heat-map.

    The hook handles are kept so the hooks can be detached again with
    ``remove()``; without that, every new ``GradCAM`` built on the same layer
    would leave another pair of permanent hooks on the model.

    Args:
        model: module whose forward pass produces the score to explain.
        target_layer: sub-module of ``model`` whose activations are visualised.
    """

    def __init__(self, model, target_layer):
        self.model, self.target_layer = model, target_layer
        self.gradients, self.activations = None, None
        self._handles = [
            self.target_layer.register_forward_hook(self._save_activations),
            self.target_layer.register_full_backward_hook(self._save_gradients),
        ]

    def _save_activations(self, module, inputs, output):
        # Store a detached view so this object does not keep the autograd
        # graph of the last forward pass alive.
        self.activations = output.detach()

    def _save_gradients(self, module, grad_input, grad_output):
        # grad_output[0] is the gradient w.r.t. the layer's (first) output.
        self.gradients = grad_output[0].detach()

    def remove(self):
        """Detach both hooks from the target layer (safe to call repeatedly)."""
        for handle in self._handles:
            handle.remove()
        self._handles = []

    def generate(self, input_tensor):
        """Compute the Grad-CAM map for ``input_tensor``.

        The model output must contain exactly one element, because it is
        back-propagated without an explicit gradient and read with ``.item()``;
        in this file it is called with a single image.

        Returns:
            ``(cam, score)`` where ``cam`` is a NumPy array min-max scaled to
            [0, 1] and ``score`` is the model output as a Python float.
        """
        self.model.zero_grad()
        output = self.model(input_tensor)
        output.backward()
        # Channel weights: spatial mean of the gradients.
        weights = torch.mean(self.gradients, dim=(2, 3), keepdim=True)
        weighted_sum = torch.sum(weights * self.activations, dim=1).squeeze()
        cam = F.relu(weighted_sum).detach().cpu().numpy()
        # The epsilon avoids division by zero when the map is constant.
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-8)
        return cam, output.item()

# --- 6. Initialisation ---
# When this cell is re-run in the same kernel, the objects created by the
# previous run are still bound to these names while the new ones are being
# allocated, so the networks, FID metrics and Adam states would briefly exist
# twice.  Drop the old bindings and collect them first.
# NOTE(review): the recorded run hit a CUDA OOM with ~13 GiB already allocated;
# presumably that was left over from earlier runs in the same process. If the
# error persists after this cleanup, restart the runtime.
for _stale_name in (
    "gcam",
    "netG_dc", "netD_dc", "netG_si", "netD_si",
    "fid_dc", "fid_si",
    "optG_dc", "optD_dc", "optG_si", "optD_si",
):
    globals().pop(_stale_name, None)
gc.collect()
if torch.cuda.is_available():
    # Hand the now-unused cached blocks back before the large allocations below.
    torch.cuda.empty_cache()

# One generator/discriminator pair per GAN variant.
netG_dc, netD_dc = DC_Generator().to(device), DC_Discriminator().to(device)
netG_si, netD_si = Simple_Generator().to(device), Simple_Discriminator().to(device)

# One FID metric per variant (each is constructed separately).
fid_dc = FrechetInceptionDistance(feature=2048).to(device)
fid_si = FrechetInceptionDistance(feature=2048).to(device)

# Adam with betas=(0.5, 0.999); generators use lrG, discriminators use lrD.
optG_dc = optim.Adam(netG_dc.parameters(), lr=lrG, betas=(0.5, 0.999))
optD_dc = optim.Adam(netD_dc.parameters(), lr=lrD, betas=(0.5, 0.999))
optG_si = optim.Adam(netG_si.parameters(), lr=lrG, betas=(0.5, 0.999))
optD_si = optim.Adam(netD_si.parameters(), lr=lrD, betas=(0.5, 0.999))

# Binary cross-entropy on the sigmoid outputs of both discriminators.
criterion = nn.BCELoss()

# --- 7. Training loop ---
def _gan_step(net_g, net_d, opt_g, opt_d, real_batch, latent, real_target, fake_target):
    """Run one discriminator update followed by one generator update.

    The discriminator is trained on ``real_batch`` against ``real_target`` and
    on detached generator output against ``fake_target``.  The generator is
    then trained so that the updated discriminator scores its output as
    ``real_target``.  All intermediate tensors are locals, so their autograd
    graphs are released when the function returns.
    """
    # Discriminator update
    net_d.zero_grad()
    score_real = net_d(real_batch).view(-1)
    fake_batch = net_g(latent)
    score_fake = net_d(fake_batch.detach()).view(-1)
    loss_d = criterion(score_real, real_target) + criterion(score_fake, fake_target)
    loss_d.backward()
    opt_d.step()

    # Generator update (gradients also reach net_d; they are cleared by the
    # zero_grad() at the start of the next discriminator update).
    net_g.zero_grad()
    loss_g = criterion(net_d(fake_batch).view(-1), real_target)
    loss_g.backward()
    opt_g.step()


def _to_uint8_images(batch):
    """Map a batch normalised with mean/std 0.5 to uint8 values in [0, 255]."""
    # Clamp first so rounding error can never wrap around in the uint8 cast.
    return ((batch * 0.5 + 0.5).clamp(0, 1) * 255).to(torch.uint8)


print("Training Start (128x128 / Batch 8)...")
for epoch in range(epochs):
    for i, (images, _) in enumerate(train_loader):
        b_size = images.size(0)
        images = images.to(device)
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        # Both GANs use the same targets (0.9 for "real", 0.0 for "fake"),
        # so build them once per batch instead of once per loss term.
        real_target = torch.full((b_size,), 0.9, device=device)
        fake_target = torch.full((b_size,), 0.0, device=device)

        # --- DCGAN update ---
        _gan_step(netG_dc, netD_dc, optG_dc, optD_dc, images, noise, real_target, fake_target)

        # --- Plain (MLP) GAN update, same real batch and same noise ---
        _gan_step(netG_si, netD_si, optG_si, optD_si, images, noise, real_target, fake_target)

        # Release the batch tensors before the next batch is loaded.
        del images, noise, real_target, fake_target

    # Return unused cached blocks once per epoch.  Doing this after every
    # update only slows training down: it does not give PyTorch more memory.
    torch.cuda.empty_cache()

    with torch.no_grad():
        # FID on a single batch of real images and equally many generated ones.
        # NOTE(review): one batch is a very small sample for 2048-dimensional
        # features, so treat the value as a rough trend only.
        # NOTE(review): the generators are still in training mode here, so
        # BatchNorm uses batch statistics -- confirm this is intended.
        real_sample, _ = next(iter(train_loader))
        real_sample = real_sample.to(device)
        r_uint = _to_uint8_images(real_sample)

        noise_vis = torch.randn(real_sample.size(0), nz, 1, 1, device=device)
        f_dc_uint = _to_uint8_images(netG_dc(noise_vis))
        f_si_uint = _to_uint8_images(netG_si(noise_vis))

        fid_dc.update(r_uint, real=True)
        fid_dc.update(f_dc_uint, real=False)
        fid_si.update(r_uint, real=True)
        fid_si.update(f_si_uint, real=False)
        print(f"Epoch [{epoch+1}] FID_DC: {fid_dc.compute().item():.2f} | FID_Simple: {fid_si.compute().item():.2f}")
        # Start the next epoch's measurement from empty statistics.
        fid_dc.reset()
        fid_si.reset()

# --- 8. Final comparison figure ---
def _to_display(chw_image):
    """Turn a normalised CHW image into an HWC image clamped to [0, 1]."""
    return (chw_image.permute(1, 2, 0) * 0.5 + 0.5).clamp(0, 1)


def _draw_panel(ax, picture, caption):
    """Show ``picture`` on ``ax`` with ``caption`` as title and no axes."""
    ax.imshow(picture)
    ax.set_title(caption)
    ax.axis("off")


print("Generating Final Comparison (128x128)...")
for _net in (netD_dc, netG_dc, netG_si):
    _net.eval()
# main[10] is the LeakyReLU that follows the 256-channel convolution block.
gcam = GradCAM(netD_dc, netD_dc.main[10])

real_imgs, _ = next(iter(train_loader))
noise = torch.randn(2, nz, 1, 1, device=device)
f_dc = netG_dc(noise).detach()
f_si = netG_si(noise).detach()

# Two rows (one per sample) x four columns: real, DCGAN, Grad-CAM overlay, plain GAN.
fig, axes = plt.subplots(2, 4, figsize=(20, 10))

for i, row_name in enumerate(["1枚目", "2枚目"]):
    # Real image
    img_real = _to_display(real_imgs[i])
    _draw_panel(axes[i, 0], img_real, f"real{row_name}")

    # DCGAN sample
    img_dc = _to_display(f_dc[i].cpu())
    _draw_panel(axes[i, 1], img_dc, f"DCGAN{row_name}")

    # Grad-CAM of the discriminator on the DCGAN sample, blended 40/60 with it
    mask, score = gcam.generate(f_dc[i:i+1])
    mask_full = cv2.resize(mask, (IMAGE_SIZE, IMAGE_SIZE))
    heatmap = cv2.applyColorMap(np.uint8(255 * mask_full), cv2.COLORMAP_JET)
    heatmap_rgb = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB) / 255.0
    blended = heatmap_rgb * 0.4 + img_dc.numpy() * 0.6
    _draw_panel(axes[i, 2], blended.clip(0, 1), f"DCGAN(Grad-Cam)\nS:{score:.3f}")

    # Plain GAN sample
    img_si = _to_display(f_si[i].cpu())
    _draw_panel(axes[i, 3], img_si, f"GAN {row_name}")

plt.tight_layout()
plt.savefig(os.path.join(BASE_OUT_PATH, "comparison_128_fix.png"))
plt.show()
print(f"DONE. Path: {BASE_OUT_PATH}")

Using device: cuda | Resolution: 128x128
Using Colab cache for faster access to the 'celebahq-resized-256x256' dataset.


OutOfMemoryError: CUDA out of memory. Tried to allocate 384.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 64.12 MiB is free. Process 6271 has 14.68 GiB memory in use. Of the allocated memory 13.33 GiB is allocated by PyTorch, and 1.21 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)