#### 学習用画像作成

In [41]:
from PIL import Image, ImageDraw
import random

def create_train_image(img_size=(28, 28), rect_size=20, brightness_range=(160, 255)):
    """Create one training image: a filled square on a black grayscale canvas.

    The square is placed at a random position chosen so that it lies entirely
    inside the image, and it is filled with a single random brightness.

    Args:
        img_size: ``(width, height)`` of the image in pixels.
        rect_size: Side length of the square in pixels.
        brightness_range: ``(low, high)`` inclusive bounds of the fill value.

    Returns:
        A PIL image in mode ``'L'``.

    Raises:
        ValueError: If the square does not fit into the image or the
            brightness bounds are not ordered values within 0..255.
    """
    width, height = img_size
    if not 0 < rect_size <= min(width, height):
        raise ValueError(
            f"rect_size must be between 1 and {min(width, height)}, got {rect_size}"
        )
    low, high = brightness_range
    if not 0 <= low <= high <= 255:
        raise ValueError(
            f"brightness_range must satisfy 0 <= low <= high <= 255, got {brightness_range}"
        )

    # Black canvas in 8-bit grayscale mode.
    image = Image.new('L', (width, height), 0)
    draw = ImageDraw.Draw(image)

    # Top-left corner, restricted so the whole square stays inside the image.
    x = random.randint(0, width - rect_size)
    y = random.randint(0, height - rect_size)

    # Fill value; random.randint includes both bounds.
    brightness = random.randint(low, high)

    # The end coordinates are part of the drawn rectangle, hence the "- 1".
    draw.rectangle([x, y, x + rect_size - 1, y + rect_size - 1], fill=brightness)

    return image

# Generate 100 training images and save them as data/raw/train/<i>_input.png.
import os

train_dir = "data/raw/train"
# Saving fails if the target folder is missing, so create it (and parents) first.
os.makedirs(train_dir, exist_ok=True)
for i in range(100):
    img = create_train_image()
    img.save(f"{train_dir}/{i}_input.png")


#### テスト用画像

In [42]:
from PIL import Image, ImageDraw
import random

def create_image(circle_radius):
    """Create a test image: a centred bright square with a black disc cut into it.

    Args:
        circle_radius: Radius in pixels of the black disc drawn around the
            centre of the square.

    Returns:
        A 28x28 PIL image in mode ``'L'``.
    """
    canvas_w, canvas_h = 28, 28
    side = 20

    # Black 8-bit grayscale canvas.
    canvas = Image.new('L', (canvas_w, canvas_h), 0)
    pen = ImageDraw.Draw(canvas)

    # Top-left corner that centres the square on the canvas.
    left = (canvas_w - side) // 2
    top = (canvas_h - side) // 2

    # Random fill value between 160 and 255, both included.
    level = random.randint(160, 255)
    pen.rectangle([left, top, left + side - 1, top + side - 1], fill=level)

    # Bounding box of the disc, positioned around the centre of the square.
    half_side = side // 2
    diameter = 2 * circle_radius
    hole_left = left + half_side - circle_radius
    hole_top = top + half_side - circle_radius
    pen.ellipse(
        [hole_left, hole_top, hole_left + diameter, hole_top + diameter],
        fill=0,
    )

    return canvas

# Generate the two test images (disc radius 10 and 2) and save them as
# data/raw/test/<index>_input.png.
import os

test_dir = "data/raw/test"
# Saving fails if the target folder is missing, so create it (and parents) first.
os.makedirs(test_dir, exist_ok=True)
for index, radius in enumerate((10, 2)):
    image = create_image(radius)
    image.save(f"{test_dir}/{index}_input.png")

#### DataSetの作成

In [43]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms

class PngDataset(Dataset):
    """Dataset of the PNG files in one folder, served as flattened grayscale tensors.

    Args:
        folder_path: Directory scanned (non-recursively) for files whose name
            ends in ``.png``, compared case-insensitively.
        transform: Optional callable that turns the loaded grayscale PIL image
            into a tensor. When omitted, ``transforms.ToTensor()`` is used.
    """

    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform
        # os.listdir() returns entries in arbitrary order; sorting makes an
        # index refer to the same file on every run.
        self.image_paths = sorted(
            os.path.join(folder_path, f)
            for f in os.listdir(folder_path)
            if f.lower().endswith('.png')
        )

    def __len__(self):
        """Return the number of PNG files found in the folder."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as grayscale and return it as a 1-D tensor."""
        img_path = self.image_paths[idx]
        # Close the file handle as soon as the pixel data has been converted.
        with Image.open(img_path) as img_file:
            image = img_file.convert('L')
        # Honour the user-supplied transform; fall back to plain tensor conversion.
        to_tensor = self.transform if self.transform is not None else transforms.ToTensor()
        image = to_tensor(image)
        return torch.flatten(image)

# Serve the training images in shuffled mini-batches of 10.
folder_path = "data/raw/train"
dataset = PngDataset(folder_path=folder_path)
dataloader = torch.utils.data.DataLoader(
    dataset=dataset,
    shuffle=True,
    batch_size=10,
)

#### オートエンコーダによる学習

In [44]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
    
# 簡単なオートエンコーダモデルの定義
class Autoencoder(nn.Module):
    """Fully connected autoencoder for flattened 28x28 inputs.

    The encoder narrows 784 -> 128 -> 64 -> 32 features with a ReLU after every
    linear layer. The decoder mirrors it (32 -> 64 -> 128 -> 784) with ReLU
    between the layers and a final sigmoid, so outputs lie between 0 and 1.
    """

    def __init__(self):
        super().__init__()
        n_pixels = 28 * 28
        widths = [n_pixels, 128, 64, 32]

        # Encoder: Linear + ReLU for each consecutive pair of widths.
        enc = []
        for n_in, n_out in zip(widths, widths[1:]):
            enc.extend((nn.Linear(n_in, n_out), nn.ReLU()))
        self.encoder = nn.Sequential(*enc)

        # Decoder: the same widths walked in reverse.
        widths.reverse()
        dec = []
        for n_in, n_out in zip(widths, widths[1:]):
            dec.extend((nn.Linear(n_in, n_out), nn.ReLU()))
        # The last activation is a sigmoid instead of a ReLU.
        dec[-1] = nn.Sigmoid()
        self.decoder = nn.Sequential(*dec)

    def forward(self, x):
        """Encode ``x`` to the 32-feature code and decode it back."""
        return self.decoder(self.encoder(x))

# Model, reconstruction loss (mean squared error) and optimizer.
model = Autoencoder()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop: the network is trained to reproduce its own input.
epochs = 100
for epoch in range(epochs):
    for inputs in dataloader:
        # Clear gradients left over from the previous step before the new pass.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, inputs)  # error between input and reconstruction
        loss.backward()
        optimizer.step()
    # The value logged here is the loss of the epoch's last mini-batch only.
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')
print("Training complete.")


Epoch [1/100], Loss: 0.1447
Epoch [2/100], Loss: 0.0919
Epoch [3/100], Loss: 0.1071
Epoch [4/100], Loss: 0.0987
Epoch [5/100], Loss: 0.0897
Epoch [6/100], Loss: 0.0865
Epoch [7/100], Loss: 0.0894
Epoch [8/100], Loss: 0.0720
Epoch [9/100], Loss: 0.0446
Epoch [10/100], Loss: 0.0391
Epoch [11/100], Loss: 0.0379
Epoch [12/100], Loss: 0.0268
Epoch [13/100], Loss: 0.0305
Epoch [14/100], Loss: 0.0249
Epoch [15/100], Loss: 0.0244
Epoch [16/100], Loss: 0.0208
Epoch [17/100], Loss: 0.0188
Epoch [18/100], Loss: 0.0200
Epoch [19/100], Loss: 0.0178
Epoch [20/100], Loss: 0.0182
Epoch [21/100], Loss: 0.0167
Epoch [22/100], Loss: 0.0181
Epoch [23/100], Loss: 0.0195
Epoch [24/100], Loss: 0.0148
Epoch [25/100], Loss: 0.0144
Epoch [26/100], Loss: 0.0155
Epoch [27/100], Loss: 0.0179
Epoch [28/100], Loss: 0.0140
Epoch [29/100], Loss: 0.0166
Epoch [30/100], Loss: 0.0165
Epoch [31/100], Loss: 0.0149
Epoch [32/100], Loss: 0.0106
Epoch [33/100], Loss: 0.0115
Epoch [34/100], Loss: 0.0129
Epoch [35/100], Loss: 0

#### オートエンコーダによる推論

In [46]:
import matplotlib.pyplot as plt
import numpy as np

import os

# Reconstruct every test image and save, per image, the reconstruction and the
# absolute per-pixel difference between input and reconstruction.
# Test-specific names are used so the training folder_path / dataset / dataloader
# are not overwritten (re-running the training cell would otherwise train on
# the test images).
test_folder_path = "data/raw/test"
test_dataset = PngDataset(test_folder_path)
# No shuffling: the index i in the output file names must follow the dataset order.
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False)

results_dir = "results"
# Saving fails if the target folder is missing, so create it first.
os.makedirs(results_dir, exist_ok=True)

model.eval()
with torch.no_grad():
    for i, inputs in enumerate(test_dataloader):
        reconstructed = model(inputs)

        # Back to 8-bit images. Round instead of truncating so that float error
        # in value * 255 cannot shift a pixel down by one level.
        inputs_img = inputs[0, :].reshape(28, 28).numpy()
        inputs_img = np.rint(inputs_img * 255).astype(np.uint8)
        reconstructed_img = reconstructed[0, :].reshape(28, 28).numpy()
        reconstructed_img = np.rint(reconstructed_img * 255).astype(np.uint8)

        # Subtract in a signed type so negative differences do not wrap around.
        diff_img = np.abs(inputs_img.astype(np.int16) - reconstructed_img.astype(np.int16))
        diff_img = diff_img.astype(np.uint8)

        reconstructed_img = Image.fromarray(reconstructed_img)
        reconstructed_img.save(f"{results_dir}/{i}_recon.png")
        diff_img = Image.fromarray(diff_img)
        diff_img.save(f"{results_dir}/{i}_diff.png")
