# 圖片分類

In [None]:
import os
import shutil

# Sort the raw images into one sub-folder per class. The class of a file is
# its name with all digits and the extension removed.
source_dir = './hiragana_images'
target_root = './my_images'

# exist_ok=True makes the cell safe to re-run and avoids the separate
# exists()/mkdir() check.
os.makedirs(target_root, exist_ok=True)

# Walk over every entry of the source folder
for filename in os.listdir(source_dir):
    # Only .jpg files are sorted; everything else is skipped
    if not filename.endswith('.jpg'):
        continue
    # Drop every digit from the file name, then strip the extension
    # (e.g. 'kanaA12.jpg' -> 'kanaA')
    class_name = os.path.splitext(''.join(ch for ch in filename if not ch.isdigit()))[0]
    # Make sure the per-class folder exists
    target_folder = os.path.join(target_root, class_name)
    os.makedirs(target_folder, exist_ok=True)
    # Copy (not move) the file into its class folder; the source stays intact
    shutil.copy(os.path.join(source_dir, filename), os.path.join(target_folder, filename))


# 訓練

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [2]:
# Hyperparameters
learning_rate = 0.001  # step size handed to the optimizer
num_epochs = 10        # number of passes over the training loader
batch_size = 20        # samples per mini-batch
num_classes = 50       # one class per kana of the 50-sound table

In [3]:
# Preprocessing pipeline for the training images
preprocessing_steps = [
    transforms.Resize((32, 32)),                  # scale to 32x32
    transforms.Grayscale(num_output_channels=1),  # reduce to one grayscale channel
    transforms.ToTensor(),                        # convert to a tensor
]
transform = transforms.Compose(preprocessing_steps)

In [4]:
# Load the dataset
# Raw string so the Windows backslashes are taken literally: '\p' and '\m'
# are not valid escape sequences in a normal string literal.
train_dataset = datasets.ImageFolder(root=r'F:\pycharm_pro\pytorch_learn\my_images', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Mapping from class name to the integer label assigned by ImageFolder
label_dict = train_dataset.class_to_idx

# test_dataset = datasets.ImageFolder(root='F:/pycharm_pro/pytorch_learn/test_images', transform=transform)
# test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [5]:
# Model definition
class ConvNet(nn.Module):
    """Two-stage convolutional classifier for single-channel 32x32 images.

    Each stage is Conv(5x5, padding 2) -> BatchNorm -> ReLU -> MaxPool(2x2).
    The convolutions keep the spatial size and each pooling halves it, so a
    32x32 input reaches the linear layer as 32 feature maps of 8x8, which is
    the ``8 * 8 * 32`` input size of ``fc``.

    Args:
        n_classes: Number of output logits. When omitted, the module-level
            ``num_classes`` is read at construction time, so ``ConvNet()``
            keeps working exactly as before.
    """

    def __init__(self, n_classes=None):
        super().__init__()
        if n_classes is None:
            # Fall back to the notebook-level hyperparameter.
            n_classes = num_classes
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Linear(8 * 8 * 32, n_classes)

    def forward(self, x):
        """Return the raw class scores (no softmax) for a batch ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch dimension for the linear layer
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out


In [6]:
# Build the network, the loss function and the optimizer
model = ConvNet()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)


In [7]:
# Train the model
# Explicitly switch to training mode: a later cell calls model.eval(), so
# re-running this cell in the same kernel would otherwise leave the
# BatchNorm layers in evaluation mode while training.
model.train()
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the loss of the current batch every 10 steps
        if (i+1) % 10 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))


Epoch [1/10], Step [10/50], Loss: 3.5841
Epoch [1/10], Step [20/50], Loss: 1.9819
Epoch [1/10], Step [30/50], Loss: 0.9321
Epoch [1/10], Step [40/50], Loss: 0.5037
Epoch [1/10], Step [50/50], Loss: 0.4462
Epoch [2/10], Step [10/50], Loss: 0.1532
Epoch [2/10], Step [20/50], Loss: 0.0967
Epoch [2/10], Step [30/50], Loss: 0.1233
Epoch [2/10], Step [40/50], Loss: 0.1963
Epoch [2/10], Step [50/50], Loss: 0.0874
Epoch [3/10], Step [10/50], Loss: 0.0387
Epoch [3/10], Step [20/50], Loss: 0.1032
Epoch [3/10], Step [30/50], Loss: 0.0648
Epoch [3/10], Step [40/50], Loss: 0.1433
Epoch [3/10], Step [50/50], Loss: 0.0875
Epoch [4/10], Step [10/50], Loss: 0.0351
Epoch [4/10], Step [20/50], Loss: 0.0171
Epoch [4/10], Step [30/50], Loss: 0.0181
Epoch [4/10], Step [40/50], Loss: 0.0115
Epoch [4/10], Step [50/50], Loss: 0.0318
Epoch [5/10], Step [10/50], Loss: 0.0155
Epoch [5/10], Step [20/50], Loss: 0.0056
Epoch [5/10], Step [30/50], Loss: 0.0093
Epoch [5/10], Step [40/50], Loss: 0.0090
Epoch [5/10], St

In [8]:
# # Test the model (disabled: the test_loader definition above is commented out)
# model.eval()
# with torch.no_grad():
#     correct = 0
#     total = 0
#     for images, labels in test_loader:
#         outputs = model(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()
#     print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))


In [9]:
# Save the model: only the state_dict is written, not the module object
trained_weights = model.state_dict()
torch.save(trained_weights, '50on_model.ckpt')

# 手寫測試

In [12]:
from PIL import Image

# Preprocessing for a single test image. The image is converted to
# single-channel ('L') when it is loaded below, so there is no Grayscale step.
test_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
])

# Load the image; the context manager closes the underlying file as soon as
# the grayscale copy has been made.
with Image.open('F:\\pycharm_pro\\pytorch_learn\\test_images\\HE.jpg') as source_image:
    gray_image = source_image.convert('L')

# Preprocess and add a leading batch dimension of size 1
img = test_transform(gray_image).unsqueeze(0)

# Predict with the trained model
model.eval()
with torch.no_grad():
    output = model(img)
    pred = output.argmax(dim=1)


# Show the class-name -> index mapping, then the predicted index
print(label_dict)
print(pred.item())

{'kanaA': 0, 'kanaBA': 1, 'kanaCHI': 2, 'kanaDA': 3, 'kanaE': 4, 'kanaFU': 5, 'kanaHA': 6, 'kanaHE': 7, 'kanaHI': 8, 'kanaHO': 9, 'kanaI': 10, 'kanaJI': 11, 'kanaKA': 12, 'kanaKE': 13, 'kanaKI': 14, 'kanaKO': 15, 'kanaKU': 16, 'kanaMA': 17, 'kanaME': 18, 'kanaMI': 19, 'kanaMO': 20, 'kanaMU': 21, 'kanaN': 22, 'kanaNA': 23, 'kanaNE': 24, 'kanaNI': 25, 'kanaNO': 26, 'kanaNU': 27, 'kanaO': 28, 'kanaPI': 29, 'kanaRA': 30, 'kanaRE': 31, 'kanaRI': 32, 'kanaRO': 33, 'kanaRU': 34, 'kanaSA': 35, 'kanaSE': 36, 'kanaSHI': 37, 'kanaSO': 38, 'kanaSU': 39, 'kanaTA': 40, 'kanaTE': 41, 'kanaTO': 42, 'kanaTSU': 43, 'kanaU': 44, 'kanaWA': 45, 'kanaWO': 46, 'kanaYA': 47, 'kanaYO': 48, 'kanaYU': 49}
7
