In [11]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch import FloatTensor as tensor
from torch.utils.data import TensorDataset, DataLoader
import csv
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets, transforms

In [12]:
# Preprocessing pipeline applied, in order, to every image loaded from disk.
transform = transforms.Compose(
    [
        # Convert colour images to single-channel grayscale.
        transforms.Grayscale(num_output_channels=1),
        # Resize every image to 128x128.
        transforms.Resize(size=(128, 128)),
        # Convert the image to a PyTorch tensor.
        transforms.ToTensor(),
        # Normalize pixel values to [-1, 1].
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

In [13]:
# Root folder of the dataset; it is read through its "train" and "test" sub-folders.
data_dir = "ASL_Gestures_36_Classes"
# Build both splits with the same preprocessing pipeline (train first, then test).
dataset_train, dataset_test = (
    datasets.ImageFolder(root=f"{data_dir}/{split}", transform=transform)
    for split in ("train", "test")
)

In [14]:
# Shuffled mini-batch loader over the training split.
# NOTE(review): configured identically to `train_loader` in the next cell and not used by
# any cell visible here -- confirm whether it is still needed.
data_loader = DataLoader(
    dataset=dataset_train,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)

In [15]:
# Options shared by both loaders: mini-batches of 32, loaded by 2 worker processes.
_loader_options = dict(batch_size=32, num_workers=2)
# Training batches are shuffled; test batches keep the dataset order.
train_loader = DataLoader(dataset_train, shuffle=True, **_loader_options)
test_loader = DataLoader(dataset_test, shuffle=False, **_loader_options)

In [16]:
class CNNModel(nn.Module):
    """Small convolutional classifier for single-channel images.

    Architecture: two stages of (3x3 convolution -> ReLU -> 2x2 max-pool) with
    32 and 64 output channels, followed by a 128-unit hidden linear layer and
    a final linear layer that returns ``num_classes`` raw logits (no softmax
    is applied).

    Args:
        num_classes: Number of output logits. Defaults to 36.
        input_size: Spatial size of the input images, either one integer for
            square images or a ``(height, width)`` pair. Defaults to 128,
            which gives the first linear layer 64 * 32 * 32 input features.

    Raises:
        ValueError: If a spatial dimension is too small to survive the two
            pooling stages (i.e. smaller than 4).
    """

    def __init__(self, num_classes=36, input_size=128):
        super().__init__()
        try:
            height, width = input_size
        except TypeError:
            # A single number means a square image.
            height = width = input_size

        # The padded 3x3 convolutions keep the spatial size; each 2x2/stride-2
        # pool halves it (rounding down), so two stages divide it by four.
        pooled_height = (int(height) // 2) // 2
        pooled_width = (int(width) // 2) // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small for two 2x2 pooling stages"
            )

        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * pooled_height * pooled_width, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for a batch of images.

        ``x`` must have shape ``(batch, 1, height, width)`` matching the
        ``input_size`` given at construction; a mismatched size raises a shape
        error in the first linear layer.
        """
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten per sample and keep the batch dimension. A hard-coded
        # view(-1, N) would silently merge samples when the image size is wrong.
        x = torch.flatten(x, start_dim=1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


# Instantiate the network with its default of 36 output classes.
# NOTE(review): the training cell below rebuilds `model` from the dataset's class count,
# so this instance is replaced before any training happens.
model = CNNModel(num_classes=36)

In [17]:
def train(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        loader: Sized iterable yielding ``(images, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss tensor.
        optimizer: Optimizer holding the model's parameters.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The sum of the per-batch loss values divided by ``len(loader)``.
    """
    model.train()
    batch_losses = []
    for batch_inputs, batch_targets in loader:
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Clear gradients left over from the previous step before backpropagating.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_targets)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
    return sum(batch_losses, 0.0) / len(loader)

In [18]:
def evaluate(model, loader, device):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Number of correct predictions divided by the number of samples seen.
    """
    model.eval()
    n_correct, n_seen = 0, 0
    # No gradients are needed for scoring.
    with torch.no_grad():
        for batch_inputs, batch_targets in loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)
            logits = model(batch_inputs)
            # The predicted class is the index of the largest logit in each row.
            top_class = torch.max(logits, 1).indices
            n_seen += batch_targets.size(0)
            n_correct += (top_class == batch_targets).sum().item()
    return n_correct / n_seen


In [19]:
# Sanity check: display whether PyTorch can use a CUDA device (the training cell selects its device with the same call).
torch.cuda.is_available()

True

In [20]:
# Define the model, loss function, and optimizer.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# One output logit per class folder found in the training set.
model = CNNModel(num_classes=len(dataset_train.classes))
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Training configuration.
epochs = 10
for epoch in range(epochs):
    # One pass over the training data, then score the model on the test split.
    train_loss = train(
        model=model,
        loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    accuracy = evaluate(model=model, loader=test_loader, device=device)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {train_loss:.4f}, Accuracy: {accuracy:.4f}")

Epoch 1/10, Loss: 2.9799, Accuracy: 0.6508
Epoch 2/10, Loss: 0.6190, Accuracy: 0.8810
Epoch 3/10, Loss: 0.2409, Accuracy: 0.9365
Epoch 4/10, Loss: 0.1215, Accuracy: 0.9583
Epoch 5/10, Loss: 0.0671, Accuracy: 0.9643
Epoch 6/10, Loss: 0.0306, Accuracy: 0.9583
Epoch 7/10, Loss: 0.0267, Accuracy: 0.9683
Epoch 8/10, Loss: 0.0093, Accuracy: 0.9722
Epoch 9/10, Loss: 0.0062, Accuracy: 0.9782
Epoch 10/10, Loss: 0.0036, Accuracy: 0.9742


Training time for 10 epochs on GPU (CUDA 12.4, RTX 4060 Laptop): 1m 12s<br>
Training time for 10 epochs on CPU (i9): 1m 59s<br>