In [1]:
import cv2

In [2]:
import torch

In [6]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from transformers import AutoImageProcessor, AutoModelForImageClassification

# 1. Paths
# Dataset root with the 'train3' and 'test' sub-directories used below.
data_dir = './Rock-Paper-Scissors'
train_dir = os.path.join(data_dir, 'train3')
test_dir = os.path.join(data_dir, 'test')

# 2. Preprocessing
# The same pipeline is applied to both the training and the test images.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # resize to the input size used for the pretrained model
    transforms.ToTensor(),          # convert to a tensor
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # normalize all 3 channels (mean 0.5, std 0.5)
])

# 3. Load the datasets
train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)

# Only the training loader is shuffled; the test loader keeps dataset order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Show the class names discovered by the training dataset
classes = train_dataset.classes
print(f"Classes: {classes}")

# 4. Device selection (CUDA when available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 5. Load the pretrained model
pretrained_model_name = "microsoft/resnet-50"
model = AutoModelForImageClassification.from_pretrained(
    pretrained_model_name,
    num_labels=3,  # three classes: rock, paper, scissors
    ignore_mismatched_sizes=True  # allow loading even though the checkpoint's output layer has a different size
)

# Replace the classification head with Flatten + Linear(2048 -> 3).
# NOTE(review): the load warning printed by this cell already reports classifier.1
# instantiated with shape [3, 2048], so this replacement may be redundant — confirm.
model.classifier = nn.Sequential(
    nn.Flatten(),  # flatten the feature map into a 1-D vector
    nn.Linear(2048, 3)  # 2048 input features, matching the ResNet feature extractor output
)

# Move the model to the selected device (GPU or CPU)
model = model.to(device)

# 6. Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# 7. Training loop
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` on ``train_loader`` and print the mean loss of every epoch.

    Args:
        model: Module whose forward output exposes a ``.logits`` tensor.
        train_loader: Iterable of ``(images, labels)`` batches; it is iterated
            once per epoch.
        criterion: Loss callable invoked as ``criterion(logits, labels)``.
        optimizer: Optimizer already bound to the parameters of ``model``.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        list[float]: Mean batch loss of each epoch, in epoch order.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    # Follow the device the model already lives on instead of depending on a
    # notebook-level ``device`` global (hidden state across cells).
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(target_device), labels.to(target_device)

            optimizer.zero_grad()
            outputs = model(images)  # forward pass
            loss = criterion(outputs.logits, labels)  # loss is computed on the logits
            loss.backward()  # backward pass
            optimizer.step()  # parameter update

            running_loss += loss.item()
            num_batches += 1

        # Counting batches (rather than len(train_loader)) also supports loaders
        # without __len__ and lets us report an empty loader explicitly.
        if num_batches == 0:
            raise ValueError("train_loader yielded no batches")

        mean_loss = running_loss / num_batches
        epoch_losses.append(mean_loss)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss:.4f}")

    return epoch_losses

# 8. Evaluation loop
def evaluate_model(model, test_loader):
    """Compute and print the top-1 accuracy of ``model`` over ``test_loader``.

    Args:
        model: Module whose forward output exposes a ``.logits`` tensor.
        test_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        float: Accuracy as a percentage in the range 0-100.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    # Follow the device the model already lives on instead of depending on a
    # notebook-level ``device`` global (hidden state across cells).
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in test_loader:
            images, labels = images.to(target_device), labels.to(target_device)
            outputs = model(images)
            _, predicted = torch.max(outputs.logits, 1)  # index of the largest logit per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Report an empty loader explicitly instead of failing with ZeroDivisionError.
    if total == 0:
        raise ValueError("test_loader yielded no samples")

    accuracy = 100 * correct / total
    print(f"Accuracy: {accuracy:.2f}%")
    return accuracy

# 9. Helper for persisting the model
def save_model(model, save_path):
    """Write the state dict of ``model`` to ``save_path`` and print a confirmation."""
    weights = model.state_dict()
    torch.save(weights, save_path)
    print(f"Model saved to {save_path}")

# 10. Train the model
train_model(model, train_loader, criterion, optimizer, num_epochs=10)

# 11. Evaluate the model on the test set
evaluate_model(model, test_loader)

# 12. Save the trained model
# Note the non-standard '.pth2' suffix: any code that loads this checkpoint
# must use exactly this path.
save_path = './rock_paper_scissors_pretrained_model.pth2'
save_model(model, save_path)


Classes: ['paper', 'rock', 'scissors']
Using device: cuda


Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([3, 2048]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/10, Loss: 0.8152
Epoch 2/10, Loss: 0.1450
Epoch 3/10, Loss: 0.0385
Epoch 4/10, Loss: 0.0234
Epoch 5/10, Loss: 0.0115
Epoch 6/10, Loss: 0.0128
Epoch 7/10, Loss: 0.0116
Epoch 8/10, Loss: 0.0051
Epoch 9/10, Loss: 0.0115
Epoch 10/10, Loss: 0.0071
Accuracy: 91.67%
Model saved to ./rock_paper_scissors_pretrained_model.pth2


In [8]:
import cv2
import torch
import torch.nn as nn
from torchvision import transforms
from transformers import AutoModelForImageClassification

# 1. Load the model
# Load the checkpoint written by the training cell, which saves with a
# '.pth2' suffix (the previous '.pth' path did not match the saved file).
model_path = './rock_paper_scissors_pretrained_model.pth2'
pretrained_model_name = "microsoft/resnet-50"

# Device selection comes first so the checkpoint can be mapped onto it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the same architecture that was used for training
model = AutoModelForImageClassification.from_pretrained(
    pretrained_model_name,
    num_labels=3,
    ignore_mismatched_sizes=True
)
model.classifier = nn.Sequential(
    nn.Flatten(),
    nn.Linear(2048, 3)  # scissors, rock, paper
)

# Load the fine-tuned weights; map_location lets a checkpoint saved on a GPU
# be loaded on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
model = model.to(device)
model.eval()

# Class names, in the index order printed by the training cell
classes = ['paper', 'rock', 'scissors']

# 2. Image preprocessing (same resize and normalization as during training)
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),  # resize to the model input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # same normalization as training
])


def preprocess_frame(frame):
    """Turn one OpenCV frame into a normalized ``(1, 3, 224, 224)`` batch on ``device``.

    The frame is center-cropped to a square and converted from OpenCV's BGR
    channel order to RGB before ``transform`` is applied, so the channel order
    matches the images the model was trained on.
    """
    h, w, _ = frame.shape
    min_dim = min(h, w)
    crop_img = frame[(h - min_dim) // 2:(h + min_dim) // 2, (w - min_dim) // 2:(w + min_dim) // 2]
    rgb_img = cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB)
    return transform(rgb_img).unsqueeze(0).to(device)


# 3. Real-time webcam capture
cap = cv2.VideoCapture(0)  # open the default webcam
try:
    if not cap.isOpened():
        # Raise instead of calling exit(): exit() would shut down the notebook kernel.
        raise RuntimeError("Could not open webcam.")

    print("Press 'q' to exit.")

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture frame.")
            break

        # Preprocess the frame and run the model
        input_image = preprocess_frame(frame)
        with torch.no_grad():
            outputs = model(input_image)
            _, predicted = torch.max(outputs.logits, 1)
            predicted_class = classes[predicted.item()]

        # Draw the prediction on the frame and show it
        cv2.putText(frame, f"Prediction: {predicted_class}", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow("Rock-Paper-Scissors Classifier", frame)

        # Quit when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close the window, even if the loop raised
    cap.release()
    cv2.destroyAllWindows()


Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([3, 2048]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Press 'q' to exit.
