In [75]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim


In [76]:
# 데이터 준비
train_transform = transforms.Compose([
    transforms.Resize((50, 50)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

test_transform = transforms.Compose([
    transforms.Resize((50, 50)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

train_dataset = torchvision.datasets.ImageFolder(root='../data/img/train', transform=train_transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = torchvision.datasets.ImageFolder(root='../data/img/test', transform=test_transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)

# 모델 정의 (간단한 예시)
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 56 * 56, 512)
        self.fc2 = nn.Linear(512, 1)  # 이진 분류이므로 출력 뉴런을 1로 설정

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 56 * 56)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)  # 이진 분류이므로 시그모이드 활성화 함수를 사용하지 않음
        return x

# 모델 초기화 및 손실 함수, 옵티마이저 정의
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
criterion = nn.BCEWithLogitsLoss()  # 이진 교차 엔트로피 손실 함수
criterion = nn.B
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# 모델 훈련
for epoch in range(10):  # 예시로 10 에폭으로 설정
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0].to(device), data[1].float().to(device)  # 레이블을 float으로 변환
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels.unsqueeze(1))  # BCEWithLogitsLoss를 사용하므로 레이블의 shape을 [batch_size, 1]로 만들어줌
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 10 == 9:  # 10 배치마다 손실 출력
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 10))
            running_loss = 0.0

print('Finished Training')

# 모델 평가
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data[0].to(device), data[1].float().to(device)  # 레이블을 float으로 변환
        outputs = model(images)
        predicted = (outputs > 0).float()  # outputs가 0보다 크면 1로, 그렇지 않으면 0으로 예측
        total += labels.size(0)
        correct += (predicted == labels.unsqueeze(1)).sum().item()

print('Accuracy : %d %%' % (
    100 * correct / total))



[1,    10] loss: 0.690
[1,    20] loss: 0.684
[1,    30] loss: 0.680
[2,    10] loss: 0.635
[2,    20] loss: 0.608
[2,    30] loss: 0.586
[3,    10] loss: 0.558
[3,    20] loss: 0.556
[3,    30] loss: 0.536
[4,    10] loss: 0.474
[4,    20] loss: 0.493
[4,    30] loss: 0.507
[5,    10] loss: 0.460
[5,    20] loss: 0.418
[5,    30] loss: 0.438
[6,    10] loss: 0.393
[6,    20] loss: 0.402
[6,    30] loss: 0.373
[7,    10] loss: 0.344
[7,    20] loss: 0.377
[7,    30] loss: 0.345
[8,    10] loss: 0.308
[8,    20] loss: 0.290
[8,    30] loss: 0.327
[9,    10] loss: 0.275
[9,    20] loss: 0.260
[9,    30] loss: 0.279
[10,    10] loss: 0.351
[10,    20] loss: 0.247
[10,    30] loss: 0.222
Finished Training
Accuracy : 82 %


In [77]:
import torch
# 모델 저장
torch.save(model.state_dict(), 'best_cnn.pth')

In [80]:
import torchvision.transforms as transforms
from torchvision.transforms.functional import to_pil_image
import cv2
from cnn import CNN  

# 모델 저장
torch.save(model.state_dict(), 'best_cnn.pth')

# 모델 정의
model = CNN()
model.load_state_dict(torch.load('best_cnn.pth'))  # 저장된 모델을 불러옴
model.eval()

# 이미지 전처리
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# 이미지 로드 및 전처리
image_path = '../data/img/test/chihuahua/img_0_18.jpg'  
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV는 이미지를 BGR 형식으로 읽어옴 그 후 RGB로 변환
img_pil = to_pil_image(image)
input_image = transform(img_pil)
input_image = input_image.unsqueeze(0) 

# 모델을 통해 예측 수행
with torch.no_grad():
    output = model(input_image)
    _, predicted = torch.max(output, 1)
predicted_prob = torch.softmax(output, dim=1)[0][predicted.item()].item()
print(f"{'치와와' if predicted.item() == 1 else '머핀'}입니다!")


# 예측 결과 해석
class_names = ['chihuahua', 'muffin']  # 클래스 레이블을 정의합니다.
predicted_class = class_names[predicted.item()]
print('Predicted class:', predicted_class)


머핀입니다!
Predicted class: chihuahua


In [79]:
# 이미지 로드 및 전처리
image_path = '../data/img/test/chihuahua/img_0_18.jpg'  # 테스트할 이미지의 경로를 지정합니다.
image = cv2.imread(image_path)
#image = image.open(image_path)
img_pil = to_pil_image(image)
input_image = transform(img_pil)
input_image = input_image.unsqueeze(0)  # 배치 차원을 추가합니다.

# 모델을 통해 예측 수행
with torch.no_grad():
    output = model(input_image)
    probabilities = torch.softmax(output, dim=1)
    predicted_class_index = torch.argmax(probabilities, dim=1).item()
    predicted_probability = probabilities[0, predicted_class_index].item()
    predicted_class = 'chihuahua' if predicted_class_index == 1 else 'muffin'

# 예측 결과 출력
print(f"Predicted class: {predicted_class} (확률: {predicted_probability:.2f})")

Predicted class: muffin (확률: 1.00)
