In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from transformers import ViTFeatureExtractor, ViTForImageClassification
from sklearn.model_selection import train_test_split
from PIL import Image
import os  

# GPU 사용을 위한 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 데이터 경로 설정
data_folder = 'train'  # 데이터 폴더 경로
num_classes = 4  # 클래스 수

# 이미지 전처리 및 데이터 로더 설정
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 이미지 크기 조정
    transforms.ToTensor(),  # 이미지를 텐서로 변환
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 이미지 정규화
])

class CustomDataset(Dataset):
    def __init__(self, data_folder, transform=None):
        self.data_folder = data_folder
        self.transform = transform
        self.image_paths = []  # 이미지 파일 경로 리스트
        self.labels = []  # 이미지 클래스 레이블 리스트

        for class_idx in range(num_classes):
            class_folder = os.path.join(data_folder, str(class_idx))
            image_files = os.listdir(class_folder)
            for image_file in image_files:
                self.image_paths.append(os.path.join(class_folder, image_file))
                self.labels.append(class_idx)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        label = self.labels[idx]
        image = Image.open(image_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from tqdm import tqdm

# 데이터셋 및 데이터 로더 생성
dataset = CustomDataset(data_folder, transform=transform)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# ViT 모델 및 옵티마이저 설정
model_name = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor(model_name)
model = ViTForImageClassification.from_pretrained(model_name, num_labels=num_classes).to(device)
optimizer = optim.AdamW(model.parameters(), lr=2e-5)

# 손실 함수 설정
criterion = nn.CrossEntropyLoss()

# 학습 설정
num_epochs = 10

# 모델 학습
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(images).logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss /= len(train_loader)

    # 에폭마다 학습 손실 출력
    print(f"Epoch [{epoch + 1}/{num_epochs}] - Loss: {train_loss:.4f}")

# 모델 평가
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in valid_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images).logits
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch 1/10: 100%|██████████| 92/92 [00:55<00:00,  1.65it/s]


Epoch [1/10] - Loss: 0.9621


Epoch 2/10: 100%|██████████| 92/92 [00:54<00:00,  1.68it/s]


Epoch [2/10] - Loss: 0.6225


Epoch 3/10: 100%|██████████| 92/92 [00:54<00:00,  1.69it/s]


Epoch [3/10] - Loss: 0.4659


Epoch 4/10: 100%|██████████| 92/92 [00:55<00:00,  1.67it/s]


Epoch [4/10] - Loss: 0.3446


Epoch 5/10: 100%|██████████| 92/92 [00:54<00:00,  1.68it/s]


Epoch [5/10] - Loss: 0.2578


Epoch 6/10: 100%|██████████| 92/92 [00:55<00:00,  1.67it/s]


Epoch [6/10] - Loss: 0.2117


Epoch 7/10: 100%|██████████| 92/92 [00:55<00:00,  1.67it/s]


Epoch [7/10] - Loss: 0.1855


Epoch 8/10: 100%|██████████| 92/92 [00:55<00:00,  1.67it/s]


Epoch [8/10] - Loss: 0.1201


Epoch 9/10: 100%|██████████| 92/92 [00:55<00:00,  1.66it/s]


Epoch [9/10] - Loss: 0.0899


Epoch 10/10: 100%|██████████| 92/92 [00:54<00:00,  1.67it/s]


Epoch [10/10] - Loss: 0.0889
Test Accuracy: 91.02%


In [7]:
# 모델 저장 경로 지정
model_save_path = 'vision_model.pth'

# 모델 상태 저장
torch.save(model.state_dict(), model_save_path)

In [8]:
num_classes = 4  # 클래스 수
# 이미지 전처리 설정
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 이미지 크기 조정
    transforms.ToTensor(),  # 이미지를 텐서로 변환
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 이미지 정규화
])

# 모델 및 가중치 로드
feature_extractor = ViTFeatureExtractor(model_name)

model = ViTForImageClassification.from_pretrained(model_name, num_labels=num_classes).to(device) 
model.load_state_dict(torch.load(model_save_path))
model.eval()

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0): ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=768, out_

In [26]:
test0_folder=os.listdir("./real_test/0_consensus")
test1_folder=os.listdir("./real_test/1_consensus")
test2_folder=os.listdir("./real_test/2_consensus")
test3_folder=os.listdir("./real_test/3_consensus")

## 0 consensus 예측

In [27]:
import pandas as pd 

name_list = []
Answer_list = []
predict_list = []
count = 0
for i in tqdm(test0_folder):
    # 예측할 이미지 불러오기
    image_path = './real_test/0_consensus/'+i
    image = Image.open(image_path).convert("RGB")
    image = transform(image)  # 전처리 적용

    # 모델을 CPU 또는 GPU로 이동
    model.to(device)

    # 입력 데이터를 CPU 또는 GPU로 이동
    image = image.to(device)


    # 모델을 통해 예측 수행
    with torch.no_grad():
        image = image.unsqueeze(0)  # 배치 차원 추가
        outputs = model(image)  # 모델에 이미지 입력
        _, predicted = torch.max(outputs.logits, 1)  # 클래스 예측

    # 예측 결과 출력
    predicted_class = predicted.item()
    name_list.append(i)
    Answer_list.append(0)
    predict_list.append(predicted_class)
    if 0 == predicted_class:
        count+=1

f = pd.DataFrame({'파일명':name_list,
              '정답':Answer_list,
              '예측':predict_list})
f.to_csv('0컨센서스예측결과.csv', index=False)
print("맞춘 횟수", count)


 54%|█████▍    | 54/100 [00:17<00:14,  3.13it/s]

## 1 consensus 예측

In [None]:
import pandas as pd 

name_list = []
Answer_list = []
predict_list = []
count = 0
for i in tqdm(test1_folder):
    # 예측할 이미지 불러오기
    image_path = './real_test/0_consensus/'+i
    image = Image.open(image_path).convert("RGB")
    image = transform(image)  # 전처리 적용

    # 모델을 CPU 또는 GPU로 이동
    model.to(device)

    # 입력 데이터를 CPU 또는 GPU로 이동
    image = image.to(device)


    # 모델을 통해 예측 수행
    with torch.no_grad():
        image = image.unsqueeze(0)  # 배치 차원 추가
        outputs = model(image)  # 모델에 이미지 입력
        _, predicted = torch.max(outputs.logits, 1)  # 클래스 예측

    # 예측 결과 출력
    predicted_class = predicted.item()
    name_list.append(i)
    Answer_list.append(0)
    predict_list.append(predicted_class)
    if 0 == predicted_class:
        count+=1

f = pd.DataFrame({'파일명':name_list,
              '정답':Answer_list,
              '예측':predict_list})
f.to_csv('0컨센서스예측결과.csv', index=False)
print("맞춘 횟수", count)


## 2 consensus 예측

In [None]:
for i in test3_folder:
    # 예측할 이미지 불러오기
    image_path = './real_test/0_consensus/'+i
    image = Image.open(image_path).convert("RGB")
    image = transform(image)  # 전처리 적용

    # 모델을 CPU 또는 GPU로 이동
    model.to(device)

    # 입력 데이터를 CPU 또는 GPU로 이동
    image = image.to(device)


    # 모델을 통해 예측 수행
    with torch.no_grad():
        image = image.unsqueeze(0)  # 배치 차원 추가
        outputs = model(image)  # 모델에 이미지 입력
        _, predicted = torch.max(outputs.logits, 1)  # 클래스 예측

    # 예측 결과 출력
    predicted_class = predicted.item()
    print(f"Predicted Class: {predicted_class}")

## 3 consensus 예측

In [20]:
for i in test4_folder:
    # 예측할 이미지 불러오기
    image_path = './real_test/0_consensus/'+i
    image = Image.open(image_path).convert("RGB")
    image = transform(image)  # 전처리 적용

    # 모델을 CPU 또는 GPU로 이동
    model.to(device)

    # 입력 데이터를 CPU 또는 GPU로 이동
    image = image.to(device)


    # 모델을 통해 예측 수행
    with torch.no_grad():
        image = image.unsqueeze(0)  # 배치 차원 추가
        outputs = model(image)  # 모델에 이미지 입력
        _, predicted = torch.max(outputs.logits, 1)  # 클래스 예측

    # 예측 결과 출력
    predicted_class = predicted.item()
    print(f"Predicted Class: {predicted_class}")

FileNotFoundError: [Errno 2] No such file or directory: '/home/ohseunghwan/AIproject/pathology_cls/real_test/0_consensus/22S_001868_A_3_FC0101_4_C-ERB-B2(RO)_20220111_182358_0_57344_1024_mpp_1024_.png'