In [None]:
cd ..

In [None]:
cd ..

In [None]:
cd ./disk1/colonoscopy_datasetv2/cropped

In [None]:
pwd

GPU 지정 & 라이브러리

In [None]:
# Select the GPU *before* torch is imported so that the CUDA environment
# variables below are already in place when torch first looks at them.
import os
# Ask CUDA to number devices by PCI bus id (per the CUDA environment-variable
# docs), then expose only device "0" to this process.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import torch
# Fall back to the CPU when no CUDA device is available.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Bare expression: the notebook displays the selected device as the cell output.
device

In [None]:
# Module imports
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from torch import nn

# Sanity check: report whether this torch build can see a CUDA device.
print(torch.cuda.is_available())

Custom Dataset 구현

In [None]:
class CustomDataset(Dataset):
    """Image-classification dataset read from a folder of per-class sub-folders.

    Every immediate sub-directory of ``dataset_path`` is treated as one class.
    Class names are sorted before labels are assigned, so the label index of a
    class (and the order of ``img_files``) is the same on every run and every
    filesystem; this is what makes a seeded ``random_split`` reproducible.

    Attributes set by ``__init__``:
        dataset_path:   root directory that was scanned.
        img_transforms: optional callable applied to each PIL image.
        class_names:    sorted class (sub-directory) names; index == label.
        img_files:      paths of all accepted images.
        labels:         integer label for each entry of ``img_files``.
        num_classes:    number of class sub-directories found.
        num_images:     number of accepted images.
    """

    @staticmethod
    def _list_dir(path):
        """Return ``(sub_directory_names, other_entry_names)`` of ``path``, both sorted.

        Raises the underlying ``OSError`` (e.g. ``FileNotFoundError``) when
        ``path`` cannot be listed.
        """
        dirs, files = [], []
        with os.scandir(path) as entries:
            for entry in entries:
                (dirs if entry.is_dir() else files).append(entry.name)
        return sorted(dirs), sorted(files)

    def readImg(self):
        """Scan ``self.dataset_path`` and collect image paths with their labels.

        Files whose name has ``'MASK'`` at characters 4..7 are skipped. Every
        other file is opened once (header only) to check that PIL can read
        it; unreadable files are skipped with a warning rather than aborting
        the whole scan. As a side effect ``self.class_names`` is populated.

        Returns:
            tuple: ``(image_paths, labels, number_of_classes, number_of_images)``.
        """
        import warnings  # local import: only needed for the rare bad-file path

        class_names, _ = self._list_dir(self.dataset_path)
        self.class_names = class_names

        all_img_files = []
        all_labels = []

        for idx, class_name in enumerate(class_names):
            img_dir = os.path.join(self.dataset_path, class_name)
            _, img_files = self._list_dir(img_dir)

            for img in img_files:
                if img[4:8] == 'MASK':
                    continue
                img_path = os.path.join(img_dir, img)
                try:
                    # The context manager closes the file handle immediately;
                    # Image.open raises (it never returns None) on bad files.
                    with Image.open(img_path):
                        pass
                except OSError as exc:
                    warnings.warn('Skipping unreadable image {}: {}'.format(img_path, exc))
                    continue
                all_img_files.append(img_path)
                all_labels.append(idx)

        return all_img_files, all_labels, len(class_names), len(all_img_files)

    def __init__(self, dataset_path, img_transforms=None):
        """Index the images under ``dataset_path``.

        Args:
            dataset_path:   directory whose sub-directories are the classes.
            img_transforms: optional callable applied to each RGB PIL image
                            in ``__getitem__``.
        """
        self.dataset_path = dataset_path
        self.img_transforms = img_transforms
        self.img_files, self.labels, self.num_classes, self.num_images = self.readImg()

    def __getitem__(self, index):
        """Load sample ``index`` and return ``{'image': ..., 'label': ...}``.

        The image is converted to RGB and, if transforms were given, passed
        through them.
        """
        # convert() loads the pixel data and returns a new image, so the
        # source file can be closed as soon as the conversion is done.
        with Image.open(self.img_files[index]) as source:
            image = source.convert('RGB')
        label = self.labels[index]

        if self.img_transforms is not None:
            image = self.img_transforms(image)

        return {'image': image, 'label': label}

    def __len__(self):
        """Number of accepted images."""
        return self.num_images


모듈 구현

In [None]:
def train_epoch(model, dataloader, optimizer, device=None):
    """Train ``model`` for one pass over ``dataloader`` with cross-entropy loss.

    Args:
        model:      network to optimise; switched to training mode.
        dataloader: iterable of batches shaped like
                    ``{'image': tensor, 'label': tensor}``.
        optimizer:  optimizer that already holds ``model``'s parameters.
        device:     device the batches are moved to. When ``None`` (default)
                    the device of the model's first parameter is used, so the
                    function no longer depends on a notebook-level global.

    Returns:
        float: mean loss per sample, averaged over the samples actually
        processed (correct even with ``drop_last=True`` or a subset sampler);
        ``nan`` when the loader yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    crit = nn.CrossEntropyLoss()
    sum_loss = 0.0
    num_samples = 0

    for item in dataloader:
        images = item['image'].to(device)
        labels = item['label'].to(device)

        outputs = model(images)
        loss = crit(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # crit returns the batch mean, so weight it by the batch size to be
        # able to form an exact per-sample mean at the end.
        batch_size = len(images)
        sum_loss += loss.item() * batch_size
        num_samples += batch_size

    if num_samples == 0:
        return float('nan')
    return sum_loss / num_samples

def validate_epoch(model, dataloader, device=None):
    """Evaluate ``model`` on ``dataloader`` without updating any weights.

    Args:
        model:      network to evaluate; switched to evaluation mode.
        dataloader: iterable of batches shaped like
                    ``{'image': tensor, 'label': tensor}``.
        device:     device the batches are moved to. When ``None`` (default)
                    the device of the model's first parameter is used, so the
                    function no longer depends on a notebook-level global.

    Returns:
        tuple: ``(mean_loss_per_sample, accuracy_in_percent)``, both computed
        over the samples actually processed; ``(nan, nan)`` when the loader
        yields no batches.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    crit = nn.CrossEntropyLoss()
    sum_loss = 0.0
    correct = 0
    num_samples = 0

    with torch.no_grad():
        for item in dataloader:
            images = item['image'].to(device)
            labels = item['label'].to(device)

            outputs = model(images)
            loss = crit(outputs, labels)

            batch_size = len(images)
            sum_loss += loss.item() * batch_size
            num_samples += batch_size

            # Predicted class = index of the largest logit in each row.
            pred = outputs.argmax(dim=1)
            correct += (pred == labels).sum().item()  # counts only the matches

    if num_samples == 0:
        return float('nan'), float('nan')

    accuracy = correct / num_samples * 100
    return sum_loss / num_samples, accuracy


데이터셋 분할

In [None]:
# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalise each channel (the values are the commonly used ImageNet
# channel statistics).
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
total_dataset = CustomDataset('./', data_transforms)

# 80 % train / 10 % validation; the test split takes whatever is left so the
# three sizes always add up to the full dataset despite int() truncation.
train_size = int(total_dataset.num_images * 0.8)
validation_size = int(total_dataset.num_images * 0.1)
test_size = total_dataset.num_images - train_size - validation_size

# A fixed seed makes the split identical on every run.
split_generator = torch.Generator().manual_seed(42)
train_dataset, validation_dataset, test_dataset = random_split(
    total_dataset,
    [train_size, validation_size, test_size],
    generator=split_generator,
)

for template, count in (
    ('Total dataset size: {}', total_dataset.num_images),
    ('Train dataset size: {}', train_size),
    ('Validation dataset size: {}', validation_size),
    ('Test size: {}', test_size),
):
    print(template.format(count))


EfficientNet 모델 불러오기 & 훈련

In [None]:
from torchvision import models
import ssl

# NOTE(review): this disables TLS certificate verification for the whole
# process, and torch.hub.load below downloads *and executes* remote code.
# Kept so the cell behaves as before on hosts with broken certificate stores,
# but prefer fixing the certificates and deleting this line.
ssl._create_default_https_context = ssl._create_unverified_context

model = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_efficientnet_b0', pretrained=True)

# Hyperparameters
hy_batch = 32
hy_epoch = 80
hy_lr = 0.00001

# Disabled: examples of resuming from a previously saved model / checkpoint.
# PATH = '../../../home/bokyoungk/classification_models/vgg16/'
# model = torch.load(PATH+'min_model_23.pt')
# model.load_state_dict(torch.load(PATH+'min_model_state_23.pt'))
# checkpoint = torch.load(PATH+'min_all_23th.tar')
# model.load_state_dict(checkpoint['model'])
# optimizer.load_state_dict(checkpoint['optimizer'])


# Fine-tuning: swap the classifier's final linear layer for one with as many
# outputs as the dataset has classes, then optimise all parameters.
num_classes = total_dataset.num_classes
num_ftrs = model.classifier.fc.in_features
model.classifier.fc = nn.Linear(num_ftrs,num_classes)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=hy_lr)
print(model)

# Training batches are reshuffled every epoch (seeded, so runs stay
# reproducible); with shuffle=False the model saw the exact same batches in
# the exact same order each epoch. Evaluation loaders keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=hy_batch, shuffle=True,
                          generator=torch.Generator().manual_seed(42))
validation_loader = DataLoader(validation_dataset, batch_size=hy_batch, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=hy_batch, shuffle=False)


In [None]:
from torchsummary import summary

# Summarise the model for a (3, 224, 224) input, i.e. the image size produced
# by the Resize((224,224)) transform used for the dataset.
summary(model, (3,224,224))

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Directory that receives the saved model / checkpoint files.
PATH = '../../../home/bokyoungk/classification_models/efficientnet/'
# Create it up front: otherwise torch.save would only fail at SAVE_EPOCH,
# after all the preceding epochs have already been trained.
os.makedirs(PATH, exist_ok=True)

# Epoch (1-based) after which the model is written to disk. The evaluation
# cell loads the files named after epoch 60, so keep the two in sync.
SAVE_EPOCH = 60

# Thresholds used only by the (currently disabled) best-model saving below.
min_loss = 1
max_accuracy = 80.0

# Per-epoch history, consumed by the plotting cell.
all_train_loss = []
all_validation_loss = []
all_accuracy = []


for e in range(0, hy_epoch):
    print('------------------------------epoch {}-------------------------------'.format(e+1))
    train_loss = train_epoch(model,train_loader,optimizer=optimizer)
    validation_loss, accuracy = validate_epoch(model,validation_loader)
    print('train_loss: {}'.format(train_loss))
    print('validation_loss: {}'.format(validation_loss))
    print('Valid Acc: {}%'.format(accuracy))
    all_train_loss.append(train_loss)
    all_validation_loss.append(validation_loss)
    all_accuracy.append(accuracy)

    # Disabled: save the model with the lowest validation loss so far
    # if min_loss > validation_loss:
    #     min_loss = validation_loss
    #     torch.save(model,PATH+'min_model_{}.pt'.format(e+1))
    #     torch.save(model.state_dict(),PATH+'min_model_state_{}.pt'.format(e+1))
    #     torch.save({
    #         'model':model.state_dict(),
    #         'optimizer':optimizer.state_dict()
    #     },PATH+'min_all_{}th.tar'.format(e+1))
    
    # Disabled: save the model with the highest validation accuracy so far
    # if max_accuracy < accuracy:
    #     max_accuracy = accuracy
    #     torch.save(model,PATH+'max_acc_model_{}.pt'.format(e+1))
    #     torch.save(model.state_dict(),PATH+'max_acc_model_state_{}.pt'.format(e+1))
    #     torch.save({
    #         'model':model.state_dict(),
    #         'optimizer':optimizer.state_dict()
    #     },PATH+'max_acc_all_{}.tar'.format(e+1))

    # Snapshot after SAVE_EPOCH in three forms: the whole pickled model, the
    # weights alone, and a resumable checkpoint (weights + optimizer state).
    if (e+1)==SAVE_EPOCH:
        torch.save(model,PATH+'model_{}th.pt'.format(e+1))
        torch.save(model.state_dict(),PATH+'model_state_{}th.pt'.format(e+1))
        torch.save({
            'model':model.state_dict(),
            'optimizer':optimizer.state_dict()
        },PATH+'all_{}th.tar'.format(e+1))


In [None]:
# 시각화
x = np.arange(1,hy_epoch+1,step=1)

plt.figure(figsize=(15,10))
plt.subplot(1,2,1)
plt.xlabel('Epoch: ')
plt.ylabel('Loss: ')
plt.plot(x,all_train_loss,label='train loss')
plt.plot(x,all_validation_loss,label='validation loss')
plt.legend()
plt.show()

plt.subplot(1,2,2)
plt.xlabel('Epoch: ')
plt.ylabel('Accuracy(%): ')
plt.plot(x,all_accuracy)
plt.show()

모델 성능 평가

In [None]:
# Restore the epoch-60 weights into the model built earlier in this notebook.
# The original loaded the same weights three times (pickled model, state
# dict, checkpoint). Loading only the checkpoint's state dict is equivalent
# and avoids unpickling a whole model object, which runs arbitrary code and
# needs the hub model classes to be importable. map_location lets the cell
# also run on a machine without the GPU the checkpoint was saved from.
checkpoint = torch.load(PATH+'all_60th.tar', map_location=device)
model.load_state_dict(checkpoint['model'])
model.to(device)
model.eval()

correct = 0
total = len(test_loader.dataset)

with torch.no_grad():
    for item in test_loader:
        images = item['image'].to(device)
        labels = item['label'].to(device)

        outputs = model(images)
        # Predicted class = index of the largest logit in each row.
        pred = outputs.argmax(dim=1)
        correct += (pred==labels).sum().item()

print('Test accuracy of the model on the {} test images: {}%'.format(total, 100*correct/total))