In [1]:
import torch
import glob
import os
# glob 결과 숫자 오름차순으로 정리해주는 라이브러리, 기능적으로 필요하지 않았음을 깨달았으나
# 정렬 작업이 유지보수를 가정했을 때 충분히 의미 있다고 생각해서 그냥 놔두기로 함
import natsort
from PIL import Image
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import nn
from torchvision import models
from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter

import numpy as np
import time

In [2]:
# 구글 드라이브 마운트
from google.colab import drive
drive.mount('/content/drive')
import drive.MyDrive.Colab_Notebooks.resnet_datanmodel as datanmodel

Mounted at /content/drive


In [3]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

In [4]:
# 경로 설정, py파일로 변환시 경로는 변경되어야 한다.
# local path
# path=os.path.abspath('../')
# colab path
path=os.path.abspath('./drive/MyDrive/Colab_Notebooks/')

# Resize: 크기를 224, 224로 맞춘다
# ToTensor: 데이터 타입을 Tensor로 만든다. Tensor의 원소는 0~1로 정해진다.(https://pytorch.org/vision/stable/generated/torchvision.transforms.ToTensor.html#torchvision.transforms.ToTensor)
# custom으로 transform를 작성하는 것도 가능하다.
transforms=transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor()])

In [5]:
cnd_test=datanmodel.cnd_data(file_path=path, train=False, transforms=transforms)

In [6]:
batch=20
cnd_dataloader=DataLoader(cnd_test, batch_size=batch, shuffle=True)

In [7]:
test_model=datanmodel.ResNet_compat().to(device)
# summary(test_model, input_size=(3, 224, 224))
# print(test_model)

In [8]:
loss_f= nn.CrossEntropyLoss()

In [9]:
log_save_path=os.path.abspath('./drive/MyDrive/Colab_Notebooks/resnet/resnet_log/')
weight_save_path=os.path.abspath('./drive/MyDrive/Colab_Notebooks/resnet/resnet_pth/')

In [10]:
writer=SummaryWriter(log_save_path)

In [11]:
weight_list=natsort.natsorted(glob.glob(weight_save_path+'/*.pth'), reverse=False)

In [12]:
# 30 epoch까지의 값들을 확인한다.
START_EPOCHS=30
EPOCHS=35
weight_list=weight_list[START_EPOCHS:EPOCHS]

In [None]:
for epoch_path in weight_list:

    print(epoch_path)

    # GPU 사용 불가시
    if device=='cpu':
      loaded_weight=torch.load(weight_list[0], map_location=torch.device('cpu'))
      if isinstance(test_model,nn.DataParallel):
        print('cpu 병렬')
      else:
        print('cpu 병렬 x')

    # GPU 사용 가능시
    else:
      loaded_weight=torch.load(weight_list[0])
      if isinstance(test_model,nn.DataParallel):
        print('gpu 병렬')
      else:
        print('gpu 병렬 x')

    model_key=test_model.state_dict().keys()
    weight_key=loaded_weight.keys()

    diff_list=list()
    for key in weight_key:
      if key not in model_key:
        diff_list.append(key)

    for diff_key in diff_list:
      del loaded_weight[diff_key]

    test_model.load_state_dict(loaded_weight)

    epoch=int(epoch_path.split('_')[-1].split('.')[0])


    losses=[]
    accs=[]

    # epoch_total_loss: 1 epoch에서 발생한 loss 누적값, 750(1 epoch batch수)을 나눠서 평균 loss값을 구하는데 사용할 예정
    epoch_total_loss=0
    epoch_total_acc=0
    start_time=time.time()


    for i, inp in enumerate(cnd_dataloader):

        input, label= inp
        input, label= input.to(device), torch.Tensor(label).to(device)


        # test_model을 태운 다음 loss를 계산한다.
        output= test_model(input)
        loss= loss_f(output, label)

        # accuracy를 계산한다.
        correct=0
        for t in zip(output.tolist(), label.tolist()):
          if t[0][0] >= 0.5:
              ans=0
          else:
              ans=1
          if ans==t[1]:
            correct+=1
          else:
            pass

        # 계산한 loss를 losses에 추가한다.


        epoch_total_loss+=loss.item()
        epoch_total_acc+=correct


    end_time=time.time()

    avg_loss=(epoch_total_loss/100) / len(cnd_test)
    avg_acc=epoch_total_acc / len(cnd_test)

    print(f'Loss [{epoch+1}](epoch): ', avg_loss)
    print(f'Accuracy [{epoch+1}](epoch): ', avg_acc)
    print('time taken(per epoch):', end_time-start_time)

    losses.append(avg_loss)
    accs.append(avg_acc)

    # writer.add_scalar("Loss / epoch ", epoch_total_loss/len(cnd_test), epoch)
    # writer.add_scalar("Accuracy / epoch", epoch_total_acc/len(cnd_test), epoch)

writer.close()


/content/drive/MyDrive/Colab_Notebooks/resnet/resnet_pth/model_weights_30.pth
cpu 병렬 x
Loss [31](epoch):  0.0001812850484251976
Accuracy [31](epoch):  0.946
time taken(per epoch): 541.6370379924774
/content/drive/MyDrive/Colab_Notebooks/resnet/resnet_pth/model_weights_31.pth
cpu 병렬 x
Loss [32](epoch):  0.00018184634417295456
Accuracy [32](epoch):  0.948
time taken(per epoch): 175.76138830184937
/content/drive/MyDrive/Colab_Notebooks/resnet/resnet_pth/model_weights_32.pth
cpu 병렬 x
Loss [33](epoch):  0.0001829022765159607
Accuracy [33](epoch):  0.942
time taken(per epoch): 174.63551259040833
/content/drive/MyDrive/Colab_Notebooks/resnet/resnet_pth/model_weights_33.pth
cpu 병렬 x
Loss [34](epoch):  0.0001810843101143837
Accuracy [34](epoch):  0.95
time taken(per epoch): 173.35347723960876
/content/drive/MyDrive/Colab_Notebooks/resnet/resnet_pth/model_weights_34.pth
cpu 병렬 x


In [None]:
# 폴더에 가중치가 있으면 불러온다.
# 아래와 같이 코드가 복잡한 이유는 GPU일 때와 CPU일 때의 차이가 있기 때문에 이를 맞춰줘야 했기 때문
# 또한 병렬 처리가 된 GPU의 경우 추가로 key를 생성하는데, 비병렬 상황에서는 이를 제거해야 된다.
weight_list=natsort.natsorted(glob.glob(weight_save_path+'/*.pth'), reverse=True)

if weight_list:
  start_epoch=int(weight_list[0].split('_')[-1].split('.')[0])+1
  print(f'{start_epoch+1} epoch 부터 시작합니다.')

  # GPU 사용 불가시
  if device=='cpu':
    loaded_weight=torch.load(weight_list[0], map_location=torch.device('cpu'))
    if isinstance(train_model,nn.DataParallel):
      print('cpu 병렬')
    else:
      print('cpu 병렬 x')

  # GPU 사용 가능시
  else:
    loaded_weight=torch.load(weight_list[0])
    if isinstance(train_model,nn.DataParallel):
      print('gpu 병렬')
    else:
      print('gpu 병렬 x')

  model_key=train_model.state_dict().keys()
  weight_key=loaded_weight.keys()

  diff_list=list()
  for key in weight_key:
    if key not in model_key:
      diff_list.append(key)

  for diff_key in diff_list:
    del loaded_weight[diff_key]

  train_model.load_state_dict(loaded_weight)

else:
  start_epoch=0
  print('처음부터 시작합니다.')