# F17만 학습

In [1]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from openpyxl import Workbook

cudnn.benchmark = True

In [2]:
# Choose the compute device once; later cells move the model and every batch onto `device`.
gpu_available = torch.cuda.is_available()
device = torch.device("cuda" if gpu_available else "cpu")

# Report which backend was picked so the run log records the hardware used.
if not gpu_available:
    print("------- CPU Working -------")
else:
    print("------- GPU Working -------")
    print("[Current GPU]: " + str(torch.cuda.get_device_name(0)))
print('[PyTorch version]: ', torch.__version__)

------- GPU Working -------
[Current GPU]: NVIDIA GeForce RTX 3090
[PyTorch version]:  1.11.0


In [3]:
# Training hyperparameters shared by the cells below.
batch_size = 32       # samples per training batch
epochs = 100          # number of passes over the training set
num_workers = 8       # DataLoader worker processes (author's note: 16 seemed worse)
learning_rate = 0.1   # initial SGD learning rate (author's note: 1e-5 did not work)

In [4]:
import numpy as np
import random
import os

def seed_everywhere(seed: int = 42) -> None:
    """Seed every random number generator this notebook uses.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), sets ``PYTHONHASHSEED`` in the
    environment, and switches cuDNN to deterministic mode.

    Note that this sets ``torch.backends.cudnn.benchmark = False``, which
    overrides the ``cudnn.benchmark = True`` set in the import cell.

    Args:
        seed: Seed value applied to every generator.
    """
    random.seed(seed)
    np.random.seed(seed)
    # Only affects hash randomisation of Python processes started after this point.
    os.environ["PYTHONHASHSEED"] = str(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Seed every visible GPU, not only the current one, so multi-GPU runs are covered too.
    torch.cuda.manual_seed_all(seed)

    # Ask cuDNN for deterministic algorithms and disable its autotuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed = 42
seed_everywhere(seed)

In [5]:
import torch
import torchvision # from torchvision import datasets
from torchvision import transforms  #import torchvision.transforms as transforms

def _resize_tensor_normalize():
    """Build a fresh Resize(224x224) -> ToTensor -> Normalize pipeline."""
    steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # Per-channel mean / std (the values commonly used with ImageNet-pretrained models).
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
    return transforms.Compose(steps)


# Training and validation use the same preprocessing; no augmentation is applied.
train_transform = _resize_tensor_normalize()
valid_transform = _resize_tensor_normalize()

In [6]:
# Root directory of the dataset; every entry directly inside it is treated as one subject.
folder_path = 'D:\\experiment_dataset'
subject_list = folder_list = os.listdir(folder_path)

In [7]:
# Build one (train, validation) DataLoader pair per subject.
# Both lists are index-aligned with `subject_list` (they are zipped together by
# the training cell), so no subject may be skipped here.
valid_batch_size = 4

train_loader_set = []
valid_loader_set = []

for one_sub in subject_list:
    # Derive both roots from `folder_path` so the dataset location is defined in one place:
    #   <folder_path>/<subject>/train/train  and  <folder_path>/<subject>/train/val
    subject_root = os.path.join(folder_path, one_sub, 'train')
    train_path = os.path.join(subject_root, 'train')
    valid_path = os.path.join(subject_root, 'val')

    train_dataset = torchvision.datasets.ImageFolder(train_path, transform=train_transform)
    valid_dataset = torchvision.datasets.ImageFolder(valid_path, transform=valid_transform)

    # Only the training data is shuffled; validation order stays fixed.
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=valid_batch_size, shuffle=False, num_workers=num_workers)

    train_loader_set.append(train_loader)
    valid_loader_set.append(valid_loader)

In [8]:
# `train_dataset` / `valid_dataset` are the variables left over from the final
# iteration of the loader-building loop, so this inspects the last subject only.
print(train_dataset)

# Shape of the first transformed image and the number of samples in each split.
print(train_dataset[0][0].size(), len(train_dataset))
print(valid_dataset[0][0].size(), len(valid_dataset))

Dataset ImageFolder
    Number of datapoints: 455
    Root location: D:\experiment_dataset\M9\train\train
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=None)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )
torch.Size([3, 224, 224]) 455
torch.Size([3, 224, 224]) 56


In [9]:
from torchvision import models
import torch.nn as nn

# Load a pretrained ResNet-152 to use as a fixed feature extractor.
model = models.resnet152(pretrained=True)

# Freeze all pretrained weights so backward() computes no gradients for them;
# only the classifier head created below will be trainable.
model.requires_grad_(False)

# Replace the final fully connected layer with a new 7-way classifier.
# The input width is read from the existing head rather than hard-coded.
num_ftrs = model.fc.in_features
print('num_ftrs', num_ftrs)

model.fc = nn.Linear(num_ftrs, 7)  # could be generalised to len(class_names)
print('model_ft.fc', model.fc)

model = model.to(device)

loss_func = nn.CrossEntropyLoss()

# Only the new head's parameters are handed to the optimiser.
optimizer = optim.SGD(
    model.fc.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=0.0001,
)

# Multiplies the learning rate by 0.1 every 30 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

num_ftrs 2048
model_ft.fc Linear(in_features=2048, out_features=7, bias=True)


In [13]:
import time
import datetime
from datetime import datetime
start_time = time.time()

# Timestamp of this run, formatted as YYYYMMDD_HHMMSS.
work_date = datetime.today().strftime("%Y%m%d_%H%M%S")
print(work_date)

# Directory that receives the checkpoints. A per-run sub-directory named after
# `work_date` was tried previously and is currently not used.
model_save_dir = './saved_model'
print('model_save_dir:', model_save_dir)

20220607_090445
model_save_dir: ./saved_model


In [11]:
import os
def createFolder(directory):
    """Create `directory` (including missing parents) if it does not exist yet.

    Failures are reported on stdout instead of being raised, so a notebook run
    continues even when the directory cannot be created.

    Args:
        directory: Path of the directory to create.
    """
    try:
        # exist_ok=True removes the check-then-create race of a separate exists() test.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print('Error: Creating directory. ' + directory)
# Make sure the checkpoint directory exists before any checkpoint is written into it.
createFolder(model_save_dir)

In [12]:
from openpyxl import Workbook

# Best epoch-level training accuracy recorded so far, keyed by subject.
# Kept at module level because train() is called once per epoch and has to
# remember the best value between calls.
_best_train_acc = {}


def train(epoch, subject, train_loader):
    """Run one training epoch and checkpoint the model when accuracy improves.

    Relies on the notebook-level globals `model`, `optimizer`, `loss_func`,
    `device`, `epochs`, `model_save_dir` and the openpyxl worksheet `sheet`,
    all of which must exist before this function is called.

    Per-iteration and per-epoch metrics are printed and appended to `sheet`.
    A checkpoint named ``<subject>.pt`` is written to `model_save_dir` only
    when this epoch's training accuracy beats the best value seen so far for
    `subject`.

    Args:
        epoch: Zero-based epoch index (displayed as ``epoch + 1``).
        subject: Subject identifier; used as checkpoint file name and as key
            for the best-accuracy record.
        train_loader: Iterable yielding ``(batch_sample, batch_label)`` pairs.

    Returns:
        Tuple ``(epoch_train_loss, epoch_train_accuracy)`` averaged over all
        samples seen in this epoch.

    Raises:
        ValueError: If `train_loader` yields no samples.
    """
    model.train()

    if epoch == 0:
        # A run that starts from the first epoch starts a fresh "best" record.
        _best_train_acc.pop(subject, None)

    # Log every `log_every` iterations; the window accumulators are reset after each log line.
    log_every = 1

    # Accumulated over the whole epoch.
    epoch_loss = 0.0
    epoch_len = 0
    epoch_correct = 0

    # Accumulated since the last log line.
    window_loss = 0.0
    window_len = 0
    window_correct = 0

    # Iterate lazily: materialising the loader as a list would keep every
    # batch of the epoch in memory at once.
    for batch_itr, (batch_sample, batch_label) in enumerate(train_loader):
        sample = batch_sample.to(device)
        label = batch_label.to(device)

        optimizer.zero_grad()
        output = model(sample)
        loss = loss_func(output, label)
        loss.backward()
        optimizer.step()

        _, preds = torch.max(output, 1)
        batch_correct = preds.eq(label).sum().item()
        batch_count = len(label)
        # loss is the batch mean, so weight it by the batch size before summing.
        batch_loss_sum = loss.item() * batch_label.size(0)

        window_correct += batch_correct
        window_loss += batch_loss_sum
        window_len += batch_count

        epoch_correct += batch_correct
        epoch_loss += batch_loss_sum
        epoch_len += batch_count

        if batch_itr % log_every == 0:
            message = '[Epoch {}/{}] Iteration {} -> Train Loss: {:.4f}, Accuracy: {:.4f}'.format(
                epoch + 1, epochs, batch_itr + 1,
                window_loss / window_len, window_correct / window_len)
            print(message)
            sheet.append([message])

            window_loss = 0.0
            window_len = 0
            window_correct = 0

    if epoch_len == 0:
        raise ValueError('train_loader for subject {} produced no samples'.format(subject))

    # Sample-weighted averages over the whole epoch.
    epoch_train_loss = epoch_loss / epoch_len
    epoch_train_accuracy = epoch_correct / epoch_len

    # Checkpoint only when this epoch beats the best training accuracy so far.
    if epoch_train_accuracy > _best_train_acc.get(subject, 0):
        _best_train_acc[subject] = epoch_train_accuracy

        model_epoch = '{}.pt'.format(subject)
        final_save_dir = os.path.join(model_save_dir, model_epoch)
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()
        }, final_save_dir)
        print("=> {}/{} saving checkpoint".format(epoch + 1, epochs))

    # Report the epoch summary every epoch, whether or not a checkpoint was written.
    summary = '{}/{}  Train Loss: {:.4f}, Train Accuracy: {:.4f}'.format(
        epoch + 1, epochs, epoch_train_loss, epoch_train_accuracy)
    print(summary)
    sheet.append([summary])

    return epoch_train_loss, epoch_train_accuracy

In [15]:
def test(subject, test_loader):
    """Evaluate the global `model` on `test_loader` and log the accuracy.

    Relies on the notebook-level globals `model`, `device` and the openpyxl
    worksheet `sheet`. The result is printed and appended to `sheet` as one
    row ``[subject, 'Test accuracy:', '<value>']``.

    Args:
        subject: Subject identifier used to label the result.
        test_loader: Iterable yielding ``(batch_sample, batch_label)`` pairs.

    Returns:
        Fraction of correctly classified samples over the whole loader.

    Raises:
        ValueError: If `test_loader` yields no samples.
    """
    model.eval()

    total_len = 0
    total_correct = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch_sample, batch_label in test_loader:
            sample = batch_sample.to(device)
            label = batch_label.to(device)

            output = model(sample)
            _, preds = torch.max(output, 1)

            total_correct += preds.eq(label).sum().item()
            total_len += len(label)

    if total_len == 0:
        raise ValueError('test_loader for subject {} produced no samples'.format(subject))

    # Computed once after the loop instead of being overwritten on every batch.
    test_accuracy = total_correct / total_len

    print('{} Test accuracy: {:.4f}'.format(subject, test_accuracy))

    # One worksheet row per evaluation.
    sheet.append(['{}'.format(subject),
                  'Test accuracy:',
                  '{:.4f}'.format(test_accuracy)])

    return test_accuracy

In [16]:
# Subject trained by this notebook. Selecting it by name rather than by list
# position keeps the run independent of the order os.listdir() happens to return.
TARGET_SUBJECT = 'F17'


def _split_elapsed(seconds):
    """Split a duration in seconds into whole (hours, minutes, seconds)."""
    hours, remainder = divmod(int(seconds), 3600)
    minutes, secs = divmod(remainder, 60)
    return hours, minutes, secs


if TARGET_SUBJECT not in subject_list:
    print('Subject {} not found in subject_list; nothing to train.'.format(TARGET_SUBJECT))

for one_subject, one_train_loader, one_valid_loader in zip(subject_list, train_loader_set, valid_loader_set):
    if one_subject != TARGET_SUBJECT:
        continue
    print(one_subject, one_train_loader, one_valid_loader)

    # New workbook with a single active sheet; train() and test() append their
    # log rows to the global `sheet`.
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "정확도"

    start_epoch = 0
    total_start_time = time.time()

    for epoch in range(start_epoch, epochs):
        start_time = time.time()

        train(epoch, subject=one_subject, train_loader=one_train_loader)
        test(subject=one_subject, test_loader=one_valid_loader)

        # Advance the StepLR schedule once per epoch, after the optimizer steps
        # of this epoch; without this call the learning rate never decays.
        exp_lr_scheduler.step()

        # Save after every epoch so partial results survive an interrupted run.
        workbook.save('./Training_Results_{}.xlsx'.format(one_subject))

        hours, mins, secs = _split_elapsed(time.time() - start_time)
        print('{} hours {} mins {} secs for training'.format(hours, mins, secs))
        print()

    hours, mins, secs = _split_elapsed(time.time() - total_start_time)
    print(' Training Time: {} hours {} mins {} secs for training'.format(hours, mins, secs))
    print('===================================================================================================================')

F17 <torch.utils.data.dataloader.DataLoader object at 0x000001B5865EB970> <torch.utils.data.dataloader.DataLoader object at 0x000001B586615820>
[Epoch 1/100] Iteration 1 -> Train Loss: 65.1286, Accuracy: 0.0938
[Epoch 1/100] Iteration 2 -> Train Loss: 40.5272, Accuracy: 0.2500
[Epoch 1/100] Iteration 3 -> Train Loss: 41.0845, Accuracy: 0.1875
[Epoch 1/100] Iteration 4 -> Train Loss: 36.7792, Accuracy: 0.0625
[Epoch 1/100] Iteration 5 -> Train Loss: 53.6249, Accuracy: 0.2188
[Epoch 1/100] Iteration 6 -> Train Loss: 60.3558, Accuracy: 0.2188
[Epoch 1/100] Iteration 7 -> Train Loss: 41.0046, Accuracy: 0.3125
[Epoch 1/100] Iteration 8 -> Train Loss: 52.7502, Accuracy: 0.2188
[Epoch 1/100] Iteration 9 -> Train Loss: 31.0448, Accuracy: 0.2812
[Epoch 1/100] Iteration 10 -> Train Loss: 25.5890, Accuracy: 0.3438
[Epoch 1/100] Iteration 11 -> Train Loss: 25.5904, Accuracy: 0.3125
[Epoch 1/100] Iteration 12 -> Train Loss: 38.2391, Accuracy: 0.3125
[Epoch 1/100] Iteration 13 -> Train Loss: 27.6523