In [1]:
import os
import pandas as pd
from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from tqdm.notebook import tqdm

from pytz import timezone
import datetime as dt

from f1_score import f1_loss

import wandb

In [4]:
from models.model_bj import resnetbase3 as MaskModel
from datasets.dataset_kj import MaskDatasetA as MaskDataset
from trans.trans_kj import A_simple_trans as TrainTrans
from trans.trans_kj import A_just_tensor as TestTrans

CLASS_NUM = 18
NUM_WORKERS = 4
BATCH_SIZE = 32
NUM_EPOCH = 20
SAVE_INTERVAL = 3
LEARNING_RATE = 1e-3

config={"epochs": NUM_EPOCH, "batch_size": BATCH_SIZE, "learning_rate" : LEARNING_RATE}
wandb.init(project='lv1_p', entity='presto105', config=config)
wandb.run.name = 'resnet18_BS32'

load_path = ''

comment = ''

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

In [5]:
c = ''
log = []

test_dir = '/opt/ml/input/data/train'
eval_dir = '/opt/ml/input/data/eval'
save_dir = '/opt/ml/image-classification-level1-25/save/'
now = (dt.datetime.now().astimezone(timezone("Asia/Seoul")).strftime("%Y%m%d_%H%M%S"))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = MaskModel(CLASS_NUM)
if load_path : model.load_state_dict(torch.load(load_path))    
model = model.to(device)

loss_fn = torch.nn.CrossEntropyLoss()
optm = torch.optim.Adam(model.parameters())

TrainTransform = TrainTrans()
TestTransfrom = TestTrans()

dataset_train_mask = MaskDataset(test_dir, train='train', transform=TrainTransform)
dataset_test_mask = MaskDataset(test_dir, train='test', transform=TestTransfrom)

dataloader_train_mask = DataLoader(dataset=dataset_train_mask,
                                      batch_size=BATCH_SIZE,
                                      num_workers=NUM_WORKERS,
                                      )
dataloader_test_mask = DataLoader(dataset=dataset_test_mask,
                                      batch_size=BATCH_SIZE,
                                      num_workers=NUM_WORKERS,
                                      )

dataloaders = {
        "train": dataloader_train_mask,
        "test": dataloader_test_mask
    }

In [6]:
log.append(f'{c:#^80}')
log.append(f'  [Comment]')
log.append(f'{comment}')
log.append(f'{c:#^80}')
log.append(c); log.append(c); log.append(c)

log.append(f'Model         : {model.__class__.__name__}')
log.append(f'  load_state  : {load_path}')
log.append(f'Dataset       : {dataset_train_mask.__class__.__name__}')
log.append(f'  train_len    {len(dataset_train_mask):>10}')
log.append(f'  test_len     {len(dataset_test_mask):>10}')
log.append(f'Train_trans   : {TrainTrans.__name__}')
log.append(f'Test_trans    : {TestTrans.__name__}')
log.append(f'Start_Date    : {now}')
log.append(f'Device        : {device}')
log.append(f'CLASS_NUM     : {CLASS_NUM}')
log.append(f'NUM_WORKERS   : {NUM_WORKERS}')
log.append(f'BATCH_SIZE    : {BATCH_SIZE}')
log.append(f'NUM_EPOCH     : {NUM_EPOCH}')
log.append(f'SAVE_INTERVAL : {SAVE_INTERVAL}')


for line in log:
    print(line)
    
log.append(c); log.append(c); log.append(c)

################################################################################
  [Comment]

################################################################################



Model         : resnetbase3
  load_state  : 
Dataset       : MaskDatasetA
  train_len         15120
  test_len           3780
Train_trans   : A_simple_trans
Test_trans    : A_just_tensor
Start_Date    : 20210826_161022
Device        : cuda:0
CLASS_NUM     : 18
NUM_WORKERS   : 4
BATCH_SIZE    : 32
NUM_EPOCH     : 20
SAVE_INTERVAL : 3


In [7]:
best_test_accuracy = 0.
best_test_loss = float('inf')
best_f1 = 0.

for epoch in range(NUM_EPOCH):
    for phase in ["train", "test"]:
        running_loss = 0.
        running_acc = 0.
        running_f1 = 0.
        
        if phase == "train":
            model.train()
        elif phase == "test":
            model.eval() 
            
        for idx, (images, labels) in enumerate(pbar := tqdm(dataloaders[phase]), start = 1):
            images, labels = images.to(device), labels.to(device)

            optm.zero_grad()
            
            with torch.set_grad_enabled(phase == "train"):
                logits = model(images)
                _, preds = torch.max(logits, 1)
                loss = loss_fn(logits, labels)
                if phase == "train":
                    loss.backward()  # 모델의 예측 값과 실제 값의 CrossEntropy 차이를 통해 gradient 계산
                    optm.step()  # 계산된 gradient를 가지고 모델 업데이트
            
            running_loss += loss.item() * images.size(0)
            running_acc += torch.sum(preds == labels.data)
            running_f1 += f1_loss(labels.data, preds)
            pbar.set_description(f"loss : {running_loss/(idx*BATCH_SIZE):.3f}, acc : {running_acc/(idx*BATCH_SIZE):.3f}, f1 : {running_f1/(idx*BATCH_SIZE):.3f}")
    
        epoch_loss = running_loss / len(dataloaders[phase].dataset)
        epoch_acc = running_acc / len(dataloaders[phase].dataset)
        epoch_f1 = running_f1 / len(dataloaders[phase].dataset)

        log.append(f"[{phase.upper():<5}] Epoch {epoch:0>3d} // (avg) Loss : {epoch_loss:.3f}, Accuracy : {epoch_acc:.3f}, F1 : {epoch_f1:.3f}")
        wandb.log({'accuracy': epoch_acc, 'loss': epoch_loss, 'F1': epoch_f1})
        print(log[-1])
        
        if phase == "test":
            if best_test_accuracy < epoch_acc:
                best_test_accuracy = epoch_acc
            if best_test_loss > epoch_loss:
                best_test_loss = epoch_loss
            if best_f1 < epoch_f1:
                best_f1 = epoch_f1
            if epoch % SAVE_INTERVAL == 0:
                torch.save(model.state_dict(), os.path.join(save_dir, f'{now}_{model.__class__.__name__}_epoch_{epoch:0>3d}.pt'))

torch.save(model.state_dict(), os.path.join(save_dir, f'{now}_{model.__class__.__name__}_finish_{NUM_EPOCH:0>3d}.pt'))


log.append(c)
print(log[-1])   
log.append(c)
print(log[-1])  
log.append(c)
print(log[-1])  
log.append(f'{c:#^80}')
print(log[-1])            
log.append(f':::학습종료:::')
print(log[-1])
log.append(f"최고 accuracy : {best_test_accuracy}, 최저 loss : {best_test_loss}, 최고 F1 : {best_f1}")
print(log[-1])
log.append(f'{c:#^80}')
print(log[-1]) 

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


[TRAIN] Epoch 000 // (avg) Loss : 1.834, Accuracy : 0.466, F1 : 0.249


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[TEST ] Epoch 000 // (avg) Loss : 5.985, Accuracy : 0.119, F1 : 0.157


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


[TRAIN] Epoch 001 // (avg) Loss : 1.596, Accuracy : 0.473, F1 : 0.266


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[TEST ] Epoch 001 // (avg) Loss : 5.066, Accuracy : 0.093, F1 : 0.195


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


[TRAIN] Epoch 002 // (avg) Loss : 1.567, Accuracy : 0.463, F1 : 0.269


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[TEST ] Epoch 002 // (avg) Loss : 70.182, Accuracy : 0.075, F1 : 0.148


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


[TRAIN] Epoch 003 // (avg) Loss : 1.527, Accuracy : 0.480, F1 : 0.272


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[TEST ] Epoch 003 // (avg) Loss : 3.051, Accuracy : 0.305, F1 : 0.040


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


[TRAIN] Epoch 004 // (avg) Loss : 1.528, Accuracy : 0.470, F1 : 0.268


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[TEST ] Epoch 004 // (avg) Loss : 16.975, Accuracy : 0.256, F1 : 0.088


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


[TRAIN] Epoch 005 // (avg) Loss : 1.514, Accuracy : 0.465, F1 : 0.270


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[TEST ] Epoch 005 // (avg) Loss : 5.472, Accuracy : 0.298, F1 : 0.059


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


[TRAIN] Epoch 006 // (avg) Loss : 1.534, Accuracy : 0.470, F1 : 0.270


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[TEST ] Epoch 006 // (avg) Loss : 6.282, Accuracy : 0.355, F1 : 0.043


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


[TRAIN] Epoch 007 // (avg) Loss : 1.538, Accuracy : 0.444, F1 : 0.270


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[TEST ] Epoch 007 // (avg) Loss : 2.059, Accuracy : 0.359, F1 : 0.021


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


[TRAIN] Epoch 008 // (avg) Loss : 1.538, Accuracy : 0.433, F1 : 0.269


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[TEST ] Epoch 008 // (avg) Loss : 2.992, Accuracy : 0.348, F1 : 0.052


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


[TRAIN] Epoch 009 // (avg) Loss : 1.527, Accuracy : 0.429, F1 : 0.271


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[TEST ] Epoch 009 // (avg) Loss : 5.385, Accuracy : 0.344, F1 : 0.104


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


[TRAIN] Epoch 010 // (avg) Loss : 1.529, Accuracy : 0.441, F1 : 0.271


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[TEST ] Epoch 010 // (avg) Loss : 3.748, Accuracy : 0.081, F1 : 0.129


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))




KeyboardInterrupt: 

In [48]:
class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)

# meta 데이터와 이미지 경로를 불러옵니다.
submission = pd.read_csv(os.path.join(eval_dir, 'info.csv'))
image_dir = os.path.join(eval_dir, 'images')

# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]
transform = transforms.Compose([
    transforms.Resize((512, 384), Image.BILINEAR),
    # transforms.CenterCrop(300),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2)),
])
dataset = TestDataset(image_paths, transform)

loader = DataLoader(
    dataset,
    shuffle=False
)

# 모델을 정의합니다. (학습한 모델이 있다면 torch.load로 모델을 불러주세요!)
device = torch.device('cuda')
model.eval()

# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
for images in tqdm(loader):
    with torch.no_grad():
        images = images.to(device)
        pred = model(images)
        pred = pred.argmax(dim=-1)
        all_predictions.extend(pred.cpu().numpy())
submission['ans'] = all_predictions

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=12600.0), HTML(value='')))




In [49]:
# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(save_dir, f'{now}_result.csv'), index=False)
log.append(f'test inference is done!')
print(log[-1])
log.append(c)
print(log[-1])
log.append(f'{c:-^80}')
print(log[-1])
log.append(c)
print(log[-1])



# log 저장
with open(os.path.join(save_dir, f'{now}.log'), "w") as f:
    now = (dt.datetime.now().astimezone(timezone("Asia/Seoul")).strftime("%Y%m%d_%H%M%S"))
    log.append(f'Finish_Date    : {now}')
    print(log[-1])
    for line in log: 
        f.write(line+'\n')

test inference is done!

--------------------------------------------------------------------------------

Finish_Date    : 20210826_102851
