# Dataset

In [1]:
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import os
import random
import numpy as np
import torch
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
os.getcwd()

'/opt/ml/input/data/train/resnet18_final'

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    

seed_everything(2021)

In [4]:
data_dir = '/opt/ml/input/data/train'
img_dir = f'{data_dir}/images'
df_path = f'{data_dir}/train.csv'


In [27]:
import Dataset
# normalize를 위한 대략적인 RGB 값 평균 
mean, std = (0.5, 0.5, 0.5), (0.2, 0.2, 0.2) 

transform = Dataset.get_transforms(mean=mean, std=std)

dataset = Dataset.MaskBaseDataset(
    img_dir = img_dir
)

# train dataset과 validation dataset을 8:2 비율로 나눕니다.
n_val = int(len(dataset) * 0.2)
n_train = len(dataset) - n_val
train_dataset, val_dataset = data.random_split(dataset, [n_train, n_val])

# 각 dataset에 augmentation 함수를 설정합니다.
train_dataset.dataset.set_transform(transform['train'])
val_dataset.dataset.set_transform(transform['val'])


# Data loader

In [6]:
BATCH_SIZE = 32

train_loader = data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=2,
    shuffle=True 
)

val_loader = data.DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    num_workers=2,
    shuffle=False
)


# Model

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [9]:
import Pretrained_Model
model = Pretrained_Model.pretrainedModel().to(device)


# optimizer

In [13]:
# optimizer = optim.SGD(model.parameters(), lr=0.01,
#                      momentum=0.9, weight_decay = 0.0005)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size= 10, gamma=0.1) #학습률 점차 감소

# Loss

In [14]:
import Loss
#criterion = Loss.FocalLoss()
criterion = F.cross_entropy

In [18]:
from torch.utils.tensorboard import SummaryWriter
writer  = SummaryWriter('./exp/18_class_last' )

# Training

In [22]:
import Train_Process
EPOCHS= 30
for epoch in tqdm(range(1, EPOCHS+1)):
    train_loss, train_acc, f1 = Train_Process.train(
                                    model = model,
                                    train_loader= train_loader, 
                                    loss = criterion,
                                    optimizer = optimizer, 
                                    writer = writer,
                                    device = device,
 )
    #scheduler.step()
    writer.add_scalar('Loss/train', train_loss, epoch)
    writer.add_scalar('acc/train', train_acc, epoch)
    writer.add_scalar('F1_score/train', f1, epoch)
    print(f'[{epoch}] train loss : {train_loss}, train acc : {train_acc}, train f1 : {f1:.4f}')
    
    valid_loss, valid_acc, val_f1 = Train_Process.evaluate(
                                            model = model, 
                                            val_loader = val_loader, 
                                            loss = criterion,
                                            writer = writer,
                                            device = device,
                                            )
    writer.add_scalar('Loss/valid', valid_loss, epoch)
    writer.add_scalar('acc/valid', valid_acc, epoch)
    writer.add_scalar('F1_score/valid',  val_f1 , epoch)

    print(f'[{epoch}] valid loss : {valid_loss}, valid acc : {valid_acc}, valid f1 : {val_f1:.4f}')

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=30.0), HTML(value='')))

Finished Training
[1] train loss : 2.4154041966539808e-06, train acc : 0.19406709176601053, train f1 : 0.0295
[1] valid loss : 0.5872509936761552, valid acc : 85.94399778209038, valid f1 : 0.0235
Finished Training
[2] train loss : 1.5482597518712282e-05, train acc : 0.17327418907679512, train f1 : 0.0295
[2] valid loss : 0.20538309916527542, valid acc : 94.5661214305517, valid f1 : 0.0286
Finished Training
[3] train loss : 9.936728929460514e-06, train acc : 0.18713612420293874, train f1 : 0.0296
[3] valid loss : 0.17523836410372814, valid acc : 94.87108400332687, valid f1 : 0.0281
Finished Training
[4] train loss : 3.5218213270127308e-06, train acc : 0.19406709176601053, train f1 : 0.0295
[4] valid loss : 0.14825712947889083, valid acc : 95.81369559190463, valid f1 : 0.0287
Finished Training
[5] train loss : 8.838258509058505e-06, train acc : 0.18713612420293874, train f1 : 0.0297
[5] valid loss : 0.43591691293741547, valid acc : 92.32048794011644, valid f1 : 0.0275
Finished Training
[

In [23]:

from albumentations import *
from albumentations.pytorch import ToTensorV2
test_dir = '/opt/ml/input/data/eval'

In [31]:
import Test_Dataset
# meta 데이터와 이미지 경로를 불러옵니다.
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'images')

# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]


transform = Compose([
            Resize(512, 384, p=1.0),
            Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)



test_dataset = Test_Dataset.TestDataset(image_paths, transform)

loader = data.DataLoader(
    test_dataset,
    shuffle=False
)


all_predictions = [] # 예측 라벨값
all_predictions_values = [] # 마지막 노드값
for images in loader:
    with torch.no_grad():
        images = images.to(device)
        pred = model(images)
        all_predictions_values.extend(pred.cpu().numpy())
        pred = pred.argmax(dim=-1)
        all_predictions.extend(pred.cpu().numpy())
        
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir, 'submission_final.csv'), index=False)
print('test inference is done!')

test inference is done!
