In [1]:
import copy
import csv
import glob
import os
import random
import re
import time

import albumentations as A
import cv2
import numpy as np
import pandas as pd
import PIL
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
import wandb
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
from torch.utils.data.dataloader import DataLoader
from tqdm import tqdm

import parse_config

In [2]:
# Fix every random number generator the pipeline can draw from so that a
# Restart & Run All reproduces the same splits, augmentations and weights.
SEED = 123
# Python's own RNG: used by augmentation libraries that draw from `random`
# (the albumentations pipeline defined in this notebook may rely on it,
# depending on the installed version).
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Trade cuDNN autotuning speed for deterministic kernel selection.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# 1. 데이터 로드

In [3]:
# Root folder of the training split (read by LoadCSV for train.csv and images).
train_dir = '/opt/ml/input/data/train'
# Root folder of the evaluation split (info.csv and images used at inference).
test_dir = '/opt/ml/input/data/eval'
# Parent folder for checkpoints; a timestamped sub-folder is created per run.
save_dir = './saved/models/'

### 하이퍼파라미터

In [4]:
# timm backbone to fine-tune ('efficientnet_b1' was the earlier alternative).
model_name = 'vit_large_r50_s32_384'
learning_rate = 5e-5
batch_size = 12
T_max = 50          # intended cosine-annealing period, in epochs
epochs = 20
earlystop = 5       # epochs without validation-loss improvement before stopping

# ImageNet channel statistics, matching the pretrained timm backbones.
MEAN_IMAGENET = [0.485, 0.456, 0.406]
STD_IMAGENET = [0.229, 0.224, 0.225]


def _eval_transform(size):
    """Return the deterministic resize -> normalize -> tensor pipeline for a square `size`."""
    return A.Compose([
        A.Resize(size, size),
        A.Normalize(mean=MEAN_IMAGENET, std=STD_IMAGENET),
        ToTensorV2()
    ])


# Albumentations pipelines keyed by usage. 'VIT_test' / 'ENN_test' differ only
# in the input resolution expected by the ViT (384) and EfficientNet (224) models.
A_transform = {
    'train':
        A.Compose([
            A.Resize(512, 512),
            A.RandomCrop(384, 384),
            A.HorizontalFlip(p=0.5),
            # NOTE(review): A.Cutout is deprecated in newer albumentations
            # releases in favour of A.CoarseDropout; kept for the pinned version.
            A.Cutout(num_holes=8, max_h_size=32, max_w_size=32),
            A.ElasticTransform(),
            A.Normalize(mean=MEAN_IMAGENET, std=STD_IMAGENET),
            ToTensorV2()
        ]),
    'valid': _eval_transform(384),
    'VIT_test': _eval_transform(384),
    'ENN_test': _eval_transform(224),
}



In [5]:
class LoadCSV():
    """Build (once) and load the flattened ``path,label`` training table.

    ``<dir>/train.csv`` holds one row per person (``id``, ``gender``, ``age``,
    ``path``); every image found under ``<dir>/new_images/<path>/`` becomes one
    row of ``<dir>/trans_train.csv``. The label is the sum of three offsets:

    * mask state from the file name: ``incorrect`` -> +6, ``normal`` -> +12,
      anything else -> +0
    * gender: ``female`` -> +3
    * age: 30..59 -> +1, 60 and above -> +2

    Known annotation errors listed in ``incorrect_labels`` are corrected while
    the table is built.

    An existing ``trans_train.csv`` is reused as-is; delete it to force
    regeneration (for example after changing the labelling rules).

    Args:
        dir: root folder of the training split.

    Attributes:
        df: DataFrame with the columns ``path`` and ``label``.
    """

    def __init__(self, dir):
        self.dir = dir
        # Use the directory that was passed in rather than a notebook global.
        self.img_dir = self.dir + '/new_images/'
        self.origin_csv_path = self.dir + '/train.csv'
        self.trans_csv_path = self.dir + '/trans_train.csv'
        self.incorrect_labels = {'error_in_female' : ['006359', '006360', '006361', '006362', '006363', '006364'],
                                'error_in_male' : ['001498-1', '004432'],
                                'swap_normal_incorrect' : ['000020', '004418', '005227']}

        if not os.path.exists(self.trans_csv_path):
            self._makeCSV()
        self.df = pd.read_csv(self.trans_csv_path)

    def _label_for(self, data, img_path):
        """Return the integer class for one image of the person described by `data`."""
        person_id = data['id']

        # Mask state comes from the file name only, so directory names can
        # never trigger a false match.
        file_name = os.path.basename(img_path)
        if 'incorrect' in file_name:
            mask_offset = 6
        elif 'normal' in file_name:
            mask_offset = 12
        else:
            mask_offset = 0
        # These people have their "incorrect" and "normal" photos swapped.
        if person_id in self.incorrect_labels['swap_normal_incorrect']:
            if mask_offset == 6:
                mask_offset = 12
            elif mask_offset == 12:
                mask_offset = 6

        # Resolve the true gender before encoding it.
        gender = data['gender']
        if person_id in self.incorrect_labels['error_in_female']:
            gender = 'male'      # recorded as female, actually male
        elif person_id in self.incorrect_labels['error_in_male']:
            gender = 'female'    # recorded as male, actually female
        gender_offset = 3 if gender == 'female' else 0

        age = data['age']
        if age >= 60:
            age_offset = 2
        elif age >= 30:
            age_offset = 1
        else:
            age_offset = 0

        # The three attributes are independent and must all contribute.
        return mask_offset + gender_offset + age_offset

    def _makeCSV(self):
        """Scan the image folders and write ``trans_train.csv`` atomically."""
        # Keep ids as strings: zero-padded ids such as '006359' must survive
        # parsing to match the correction lists.
        df = pd.read_csv(self.origin_csv_path, dtype={'id': str})

        # Write to a temporary file first so that an interrupted run cannot
        # leave a truncated cache that later runs would silently reuse.
        tmp_path = self.trans_csv_path + '.tmp'
        with open(tmp_path, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(["path", "label"])
            for _, data in df.iterrows():
                img_path_base = os.path.join(self.img_dir, data['path'], '*')
                # Sorted for a deterministic row order across file systems.
                for img_path in sorted(glob.glob(img_path_base)):
                    writer.writerow([img_path, self._label_for(data, img_path)])
        os.replace(tmp_path, self.trans_csv_path)

class MaskDataset(Dataset):
    """Map-style dataset over a DataFrame with ``path`` and ``label`` columns.

    Args:
        dataframe: table whose ``path`` column holds image file paths and
            whose ``label`` column holds the class ids.
        transform: optional callable invoked as ``transform(image=array)`` and
            expected to return a mapping with an ``'image'`` entry.
    """

    def __init__(self, dataframe, transform=None):
        super().__init__()
        self.transform = transform
        self.df = dataframe

    def __len__(self):
        """Number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label_tensor)`` for the row at position `idx`."""
        target = torch.tensor(self.df['label'].iloc[idx])
        # Decode to an RGB array so every sample has three channels.
        pixels = np.array(PIL.Image.open(self.df['path'].iloc[idx]).convert("RGB"))
        if not self.transform:
            return pixels, target
        return self.transform(image=pixels)['image'], target

# 2. 모델 설계


In [6]:
class VIT_Model(nn.Module):
    """Pretrained timm model whose ``head`` layer is replaced for `num_classes` outputs.

    Args:
        model_name: architecture name passed to ``timm.create_model``.
        num_classes: output width of the new linear head.
    """

    def __init__(self, model_name, num_classes):
        super().__init__()
        self.num_classes = num_classes

        backbone = timm.create_model(model_name, pretrained=True)
        # Fresh classification head sized to the backbone's feature width.
        new_head = nn.Linear(in_features=backbone.head.in_features,
                             out_features=self.num_classes,
                             bias=True)
        nn.init.xavier_uniform_(new_head.weight)
        bound = 1 / np.sqrt(self.num_classes)
        new_head.bias.data.uniform_(-bound, bound)

        backbone.head = new_head
        self.model = backbone

    def forward(self, x):
        """Run `x` through the wrapped timm model and return its output."""
        return self.model(x)

class ENN_Model(nn.Module):
    """Pretrained timm model whose ``classifier`` layer is replaced for `num_classes` outputs.

    Args:
        model_name: architecture name passed to ``timm.create_model``.
        num_classes: output width of the new linear classifier.
    """

    def __init__(self, model_name, num_classes):
        super().__init__()
        self.num_classes = num_classes

        backbone = timm.create_model(model_name, pretrained=True)
        # Fresh classifier sized to the backbone's feature width.
        new_classifier = nn.Linear(in_features=backbone.classifier.in_features,
                                   out_features=num_classes,
                                   bias=True)
        nn.init.xavier_uniform_(new_classifier.weight)
        bound = 1 / np.sqrt(self.num_classes)
        new_classifier.bias.data.uniform_(-bound, bound)

        backbone.classifier = new_classifier
        self.model = backbone

    def forward(self, x):
        """Run `x` through the wrapped timm model and return its output."""
        return self.model(x)

# 3. 학습

In [7]:
# Train on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Checkpoints of this run go to '<save_dir><timestamp>_<model_name>/'.
# exist_ok=True makes the cell safe to re-run; the previous existence check
# tested a different path than the one being created.
today = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
os.makedirs(save_dir + today + '_' + model_name, exist_ok=True)

import wandb
wandb.login()

import torchmetrics
from torchmetrics.functional import f1
# Stateful metrics: calling them updates the running state and returns the
# batch value; `.compute()` yields the aggregate and `.reset()` clears it.
calc_train_acc = torchmetrics.Accuracy()
calc_train_f1 = torchmetrics.F1(num_classes=18)
calc_valid_acc = torchmetrics.Accuracy()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnudago[0m (use `wandb login --relogin` to force relogin)


In [8]:
from sklearn.model_selection import StratifiedKFold

# 5-fold stratified cross-validation: one model is trained per fold and the
# weights with the lowest validation loss are checkpointed.
mask_csv = LoadCSV(train_dir)
kfold = StratifiedKFold(n_splits=5, shuffle=False)

for fold, (train_idx, valid_idx) in enumerate(kfold.split(mask_csv.df['path'], mask_csv.df['label'])):
    print(f'FOLD {fold}')

    # Two views over the same table: augmentation is applied to training
    # samples only, validation uses the deterministic pipeline.
    mask_train = MaskDataset(mask_csv.df, transform=A_transform['train'])
    mask_valid = MaskDataset(mask_csv.df, transform=A_transform['valid'])
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_idx)
    valid_subsampler = torch.utils.data.SubsetRandomSampler(valid_idx)

    train_loader = DataLoader(mask_train, batch_size=batch_size, sampler=train_subsampler, drop_last=False, num_workers=8, pin_memory=True)
    valid_loader = DataLoader(mask_valid, batch_size=batch_size, sampler=valid_subsampler, drop_last=False, num_workers=8, pin_memory=True)
    dataloaders = {'train': train_loader, 'valid': valid_loader}

    model = VIT_Model(model_name, 18).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), learning_rate)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=T_max, eta_min=0)

    earlystop_value = 0
    best_loss = float('inf')

    run = wandb.init(project="mask_kfold", entity='boostcamp-level01-04', config={"learning_rate":learning_rate, "batch_size":batch_size, "epochs":epochs})
    now = time.strftime('%Y%m%d_%H%M%S', time.localtime(time.time()))
    wandb.watch(model, criterion, log='all')
    wandb.run.name = f'{model_name}_kfold{fold}_{now}_yh'
    example_ct = 0

    for epoch in range(epochs):
        if earlystop_value >= earlystop:
            break

        # ---- training ----
        model.train()
        calc_train_acc.reset()
        running_loss, n_seen = 0.0, 0
        with tqdm(dataloaders['train'], total=len(dataloaders['train']), unit="batch") as train_bar:
            train_bar.set_description(f"train Epoch {epoch} ")
            for batch_idx, (inputs, labels) in enumerate(train_bar):
                example_ct = epoch * len(dataloaders['train']) + batch_idx
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # Normalise by the samples actually seen; the loader's
                # `.dataset` is the full table, not this fold's subset.
                running_loss += loss.item() * inputs.size(0)
                n_seen += inputs.size(0)
                batch_acc = calc_train_acc(outputs.detach().cpu().argmax(1), labels.cpu())
                train_bar.set_postfix(loss=running_loss / n_seen, acc=batch_acc.item())

        train_loss = running_loss / max(n_seen, 1)
        # Aggregate over the epoch comes from the metric object, not from the
        # per-batch tensor it returns.
        train_acc = calc_train_acc.compute().item()
        lr_scheduler.step()

        # ---- validation ----
        model.eval()
        calc_valid_acc.reset()
        running_loss, n_seen = 0.0, 0
        with torch.no_grad(), tqdm(dataloaders['valid'], total=len(dataloaders['valid']), unit="batch") as valid_bar:
            valid_bar.set_description(f"valid Epoch {epoch} ")
            for batch_idx, (inputs, labels) in enumerate(valid_bar):
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                n_seen += inputs.size(0)
                batch_acc = calc_valid_acc(outputs.cpu().argmax(1), labels.cpu())
                valid_bar.set_postfix(loss=running_loss / n_seen, acc=batch_acc.item())

        epoch_loss = running_loss / max(n_seen, 1)
        valid_acc = calc_valid_acc.compute().item()

        wandb.log({'train_loss': train_loss, 'train_acc': train_acc,
                   'valid_loss': epoch_loss, 'valid_acc': valid_acc}, step=example_ct)

        # Checkpoint on improvement; otherwise count towards early stopping.
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), f'{save_dir}{today}_{model_name}/baseline_{model_name}_lr{learning_rate}_batch{batch_size}_kfold{fold}_epoch{epoch}_valid_loss_{epoch_loss:.5f}.pt')
            earlystop_value = 0
        else:
            earlystop_value += 1

    # Close this fold's run so the next wandb.init starts a separate one.
    run.finish()

FOLD 0


# 4. 추론

In [None]:
class TestDataset(Dataset):
    """Unlabelled image dataset used at inference time.

    Args:
        img_paths: indexable collection of image file paths.
        transform: callable invoked as ``transform(image=array)`` returning a
            mapping with an ``'image'`` entry; a falsy value disables it.
    """

    def __init__(self, img_paths, transform):
        self.transform = transform
        self.img_paths = img_paths

    def __len__(self):
        """Number of image paths."""
        return len(self.img_paths)

    def __getitem__(self, index):
        """Return the (optionally transformed) RGB image at `index`."""
        pixels = np.array(PIL.Image.open(self.img_paths[index]).convert("RGB"))
        if not self.transform:
            return pixels
        return self.transform(image=pixels)['image']

In [None]:
# Restore weights into the `model` object currently alive in the kernel from
# the 'state_dict' entry of a saved checkpoint.
# NOTE(review): the path points at a ViT-base run while the training cell
# builds `model_name`; this only works if `model` was rebuilt with the
# matching architecture beforehand — confirm before re-running.
model.load_state_dict(torch.load('/opt/ml/image-classification-level1-04/saved/models/PretrainModelTimm_ViT_base/0901_064031/checkpoint-epoch14.pth')['state_dict'])

<All keys matched successfully>

In [None]:
# Single-model inference: predict a class id for every image listed in
# info.csv and store the result next to it.
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'new_images')

image_paths = [os.path.join(image_dir, file_name) for file_name in submission.ImageID]
dataset = TestDataset(image_paths, A_transform['VIT_test'])
test_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=8)

model.eval()
all_predictions = []
# Gradients are never needed here, so disable them once for the whole pass.
with torch.no_grad():
    with tqdm(test_loader, total=len(test_loader), unit="batch") as test_bar:
        for images in test_bar:
            images = images.to(device)
            pred = model(images).argmax(dim=-1)
            all_predictions.extend(pred.cpu().numpy())

submission['ans'] = all_predictions
submission.to_csv(os.path.join(test_dir, 'submission_vit_0901_064031_14.csv'), index=False)
print('test inference is done!')

100%|██████████| 788/788 [02:39<00:00,  4.95batch/s]


test inference is done!


# 5. 제출

## 5.1 K-Fold 제출

In [None]:
import numpy as np
import pandas as pd
import torch.nn.functional as F
# Import the module, not the function: `from glob import glob` rebinds the
# name `glob` and breaks every `glob.glob(...)` call in this notebook.
import glob

def kfold_model(folder_path):
    """Pick the newest checkpoint of every fold found in `folder_path`.

    Checkpoint file names are expected to contain ``kfold<N>`` and, optionally,
    ``_epoch<M>`` right after it. For each fold the file with the highest
    epoch number wins (compared numerically, so ``epoch10`` beats ``epoch9``);
    files without an epoch number fall back to lexicographic order. Files
    whose name has no ``kfold<N>`` part are ignored.

    Args:
        folder_path: directory containing the checkpoint files.

    Returns:
        dict mapping the fold number (as a string) to the checkpoint path,
        ordered by ascending fold number. Empty if nothing matches.
    """
    # Imported locally under a private name: elsewhere in the notebook the
    # global `glob` may have been rebound to the function of the same name.
    import glob as _glob_module

    name_pattern = re.compile(r'kfold(\d+)(?:_epoch(\d+))?')
    newest = {}
    for path in sorted(_glob_module.glob(os.path.join(folder_path, '*'))):
        # Match on the file name only so a 'kfold' in a parent folder is ignored.
        found = name_pattern.search(os.path.basename(path))
        if found is None:
            continue
        fold_number = found.group(1)
        epoch = int(found.group(2)) if found.group(2) is not None else -1
        # '>=' keeps the lexicographically last file among equal epochs.
        if fold_number not in newest or epoch >= newest[fold_number][0]:
            newest[fold_number] = (epoch, path)

    k_fold_paths = {}
    for fold_number in sorted(newest, key=int):
        k_fold_paths[fold_number] = newest[fold_number][1]
    return k_fold_paths

def make_model(model_name='efficientnet_b3', num_classes=18):
    """Build an ``ENN_Model`` for `model_name` and move it to the active device."""
    network = ENN_Model(model_name, num_classes)
    return network.to(device)


# K-fold ensemble: average the softmax probabilities of the per-fold models
# and submit the arg-max class for every evaluation image.
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'new_images')
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]

# Submit dataset
dataset = TestDataset(image_paths, A_transform['ENN_test'])
test_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

k_fold_paths = kfold_model('/opt/ml/image-classification-level1-04/saved/models/20210901_154638')
if not k_fold_paths:
    raise FileNotFoundError('no k-fold checkpoints found to ensemble')
# Equal weight per fold, derived from the checkpoints actually found.
ratio = 1 / len(k_fold_paths)

predictions_list = []
for n_fold, path in k_fold_paths.items():
    model_n = make_model()
    model_n.load_state_dict(torch.load(path, map_location=device))
    model_n.eval()  # disable dropout / use running batch-norm statistics

    # One row per test image, one column per class.
    prediction_array = np.zeros((len(dataset), 18))
    offset = 0
    with torch.no_grad(), tqdm(test_loader, total=len(test_loader), unit="batch") as test_bar:
        for images in test_bar:
            images = images.to(device)
            # Use this fold's model, not whatever `model` is left in the kernel.
            pred = F.softmax(model_n(images), dim=-1) * ratio
            pred = pred.cpu().numpy()
            # Writing by running offset handles a short last batch implicitly.
            prediction_array[offset:offset + len(pred), :] = pred
            offset += len(pred)
    predictions_list.append(prediction_array[..., np.newaxis])

predictions_array = np.concatenate(predictions_list, axis = 2)
print(predictions_array.shape)
predictions_mean = predictions_array.sum(axis = 2)
print(predictions_mean.shape)

# The submission column holds one class id per image, not the probability matrix.
submission['ans'] = np.argmax(predictions_mean, axis = 1)
submission.to_csv(os.path.join(test_dir, 'submission_kfold.csv'), index=False)
print('test inference is done!')

## 5.2 Soft Voting

In [None]:
import numpy as np
import pandas as pd
import torch.nn.functional as F

# Soft voting: weighted sum of the softmax outputs of three ViT checkpoints.
model_num1 = VIT_Model('vit_base_patch16_384', 18).to(device)
model_num1.load_state_dict(torch.load('/opt/ml/image-classification-level1-04/saved/models/PretrainModelTimm_ViT_base/0901_064031/checkpoint-epoch8.pth', map_location=device)['state_dict'])
model_num2 = VIT_Model("vit_base_patch16_384", 18).to(device)
model_num2.load_state_dict(torch.load('/opt/ml/image-classification-level1-04/saved/models/PretrainModelTimm_ViT_base/0901_064031/checkpoint-epoch13.pth', map_location=device)['state_dict'])
# A ViT checkpoint needs the ViT wrapper: ENN_Model replaces `.classifier`,
# whereas VIT_Model replaces the `.head` layer these checkpoints were saved with.
model_num3 = VIT_Model("vit_base_patch16_384", 18).to(device)
model_num3.load_state_dict(torch.load('/opt/ml/image-classification-level1-04/saved/models/PretrainModelTimm_ViT_base/0901_064031/checkpoint-epoch14.pth', map_location=device)['state_dict'])

submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'new_images')
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]

best_models=[model_num1,model_num2,model_num3]
test_idx=['VIT_test','VIT_test','VIT_test']
# Per-model voting weights; they sum to 1 so the result stays a distribution.
ratio=[0.3,0.4,0.3]

predictions_list = []
for i, voter in enumerate(best_models):
    voter.eval()  # disable dropout / use running batch-norm statistics
    dataset = TestDataset(image_paths, A_transform[test_idx[i]])
    test_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=8)
    # One row per test image, one column per class.
    prediction_array = np.zeros((len(dataset), 18))
    offset = 0
    with torch.no_grad(), tqdm(test_loader, total=len(test_loader), unit="batch") as test_bar:
        for images in test_bar:
            images = images.to(device)
            pred = F.softmax(voter(images), dim=-1) * ratio[i]
            pred = pred.cpu().numpy()
            # Slice width follows the real batch length instead of fixed
            # 8/16 constants that do not match `batch_size`.
            prediction_array[offset:offset + len(pred), :] = pred
            offset += len(pred)
    predictions_list.append(prediction_array[..., np.newaxis])

predictions_array = np.concatenate(predictions_list, axis = 2)
print(predictions_array.shape)
predictions_mean = predictions_array.sum(axis = 2)
predictions = np.argmax(predictions_mean, axis = 1)
print(predictions_mean.shape)

submission['ans'] = predictions
submission.to_csv(os.path.join(test_dir, 'submission_ensemble.csv'), index=False)
print('test inference is done!')

In [None]:
# Load previously written submission files and print the fraction of test
# images on which two of them predict the same class.
#df_submit1 = pd.DataFrame(predictions_mean,columns=['ans'])
df_submit3 = pd.read_csv('/opt/ml/input/data/eval/submission_vit_0901_064031_3.csv')
df_submit6 = pd.read_csv('/opt/ml/input/data/eval/submission_vit_0901_064031_6.csv')
# NOTE(review): variable is named 18 but the file suffix is 8 — confirm which is intended.
df_submit18 = pd.read_csv('/opt/ml/input/data/eval/submission_vit_0901_064031_8.csv')
df_submit11 = pd.read_csv('/opt/ml/input/data/eval/submission_vit_0901_064031_11.csv')
df_submit12 = pd.read_csv('/opt/ml/input/data/eval/submission_vit_0901_064031_12.csv')
df_submit13 = pd.read_csv('/opt/ml/input/data/eval/submission_vit_0901_064031_13.csv')
# NOTE(review): variable is named 14 but the file suffix is 4 — possibly a typo for `_14.csv`; confirm.
df_submit14 = pd.read_csv('/opt/ml/input/data/eval/submission_vit_0901_064031_4.csv')
df_submit = pd.read_csv('/opt/ml/input/data/eval/submission_ml_image-classification-level1-04_saved_models_PretrainModelTimm_ViT_large_0826_155755_checkpoint-epoch9.pth.csv')
# Agreement rate between the `df_submit12` and `df_submit14` answers.
print((df_submit12.ans == df_submit14.ans).mean())

0.9275396825396826


## 5.3 Hard Voting

In [None]:
# Stack the 'ans' columns of several submissions into one frame (one row per
# submission) and display it.
# NOTE(review): no majority vote is computed here yet — the frame is only shown.
# NOTE(review): `df_submit6` is loaded but not used below.
df_submit6 = pd.read_csv('/opt/ml/input/data/eval/submission_vit_0901_064031_6.csv')
# NOTE(review): variable is named 8 but the file suffix is 12 — confirm which is intended.
df_submit8 = pd.read_csv('/opt/ml/input/data/eval/submission_vit_0901_064031_12.csv')
df_submit13 = pd.read_csv('/opt/ml/input/data/eval/submission_vit_0901_064031_13.csv')
# NOTE(review): `df_submit14` is not defined in this cell; it must already exist in the kernel.
df = pd.DataFrame([df_submit8['ans'], df_submit13['ans'], df_submit14['ans']])
df