In [None]:
import os
import pandas as pd
import numpy as np
import random
import glob
import shutil
from PIL import Image
from tqdm import tqdm

# 모델 관련 모듈
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
import torch.optim as optim
import timm

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize
import torchvision.models as models

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

In [2]:
# 경로 설정

data_dir = '../input/data/train/'
test_dir = '../input/data/eval/'
submission_dir = './submission/'
submission_image_dir = '../input/data/eval/images'
model_dir = './model/'
train_image_dir = data_dir + 'images/'


## Seed 고정

In [3]:
'''
정인식님 코드 참고

'''

random_seed = 42

#pytorch의 random seed 고정

torch.manual_seed(random_seed)

# CuDNN 부분고정

torch.backends.cudnn.deterministic = True # 고정하면 학습이 느려진다고 합니다.

torch.backends.cudnn.benchmark = False

# Numpy 부분

np.random.seed(random_seed)

# transforms에서 random 라이브러리를 사용하기 때문에 random 라이브러리를 불러서 고정

random.seed(random_seed)

# GPU 에서 사용하는 난수 생성 시드 고정

torch.cuda.manual_seed(random_seed)

## 함수 정의

In [4]:
# 이상치 이미지 시각화
def show_img(img_id_li, df, data_dir):
    for img_id in img_id_li:
        get_df = df[df['id'] == img_id]
        
        img_age = get_df['age'].tolist()[0]
        img_gender = get_df['gender'].tolist()[0]
        
        img_path = get_df['path'].tolist()[0]
        img_path = os.path.join(data_dir, img_path)
        img_name_li = sorted(list(os.listdir(img_path)))
        
        fig, ax = plt.subplots(1, 7, figsize = (30, 15))
        ax = ax.flatten()
        
        idx = 0
        for _img_name in img_name_li:
            if _img_name[0] == '.': continue
            
            if _img_name.split('.')[0] == 'normal': imag_name = 'normal'
            elif _img_name.split('.')[0] == 'incorrect_mask': imag_name = 'incorrect_mask'
            else: imag_name = 'mask'
            
            get_img_path = os.path.join(img_path, _img_name)
            
            img = Image.open(get_img_path)
            img = np.array(img)
            ax[idx].imshow(img)
            ax[idx].set_title(f'{img_id} / {img_age} / {img_gender} / {imag_name}')
            ax[idx].set_xticks([])
            ax[idx].set_yticks([])
            idx += 1
            
        plt.show()

# image path로 이미지 시각화
def path_li_show_img(path_li):
    fig, ax = plt.subplots(1, 7, figsize = (30, 15))
    ax = ax.flatten()
    idx = 0
    for path in path_li:
        image_name = path.split('/')[-1]
        img = Image.open(path)
        img = np.array(img)
        ax[idx].imshow(img)
        ax[idx].set_title(f'{image_name}')
        ax[idx].set_xticks([])
        ax[idx].set_yticks([])
        idx += 1
    plt.show()

# ages 생성
def get_ages(x):
    if x < 30: return 0
    elif x < 60: return 1
    else: return 2

# genders 생성
def get_genders(x):
    if x == 'male': return 0
    else: return 1

# masks 생성
def get_masks(x):
    if x == 'normal': return 2
    elif x == 'incorrect_mask': return 1
    else: return 0

# labels 생성
def get_labels(masks, genders, ages):
    return masks * 6 + genders * 3 + ages

# train_df 생성
def get_train_df(df):
    train_df = []
    train_data_dir = '../input/data/train/'
    for line in df.iloc:
        for file in list(os.listdir(os.path.join(train_image_dir, line['path']))):
            if file[0] == '.':
                continue
                
            mask = file.split('.')[0]
            gender = line['gender']
            age = line['age']
            
            masks = get_masks(mask)
            genders = get_genders(gender)
            ages = get_ages(age)
            
            data = {
                'id' : line['id'],
                'mask' : mask,
                'gender' : gender,
                'age' : age,
                'masks' : masks,
                'genders' : genders,
                'ages' : ages,
                'cv_target_col' : line['cv_target_col'],
                'labels': get_labels(masks = masks, genders = genders, ages = ages),
                'path': os.path.join(train_image_dir, line['path'], file),
            }
            train_df.append(data)
            
    train_df = pd.DataFrame(train_df)
    train_df['idx'] = train_df.index
    
    return train_df

# 성별 이상치 처리
def swap_gender(swap_li, df):
    swap_df = df.copy()
    for swap in swap_li:
        swap_id, swap_gender = swap
        swap_df.loc[swap_df[swap_df['id'] == swap_id].index, 'gender'] = swap_gender
    return swap_df
        
# 전처리된 df 생성 - 성별 결측치 처리, cv_target_col 생성
def preprocessing_df(df, swap_gender_li):
    '''
    swap_gender_li = [['006359', 'male'], ['006360', 'male'], ['006361', 'male'], ['006362', 'male'], ['006363', 'male'], ['006364', 'male']]
    '''
    
    preprocessing_df = df.copy()
    preprocessing_df = swap_gender(swap_li = swap_gender_li, df = preprocessing_df)
    
    preprocessing_df['ages'] = preprocessing_df['age'].apply(lambda x : get_ages(x))
    preprocessing_df['genders'] = preprocessing_df['gender'].apply(lambda x : get_genders(x))
    
    preprocessing_df['cv_target_col'] = 'ages' + '_' + preprocessing_df['ages'].astype(str) + '_' + 'genders' + '_' + preprocessing_df['genders'].astype(str)
    
    return preprocessing_df

# val_idx 생성
def get_val_idx(df, target_col):
    skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 22)
    for trn_idx, val_idx in skf.split(df, df[target_col]):
        yield val_idx

# 마스크 이상치 변경
def swap_mask(swap_li, df):
    swap_df = df.copy()
    for swap_id in swap_li:
        _swap_df = swap_df[swap_df['id'] == swap_id]
        
        normal_swap_df = _swap_df[_swap_df['mask'] == 'normal']
        incorrect_mask_swap_df = _swap_df[_swap_df['mask'] == 'incorrect_mask']
        
        normal_path = normal_swap_df['path'].values[0]
        incorrect_mask_path = incorrect_mask_swap_df['path'].values[0]
        
        swap_df.loc[normal_swap_df.index, 'path'] = incorrect_mask_path
        swap_df.loc[incorrect_mask_swap_df.index, 'path'] = normal_path
    
    return swap_df

## 데이터 전처리

In [5]:
train_df = pd.read_csv(data_dir + 'train.csv')
submission = pd.read_csv(test_dir + 'info.csv')

In [6]:
'''
신규범님 코드 참고

학습 데이터 구축
'''
swap_gender_li = [['006359', 'male'], ['006360', 'male'], ['006361', 'male'], ['006362', 'male'], ['006363', 'male'], ['006364', 'male']]
swap_mask_li = ['000020', '004418', '005227']

pre_df = preprocessing_df(df = train_df, swap_gender_li = swap_gender_li)
train_df = get_train_df(df = pre_df)

train_df = swap_mask(swap_li = swap_mask_li, df = train_df)

train_idx, val_idx = train_test_split(train_df['cv_target_col'], train_size = 0.8, random_state = 42, stratify = train_df['cv_target_col'])
                                      
train_set, val_set = train_df.iloc[train_idx.index, :], train_df.iloc[val_idx.index, :]

## 데이터셋 구축

In [7]:
'''
Sample_submission 코드 참고

데이터 셋 구축
'''

class CustomDataset(Dataset):
    def __init__(self, df, transform, train = True):
        self.train = train
        self.df = df
        if self.train:
            self.img_paths = self.df['path'].tolist()
            self.targets = self.df['labels'].tolist()
        else:
            self.img_paths = [os.path.join(submission_image_dir, img_id) for img_id in self.df.ImageID]
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])
        
        if self.transform:
            image = self.transform(image)
        
        # 이 부분에 해당 라벨에 따른 데이터 변환 여부 추가
        # val 데이터의 경우 데이터 변환이 일어나면 안되기 때문에
        # if self.데이터 변환해주는 transform:
        #     if self.targets[index].data == labels: <- 확률적으로
        #          image = self.데이터 변환해주는 transform(image)
        # 데이터 변환
        
        if self.train:
            targets = torch.tensor(self.targets[index])
            return image, targets
        
        else: return image

    def __len__(self):
        return len(self.img_paths)

## 모델

In [8]:
model_path = 'swin_large_patch4_window7_224'
out_features = 18
inp_channels = 3
pretrained = True

In [9]:
class Swin_T(nn.Module):
    def __init__(self, model_name = model_path, out_features = out_features, inp_channels = inp_channels, pretrained = pretrained):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained, in_chans=inp_channels, num_classes = out_features)
        
        n_inputs = self.model.head.in_features
        self.model.head = nn.Sequential(
                            nn.Linear(n_inputs, 512),
                            nn.ReLU(),
                            nn.Dropout(0.7),
                            nn.Linear(512, out_features)
                            )
        
        
    def forward(self, image):
        output = self.model(image)
        return output

## 학습 설정

In [10]:
'''
학습 함수 설정
'''

def train(model, data_loader, optimizer, scheduler, criterion):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    f1 = 0
    
    for batch_idx, (images, targets) in enumerate(data_loader):
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()

        benign_outputs = model(images)
        loss = criterion(benign_outputs, targets)
        loss.backward()

        optimizer.step()
        train_loss += loss.item()
        _, predicted = benign_outputs.max(1)

        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        f1 += f1_score(targets.cpu().tolist(), predicted.cpu().tolist(), average='macro')
        
    train_loss /= len(data_loader)
    acc = correct / total
    f1 /= len(data_loader)
    
    scheduler.step(train_loss)
    
    return train_loss, acc, f1


def val(model, data_loader, criterion):
    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    f1 = 0
    
    for batch_idx, (images, targets) in enumerate(data_loader):
        with torch.no_grad():
            images, targets = images.to(device), targets.to(device)
            benign_outputs = model(images)
            loss = criterion(benign_outputs, targets)
            val_loss += loss.item()
            _, predicted = benign_outputs.max(1)

            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            f1 += f1_score(targets.cpu().tolist(), predicted.cpu().tolist(), average='macro')
    
    val_loss /= len(data_loader)
    acc = correct / total
    f1 /= len(data_loader)
    
    return val_loss, acc, f1

def pred(model, data_loader):
    model.eval()
    all_predictions = []
    for images in data_loader:
        with torch.no_grad():
            images = images.to(device)
            pred = model(images)
            pred = pred.argmax(dim=-1)
            all_predictions.extend(pred.cpu().numpy())
            
    return all_predictions

In [11]:
'''
학습 설정
'''

device = 'cuda' if torch.cuda.is_available() else 'cpu'
lr = 0.001
epochs = 30
batch_size = 64
num_workers = 3


## 데이터 로더 생성

In [12]:
'''
데이터 로더 생성
'''

image_size = (224, 224)
image_normal_mean = (0.485, 0.456, 0.406)
image_normal_std = (0.229, 0.224, 0.225)
crop_size = 384

def crop800(image):
    left = 0
    top = 32
    right = 384
    bottom = 416
    image = image.crop((left, top, right, bottom))
    return image

transform = transforms.Compose([

    transforms.Lambda(crop800),
    ToTensor(),
    Normalize(mean=image_normal_mean, std=image_normal_std),
    Resize(image_size, Image.BILINEAR),
])

train_customset = CustomDataset(df = train_set, transform = transform, train = True)
val_customset = CustomDataset(df = val_set, transform = transform, train = True)
test_customset = CustomDataset(df = submission, transform = transform, train = False)

train_loader = DataLoader(
    train_customset,
    batch_size = batch_size,
    shuffle=True,
    num_workers = num_workers,
)

val_loader = DataLoader(
    val_customset,
    batch_size = batch_size,
    shuffle=True,
    num_workers = num_workers,
)

test_loader = DataLoader(
    test_customset,
    batch_size = batch_size,
    shuffle=False,
    num_workers = num_workers,
)

# 사람 다르게 하는 col
cv_taget_col = 'cv_taget_col'



In [13]:
'''
모델 설정
'''
model = Swin_T().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor = 0.1, eps = 1e-09, patience = 5)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [14]:
len(train_loader)

237

## 학습

In [15]:
torch.cuda.empty_cache()
min_val_loss = float("inf")
early_stopping_count = 0

for epoch in tqdm(range(1, epochs + 1)):
    train_loss, train_acc, train_f1 = train(model = model, data_loader = train_loader, optimizer = optimizer, scheduler = scheduler, criterion = criterion)
    val_loss, val_acc, val_f1 = val(model = model, data_loader = val_loader, criterion = criterion)
    
    print(f'epoch : {epoch}, train_loss : {train_loss}, train_acc : {train_acc}, train_f1 : {train_f1}, val_loss : {val_loss}, val_acc : {val_acc}, val_f1 : {val_f1}')
    
    # 모델 저장
    if val_loss < min_val_loss:
        min_val_loss = val_loss
        torch.save(model.state_dict(), model_dir + f'best_{model_path}.pt')
        early_stopping_count = 0
    else:
        early_stopping_count += 1
        if early_stopping_count == 10:
            print('early_stopping')
            break

  0%|          | 0/30 [00:00<?, ?it/s]

epoch : 1, train_loss : 2.434020025317679, train_acc : 0.19953703703703704, train_f1 : 0.04462453478783077, val_loss : 2.380245800813039, val_acc : 0.19153439153439153, val_f1 : 0.02529209855959206


  3%|▎         | 1/30 [06:21<3:04:30, 381.74s/it]

epoch : 2, train_loss : 2.389167255490138, train_acc : 0.21276455026455027, train_f1 : 0.04149071750250361, val_loss : 2.330269674460093, val_acc : 0.1962962962962963, val_f1 : 0.031206220378281


 10%|█         | 3/30 [18:59<2:51:02, 380.11s/it]

epoch : 3, train_loss : 2.388271902180925, train_acc : 0.21164021164021163, train_f1 : 0.04290320238867349, val_loss : 2.3609894116719565, val_acc : 0.21455026455026455, val_f1 : 0.02904705839122294


 13%|█▎        | 4/30 [25:16<2:44:12, 378.95s/it]

epoch : 4, train_loss : 2.3858619676863593, train_acc : 0.20992063492063492, train_f1 : 0.03920500104837338, val_loss : 2.3907374580701193, val_acc : 0.21455026455026455, val_f1 : 0.02551481895723985


 17%|█▋        | 5/30 [31:32<2:37:32, 378.12s/it]

epoch : 5, train_loss : 2.3853262979773024, train_acc : 0.20965608465608465, train_f1 : 0.03820674294260352, val_loss : 2.3841930429140725, val_acc : 0.21455026455026455, val_f1 : 0.02549991779991246


Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/multiprocessing/queues.py", line 245, in _feed
    send_bytes(obj)
  File "/opt/conda/lib/python3.8/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/opt/conda/lib/python3.8/multiprocessing/connection.py", line 411, in _send_bytes
    self._send(header + buf)
  File "/opt/conda/lib/python3.8/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
 17%|█▋        | 5/30 [36:46<3:03:50, 441.23s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/multiprocessing/queues.py", line 245, in _feed
    send_bytes(obj)
  File "/opt/conda/lib/python3.8/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/opt/conda/lib/python3.8/multiprocessing/connection.py", line 411, in _send_bytes
    self._send(header + buf)
  File "/opt/con

KeyboardInterrupt: 

In [16]:
model = Swin_T(pretrained=False).to(device)
model.load_state_dict(torch.load(model_dir + f'best_{model_path}.pt', map_location = device))

<All keys matched successfully>

## 예측

In [17]:
# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = pred(model = model, data_loader = test_loader)
submission['ans'] = all_predictions

In [18]:
def get_f1_score(y_true, y_pred):
    return f1_score(y_true, y_pred, average='macro')

def get_acc_score(y_true, y_pred):
    return accuracy_score(y_true, y_pred)


def val_pred(model, data_loader):
    model.eval()
    all_predictions = []
    all_targets = []
    for (images, targets) in data_loader:
        with torch.no_grad():
            images, targets = images.to(device), targets.to(device)
            pred = model(images)
            pred = pred.argmax(dim=-1)
            all_predictions.extend(pred.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())
            
    return all_predictions, all_targets

## confusion matrix

In [19]:
val_predictions, val_targets = val_pred(model = model, data_loader = val_loader)

val_f1 = get_f1_score(y_true = val_targets, y_pred = val_predictions)
val_acc = get_acc_score(y_true = val_targets, y_pred = val_predictions)

val_confusion_matrix = pd.DataFrame((confusion_matrix(y_true = val_targets, y_pred = val_predictions)))
print(f'val confusion_matrix')
display(val_confusion_matrix.style.background_gradient(cmap='YlOrRd', axis = 0))
print(f'val fi : {val_f1}, val acc: {val_acc} \n')

val confusion_matrix


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,0,0,0,53,506,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,15,390,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,1,84,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,60,664,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,12,799,0,0,0,0,0,0,0,0,0,0,0,0,0
5,0,0,0,2,100,0,0,0,0,0,0,0,0,0,0,0,0,0
6,0,0,0,10,100,0,0,0,0,0,0,0,0,0,0,0,0,0
7,0,0,0,3,79,0,0,0,0,0,0,0,0,0,0,0,0,0
8,0,0,0,1,12,0,0,0,0,0,0,0,0,0,0,0,0,0
9,0,0,0,12,122,0,0,0,0,0,0,0,0,0,0,0,0,0


val fi : 0.02736995467735361, val acc: 0.22724867724867726 



## 제출 파일 생성

In [20]:
# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(submission_dir, f'{model_path}_pretrained.csv'), index=False)
print('test inference is done!')

test inference is done!


In [21]:
submission.head()

Unnamed: 0,ImageID,ans
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,4
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,4
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,4
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,4
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,4


In [22]:
model

Swin_T(
  (model): SwinTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 192, kernel_size=(4, 4), stride=(4, 4))
      (norm): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (layers): Sequential(
      (0): BasicLayer(
        dim=192, input_resolution=(56, 56), depth=2
        (blocks): ModuleList(
          (0): SwinTransformerBlock(
            (norm1): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
            (attn): WindowAttention(
              (qkv): Linear(in_features=192, out_features=576, bias=True)
              (attn_drop): Dropout(p=0.0, inplace=False)
              (proj): Linear(in_features=192, out_features=192, bias=True)
              (proj_drop): Dropout(p=0.0, inplace=False)
              (softmax): Softmax(dim=-1)
            )
            (drop_path): Identity()
            (norm2): LayerNorm((192,), eps=1e-05, elementwise_affine=True)
            (mlp): Mlp(
              (f