In [3]:
import pandas as pd
import numpy as np
import cv2
import PIL
import os
from glob import glob
import pickle
import random

import timm
from pathlib import Path
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torch.utils.data import DataLoader, Dataset, Subset
from efficientnet_pytorch import EfficientNet
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from adamp import AdamP
from tqdm import tqdm, notebook
from PIL import Image

In [2]:
train_dir = '/opt/ml/input/data/train/new_imgs'
transform = T.Compose([
    T.Resize((224,224)),
    T.ToTensor(),
    T.Normalize(mean=(0.548, 0.504, 0.479), std=(0.237, 0.247, 0.246))
]) 

seed = 37
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True
print(f'seed : {seed}')

seed : 37


In [3]:
class maskImageDataset(Dataset):
    def __init__(self,path,transform=None):
        self.image,self.label = self.labeling(path)
        self.transform = transform
        
    def __getitem__(self,idx):
        image,label = Image.open(self.image[idx]),self.label[idx]
        if self.transform:
            image = self.transform(image)
        return image,label

    def __len__(self):
            return len(self.label)

    def labeling(self,paths):
        x = []
        y = []
        for dic in os.listdir(paths):
            if '._' in dic or 'ipynb_checkpoints' in dic:
                continue
            dir_path = paths + '/'+ dic
            code = 0
            if dic[7] == 'f':
                code = 3
            age = int(dic[-2:])
            if age >= 60:
                code += 2
            elif age >=30:
                code += 1
            for image in os.listdir(dir_path):
                if '._' in image or 'ipynb_checkpoints' in image:
                    continue
                if 'png' in image:
                    continue
                image_path = dir_path + '/' + image 
                x.append(image_path)
                label = [0 for _ in range(18)]
                y.append(self.age_labeling(image,code))
        return x,y
                
    def age_labeling(self,path,inputs):
        if 'incorrect_mask' in path:
            return inputs + 6
        elif 'normal' in path:
            return inputs + 12
        else:
            return inputs



In [4]:
DATA = maskImageDataset(train_dir,transform)

train_loader = DataLoader(dataset=DATA, batch_size=16,shuffle=True)



In [5]:
import timm
import torch
import torch.nn as nn


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def create_model(num_classes):
    model = timm.create_model('tf_efficientnet_b5', pretrained=True, num_classes = num_classes)
    model.classifier = nn.Sequential(
    nn.Dropout(0.25),
    nn.Linear(2048, num_classes)
    )
    return model

In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 
print(f"{device} is using!")
efficientnet_b5=create_model(18)
efficientnet_b5.to(device)
LEARNING_RATE = 0.0001
NUM_EPOCH = 5 
loss_fn = torch.nn.CrossEntropyLoss() 
optimizer = torch.optim.Adam(efficientnet_b5.parameters(), lr=LEARNING_RATE)


cuda:0 is using!


In [7]:
from tqdm.notebook import tqdm


In [8]:
dataloaders = {
    "train" : train_loader,

}


In [9]:
best_test_accuracy=0
best_test_loss=9999.
for epoch in range(5):
    for phase in ["train"]:
        running_loss=0.
        running_acc=0.
        if phase == "train":
            efficientnet_b5.train()
        if phase == "val":
            efficientnet_b5.eval()
        
        for ind, (images, labels) in enumerate(tqdm(dataloaders[phase])):
            images=images.to(device)
            labels=labels.to(device)

            optimizer.zero_grad()
            
            with torch.set_grad_enabled(phase=="train"):
                logits=efficientnet_b5(images)
                _,preds=torch.max(logits,1)
                loss=loss_fn(logits,labels)
            if phase =="train":
                loss.backward()
                optimizer.step()
        
            running_loss+=loss.item()*images.size(0)
            running_acc+=torch.sum(preds==labels.data)
        epoch_loss = running_loss / len(dataloaders[phase].dataset)
        epoch_acc = running_acc / len(dataloaders[phase].dataset)
    print(f"현재 epoch-{epoch}의 {phase}-데이터 셋에서 평균 Loss : {epoch_loss:.3f}, 평균 Accuracy : {epoch_acc:.3f}")
    if phase == "test" and best_test_accuracy < epoch_acc: 
      best_test_accuracy = epoch_acc
    if phase == "test" and best_test_loss > epoch_loss:
      best_test_loss = epoch_loss
print("학습 종료!")
print(f"최고 accuracy : {best_test_accuracy}, 최고 낮은 loss : {best_test_loss}")
PATH = './'
torch.save(efficientnet_b5,'./efficientnet_b5.pt')


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1150.0), HTML(value='')))


현재 epoch-0의 train-데이터 셋에서 평균 Loss : 0.556, 평균 Accuracy : 0.835


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1150.0), HTML(value='')))


현재 epoch-1의 train-데이터 셋에서 평균 Loss : 0.130, 평균 Accuracy : 0.961


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1150.0), HTML(value='')))


현재 epoch-2의 train-데이터 셋에서 평균 Loss : 0.058, 평균 Accuracy : 0.982


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1150.0), HTML(value='')))


현재 epoch-3의 train-데이터 셋에서 평균 Loss : 0.038, 평균 Accuracy : 0.988


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1150.0), HTML(value='')))


현재 epoch-4의 train-데이터 셋에서 평균 Loss : 0.038, 평균 Accuracy : 0.988
학습 종료!
최고 accuracy : 0, 최고 낮은 loss : 9999.0


In [85]:
class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)

In [86]:
# meta 데이터와 이미지 경로를 불러옵니다.
test_dir = '/opt/ml/input/data/eval'
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = './new_images'

# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]
transform = T.Compose([
    T.Resize((224, 224) ),
    T.ToTensor(),
    T.Normalize(mean=(0.548, 0.504, 0.479), std=(0.237, 0.247, 0.246))
])
dataset = TestDataset(image_paths, transform)

loader = DataLoader(
    dataset,
    shuffle=False
)

# 모델을 정의합니다. (학습한 모델이 있다면 torch.load로 모델을 불러주세요!)
device = torch.device('cuda')
efficientnet_b5 = torch.load('./Seudo_efficientnet_b5.pt')
efficientnet_b5.eval()
SoftMax=nn.Softmax()
threshold=0.7
# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
for images in loader:
    with torch.no_grad():
        images = images.to(device)
        pred = efficientnet_b5(images)
        probs = SoftMax(pred)
        if torch.max(probs)>=threshold:
            pred = pred.argmax(dim=-1)
            all_predictions.extend(pred.cpu().numpy())
            continue
        all_predictions.extend(torch.Tensor([100]))
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv( './Sudo_efficientnet_b5.csv', index=False)
print('test inference is done!')

  probs = SoftMax(pred)


test inference is done!


In [88]:
temp1=pd.read_csv('./efficientnet_b5.csv')
temp1['ans'].value_counts()

0               2283
1               2001
3               1499
4               1467
tensor(100.)     769
2                685
13               547
7                534
6                459
12               452
5                390
10               358
15               314
9                298
16               286
8                 97
17                61
11                55
14                45
Name: ans, dtype: int64

In [107]:
temp2=pd.read_csv('./Sudo_efficientnet_b5.csv')
temp2['ans'].value_counts()

0               2329
1               2187
4               1579
3               1510
2                800
13               561
7                521
12               468
6                459
5                388
10               373
15               315
9                301
16               298
tensor(100.)     182
8                111
17                78
11                75
14                65
Name: ans, dtype: int64

In [108]:
temp3=pd.read_csv('./tmp.csv')

len(temp3)
len(temp2)

12600

In [118]:
cnt=0
for i in range(int(len(temp2))):
    if '(' in temp2['ans'][i]:
        continue
    if int(temp3['ans'][i]) == int(temp2['ans'][i]):
       cnt+=1
print(cnt)


10273


In [120]:
temp4=pd.read_csv('./efficientnet_b5.csv')


In [123]:
cnt=0
for i in range(int(len(temp4))):
    if '(' in temp4['ans'][i]:
        continue
    if int(temp3['ans'][i]) == int(temp4['ans'][i]):
       cnt+=1
print(cnt)

9950


In [24]:
pseudo_csv_path='./efficientnet_b5.csv'
pseudo_image='./new_images'
transform = T.Compose([
    T.Resize((224, 224) ),
    T.ToTensor(),
    T.Normalize(mean=(0.548, 0.504, 0.479), std=(0.237, 0.247, 0.246))
])

In [80]:
class SeudoDataset(Dataset):
    def __init__(self,csv_path,img_path,transform=None):
        self.image,self.label = self.labeling(csv_path,img_path)
        self.transform = transform
        
    def __getitem__(self,idx):
        image,label = Image.open(self.image[idx]),self.label[idx]
        if self.transform:
            image = self.transform(image)
        return image,label

    def __len__(self):
            return len(self.label)

    def labeling(self,csv_path,img_path):
        x = []
        y = []
        seudo_csv=pd.read_csv(csv_path)
        for i in range(len(seudo_csv)):
            if ')' in seudo_csv['ans'][i]:
                continue
            x.append(os.path.join(img_path,seudo_csv['ImageID'][i]))
            temp=int(seudo_csv['ans'][i])
            y.append(torch.tensor(temp))
        return x,y
                
    

In [81]:
Seudo_Data=SeudoDataset(pseudo_csv_path,pseudo_image,transform)
Seudo_Data[0]

(tensor([[[-1.1705, -1.5180, -1.3194,  ..., -1.6669, -1.4684, -1.2036],
          [-1.4684, -1.3691, -1.1871,  ..., -1.4849, -1.0712, -0.7072],
          [-1.4518, -0.9885, -0.7403,  ..., -0.9885, -0.5583, -0.3928],
          ...,
          [-1.5014, -1.4353, -1.4187,  ..., -1.4353, -1.5014, -1.5676],
          [-1.5345, -1.4684, -1.3360,  ..., -1.4022, -1.4684, -1.5345],
          [-1.4022, -1.3360, -1.2698,  ..., -1.4187, -1.4849, -1.5511]],
 
         [[-0.9132, -1.2466, -1.0561,  ..., -1.4689, -1.2625, -1.0244],
          [-1.1990, -1.1038, -0.9291,  ..., -1.2784, -0.8815, -0.5481],
          [-1.1831, -0.7386, -0.5004,  ..., -0.8180, -0.3893, -0.2623],
          ...,
          [-1.1990, -1.1355, -1.1196,  ..., -1.1514, -1.2149, -1.2784],
          [-1.2308, -1.1673, -1.0402,  ..., -1.1196, -1.1831, -1.2466],
          [-1.1038, -1.0402, -0.9767,  ..., -1.1355, -1.1990, -1.2625]],
 
         [[-0.8631, -1.1979, -1.0066,  ..., -1.5486, -1.3573, -1.1660],
          [-1.1501, -1.0544,

In [82]:
Seudo_Loader = DataLoader(dataset=Seudo_Data, batch_size=16,shuffle=True)


In [83]:

seudo_model=torch.load('./efficientnet_b5.pt')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
seudo_model.to(device)
loss_fn = torch.nn.CrossEntropyLoss() 
LEARNING_RATE = 0.0001
optimizer = torch.optim.Adam(seudo_model.parameters(), lr=LEARNING_RATE)
dataloaders = {
    "train" : Seudo_Loader,
}


In [84]:
best_test_accuracy=0
best_test_loss=9999.
for epoch in range(5):
    for phase in ["train"]:
        running_loss=0.
        running_acc=0.
        if phase == "train":
            seudo_model.train()
        if phase == "val":
            seudo_model.eval()
        
        for ind, (images, labels) in enumerate(tqdm(dataloaders[phase])):
            images=images.to(device)
            labels=labels.to(device)

            optimizer.zero_grad()
            
            with torch.set_grad_enabled(phase=="train"):
                logits=seudo_model(images)
                _,preds=torch.max(logits,1)
                loss=loss_fn(logits,labels)
            if phase =="train":
                loss.backward()
                optimizer.step()
        
            running_loss+=loss.item()*images.size(0)
            running_acc+=torch.sum(preds==labels.data)
        epoch_loss = running_loss / len(dataloaders[phase].dataset)
        epoch_acc = running_acc / len(dataloaders[phase].dataset)
    print(f"현재 epoch-{epoch}의 {phase}-데이터 셋에서 평균 Loss : {epoch_loss:.3f}, 평균 Accuracy : {epoch_acc:.3f}")
    if phase == "test" and best_test_accuracy < epoch_acc: 
      best_test_accuracy = epoch_acc
    if phase == "test" and best_test_loss > epoch_loss:
      best_test_loss = epoch_loss
print("학습 종료!")
print(f"최고 accuracy : {best_test_accuracy}, 최고 낮은 loss : {best_test_loss}")

torch.save(seudo_model,'./Seudo_efficientnet_b5.pt')


100%|██████████| 740/740 [03:05<00:00,  3.98it/s]
  0%|          | 0/740 [00:00<?, ?it/s]

현재 epoch-0의 train-데이터 셋에서 평균 Loss : 0.180, 평균 Accuracy : 0.940


100%|██████████| 740/740 [03:05<00:00,  3.99it/s]
  0%|          | 0/740 [00:00<?, ?it/s]

현재 epoch-1의 train-데이터 셋에서 평균 Loss : 0.080, 평균 Accuracy : 0.975


100%|██████████| 740/740 [03:07<00:00,  3.95it/s]
  0%|          | 0/740 [00:00<?, ?it/s]

현재 epoch-2의 train-데이터 셋에서 평균 Loss : 0.056, 평균 Accuracy : 0.981


100%|██████████| 740/740 [03:04<00:00,  4.02it/s]
  0%|          | 0/740 [00:00<?, ?it/s]

현재 epoch-3의 train-데이터 셋에서 평균 Loss : 0.030, 평균 Accuracy : 0.990


100%|██████████| 740/740 [02:59<00:00,  4.12it/s]


현재 epoch-4의 train-데이터 셋에서 평균 Loss : 0.040, 평균 Accuracy : 0.987
학습 종료!
최고 accuracy : 0, 최고 낮은 loss : 9999.0
