In [521]:
#!conda install -n base -c conda-forge jupyterlab_widgets -y

In [522]:
# conda install -c conda-forge ipywidgets

In [1]:
import random

import matplotlib
import matplotlib.pyplot as plt

import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset

import torchvision
from torchvision import datasets, models, transforms

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import time
import datetime
from tqdm import tqdm

from PIL import Image

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # device 객체

# Set random seed
SEED = 2021
random.seed(SEED)
np.random.seed(SEED)
os.environ["PYTHONHASHSEED"] = str(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)  # type: ignore
torch.backends.cudnn.deterministic = True  # type: ignore
torch.backends.cudnn.benchmark = False  # type: ignore

In [2]:
device

device(type='cuda', index=0)

In [3]:
# 데이터셋을 불러올 때 사용할 변형(transformation) 객체 정의
transforms = transforms.Compose([
    transforms.Resize((224, 248)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomAffine((20)),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # 정규화(normalization)
])

In [4]:
def train_val_dataset(data):
    
    train_idx = []
    val_idx = []
    
    for i in range(len(data.classes)):
        all_idx = []
        
        for j in tqdm(range(len(data))):
#         for j in range(len(data)):
            if i == data[j][1]:
                all_idx.append(j)
        
        split_idx = int(len(all_idx) * 0.9)
        
        train_idx = np.concatenate((train_idx, all_idx[:split_idx]))
        val_idx = np.concatenate((val_idx, all_idx[split_idx:]))
    
    datas = {}
    datas['train'] = Subset(data, list(map(int, train_idx)))
    datas['val'] = Subset(data, list(map(int, val_idx)))
    
    return datas

In [5]:
'''
def train_val_dataset(data, val_split=0.2):
    train_idx, val_idx = train_test_split(list(range(len(data))), test_size=val_split, shuffle=False)
    datas = {}
    datas['train'] = Subset(data, train_idx)
    datas['val'] = Subset(data, val_idx)
    return datas
'''

"\ndef train_val_dataset(data, val_split=0.2):\n    train_idx, val_idx = train_test_split(list(range(len(data))), test_size=val_split, shuffle=False)\n    datas = {}\n    datas['train'] = Subset(data, train_idx)\n    datas['val'] = Subset(data, val_idx)\n    return datas\n"

In [6]:
from torch.utils.data import Dataset, DataLoader

class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)

In [7]:
test_dir = '/opt/ml/input/cropped_v2/eval'
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))

In [556]:
# datasets.ImageFolder(os.path.join(f'/opt/ml/input/cropped_v2/train/images_classified_age/'), transforms)[2]

In [557]:
def model_run(target, sub_df, test_dir):
    data_dir = f'/opt/ml/input/cropped_v2/train/images_classified_{target}/'
    
    dataset = datasets.ImageFolder(os.path.join(data_dir), transforms)
    
    print(f'*****{target}*****')
    print(dataset)
    print()
    
    class_names = dataset.classes
    print(class_names)
    print()
    
    print('****train, valid split****')
    dataset_split = train_val_dataset(dataset)
    
    dataloader = torch.utils.data.DataLoader(dataset_split['train'], batch_size=128, shuffle=True, num_workers=8)
    valid_dataloader = torch.utils.data.DataLoader(dataset_split['val'], batch_size=128, shuffle=False, num_workers=8)
    
    model = models.resnet34(pretrained=True)

    num_features = model.fc.in_features
    # 전이 학습(transfer learning): 모델의 출력 뉴런 수를 18개로 교체하여 마지막 레이어 다시 학습
    model.fc = nn.Linear(num_features, len(class_names)) 
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    
    num_epochs = 3 if target=='mask' else 5
    model.train()
    start_time = time.time()

    # 전체 반복(epoch) 수 만큼 반복하며
    print('****start epoch****')
    for epoch in range(num_epochs):
        running_loss = 0.
        running_corrects = 0
        
        if epoch == 2 and target=='age': optimizer = optim.SGD(model.parameters(), lr=0.0003, momentum=0.9)
        
        # 배치 단위로 학습 데이터 불러오기
        for inputs, labels in tqdm(dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # 모델에 입력(forward)하고 결과 계산
            optimizer.zero_grad() # 전체 grad 값을 초기화.
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # 역전파를 통해 기울기(gradient) 계산 및 학습 진행
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(dataset_split['train'])
        epoch_acc = running_corrects / len(dataset_split['train']) * 100.
        
        # validation
        model.eval()
        
        all_labels = []
        all_preds = []
        
        with torch.no_grad():
            running_loss = 0.
            running_corrects = 0
            
            for inputs, labels in valid_dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
        
                all_preds.extend(preds.cpu().numpy())
        
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
        
                all_labels.extend(labels.data.cpu().numpy())
        
            valid_epoch_loss = running_loss / len(dataset_split['val'])
            valid_epoch_acc = running_corrects / len(dataset_split['val']) * 100.
            F1_score = f1_score(all_labels, all_preds, average='macro')
            
        # 학습 과정 중에 결과 출력
        print('#{} Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch, epoch_loss, epoch_acc, time.time() - start_time))
        print(f'Valid Loss: {valid_epoch_loss:.4f} Valid Acc: {valid_epoch_acc:.4f} F1 Score: {F1_score:.4f}')
        if F1_score > 0.77 and target == 'age' : break
        
    image_dir = os.path.join(test_dir, 'images')
    
    image_paths = [os.path.join(image_dir, img_id) for img_id in sub_df.ImageID]
    
    test_set = TestDataset(image_paths, transforms)
    
    test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False, num_workers=8)
    
    all_predictions = []
    
    model.eval()
    all_output = []        ###########################################################
    with torch.no_grad():
        for inputs in tqdm(test_dataloader):
            inputs = inputs.to(device)
        
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
        
            all_predictions.extend(preds.cpu().numpy())
            all_output.append(outputs)   #############################################
    mask_soft = torch.nn.functional.softmax(torch.cat(all_output,dim=0),dim=1)  ######
    for i in range(len(class_names)):  ###############################################
        sub_df[class_names[i]] = mask_soft[:,i].cpu().numpy()  #######################
    sub_df.to_csv(f'sub_{target}.csv')  ##############################################
    all_predictions2 = []

    for p in all_predictions:
        all_predictions2.append(class_names[p])
        
    sub_df[target] = all_predictions2
    torch.save(model.state_dict(), os.getcwd()+'/'+f'model_{target}')
    return sub_df.copy()

In [8]:
submission_mask  = model_run('mask', submission, test_dir)

NameError: name 'model_run' is not defined

In [None]:
submission = submission_mask.drop(['0','12','6','.ipynb_checkpoints'],axis=1)

In [None]:
submission = model_run('gender', submission, test_dir)

In [None]:
submission = submission.drop(['0','3'],axis=1)

## age 메타데이터 모델링

In [9]:
# submission = submission.astype({'gender':'int','mask':'int'})
# submission.to_csv('submission_before_age.csv', index=False)
submission = pd.read_csv('submission_before_age.csv')
ground_best_F1_score = 0

In [10]:
#age 예측을 위한 데이터셋
import copy

from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder

from PIL import Image
import os
import os.path
from typing import Any, Callable, cast, Dict, List, Optional, Tuple
from torchvision.datasets.folder import default_loader, IMG_EXTENSIONS

class TrainDatasetAge(ImageFolder):
    def __init__(
            self,
            root: str,
            transform: Optional[Callable] = None,
            target_transform: Optional[Callable] = None,
            loader: Callable[[str], Any] = default_loader,
            is_valid_file: Optional[Callable[[str], bool]] = None,
    ):
        super(ImageFolder, self).__init__(root, loader, IMG_EXTENSIONS if is_valid_file is None else None,
                                          transform=transform,
                                          target_transform=target_transform,
                                          is_valid_file=is_valid_file)
        self.imgs = self.samples
        ##위는 그냥 기존 코드
        ##아래는 직접 작성
        gender_dic = {'male':0, 'female':3 }
        mask_dic = {'mask':0, 'incor':6, 'norma':12} ##이는 전처리의 편의에 의함
        def feature_extract(f):
            ff = f[0].split('/')[-1].split('.')[0].replace('incorrect_mask','incorr').split('_')
            return gender_dic[ff[1]], mask_dic[ff[-1][:-1]]
        def feature_onehot(f):
            return torch.tensor((int(f[0]==3),int(f[1]==0),int(f[1]==6),int(f[1]==12)))
        def age(f):
            ff = f[0].split('/')[-1].split('.')[0].replace('incorrect_mask','incorr').split('_')
            return int(f)
        self.features = list(map(feature_extract, self.samples))
        self.features = list(map(feature_onehot, self.features))
        self.age = list(map(feature_onehot, self.features))

    def __getitem__(self, index: int) -> Tuple[Any, Any]:

        path, target = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)
        #아래 한 줄과 리턴에 하나 추가
        features = self.features[index]
        age = self.age[index]
        return sample, target, features, age

In [11]:
from torch.utils.data import Dataset, DataLoader

class TestDatasetAge(Dataset):
    def __init__(self, img_paths, transform, submission):
        self.img_paths = img_paths
        self.transform = transform
        
        #순서가 같다는 전제. 아니면 다른 어려운 방법을 강구해야함.
        self.gender = submission['gender']
        self.mask = submission['mask']
        self.features = [(submission['gender'][i],submission['mask'][i]) for i in range(len(self.gender))]
        
        def feature_onehot(f):
            return torch.tensor((int(f[0]==3),int(f[1]==0),int(f[1]==6),int(f[1]==12))) 
        
        self.features = list(map(feature_onehot, self.features))
        
    
        
    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image) 
        features = self.features[index]
        return image, features

    def __len__(self):
        return len(self.img_paths)

In [12]:
# import timm
# class AgeCustomModel(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.model = timm.create_model('swin_large_patch9_window12_224', pretrained=False, in_chans=3)
#         self.model.head = nn.Linear(self.model.head.in_features, 128)
#         self.dropout = nn.Dropout(0.1)
#         self.dense1 = nn.Linear(516, 64) # 성별1 마스크 3으로 원핫 인코딩했기 때문에 4 추가
#         self.dense2 = nn.Linear(64, 3)
        
#     def forward(self, image, features):
#         x1 = self.model(image)
#         x = self.dropout(x1)
#         x = torch.cat([x, features], dim=1)
#         x = self.dense1(x)
#         x = self.dense2(x)
        
#         return x

In [13]:
class AgeCustomModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = models.resnet34(pretrained=True)
        in_features = self.model.fc.in_features
        self.model.fc = nn.Linear(in_features, 128)
        self.dropout = nn.Dropout(0.1)
        self.dense1 = nn.Linear(132, 64) # 성별1 마스크 3으로 원핫 인코딩했기 때문에 4 추가
        self.dense2 = nn.Linear(64, 3)
        
    def forward(self, image, features):
        x1 = self.model(image)
        x = self.dropout(x1)
        x = torch.cat([x, features], dim=1)
        x = self.dense1(x)
        x = self.dense2(x)
        
        return x

In [14]:
# class AgeCustomModel(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.model = models.resnet34(pretrained=True)
#         in_features = self.model.fc.in_features
#         self.model.fc = nn.Linear(in_features, 128)
#         self.dropout = nn.Dropout(0.1)
#         self.dense1 = nn.Linear(132, 64) # 성별1 마스크 3으로 원핫 인코딩했기 때문에 4 추가
#         self.dense2 = nn.Linear(64, 1)
        
#     def forward(self, image, features):
#         x1 = self.model(image)
#         x = self.dropout(x1)
#         x = torch.cat([x, features], dim=1)
#         x = self.dense1(x)
#         x = self.dense2(x)
        
#         return x

In [15]:
#     def forward(self, image, features, targets=None):
#         x1 = self.model(image)
#         x = self.dropout(x1)
#         x = torch.cat([x, features], dim=1)
#         x = self.dense1(x)
#         x = self.dense2(x)
        
#         x = torch.cat([x, x1, features], dim=1)
#         return x, 0, {}

In [16]:
data_dir = f'/opt/ml/input/cropped_v2/train/images_classified_age/'

dataset = TrainDatasetAge(os.path.join(data_dir), transforms)  ################################# 데이터셋 함수 변경

print(f'*****age*****')
print(dataset)
print()

class_names = dataset.classes
print(class_names)
print()

print('****train, valid split****')
dataset_split = train_val_dataset(dataset)
dataloader = torch.utils.data.DataLoader(dataset_split['train'], batch_size=60, shuffle=True, num_workers=4,drop_last=True)
valid_dataloader = torch.utils.data.DataLoader(dataset_split['val'], batch_size=60, shuffle=False, num_workers=4,drop_last=True)

  0%|          | 9/21588 [00:00<04:00, 89.91it/s]

*****age*****
Dataset TrainDatasetAge
    Number of datapoints: 21588
    Root location: /opt/ml/input/cropped_v2/train/images_classified_age/
    StandardTransform
Transform: Compose(
               Resize(size=(224, 248), interpolation=PIL.Image.BILINEAR)
               RandomHorizontalFlip(p=0.5)
               RandomAffine(degrees=[-20.0, 20.0])
               RandomRotation(degrees=[-20.0, 20.0], resample=False, expand=False)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

['0', '1', '2']

****train, valid split****


100%|██████████| 21588/21588 [01:18<00:00, 275.63it/s]
100%|██████████| 21588/21588 [01:17<00:00, 278.68it/s]
100%|██████████| 21588/21588 [01:17<00:00, 277.48it/s]


In [None]:
# next(iter(valid_dataloader))[2][:10]

In [17]:
def age_model_run(sub_df, test_dir): #####target을 변수로 받지 않고 관련 부분들을 모두 수정 #########################
    data_dir = f'/opt/ml/input/cropped_v2/train/images_classified_age/'

    ################## 모델부분 코드 변경 #################
    model = AgeCustomModel()
    best_F1_score = 0 
    #######################################################
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
#     optimizer = optim.Adam(model.parameters(), lr=0.0001)
    
    num_epochs = 7
    model.train()
    start_time = time.time()

    # 전체 반복(epoch) 수 만큼 반복하며
    print('****start epoch****')
    for epoch in range(num_epochs):
        running_loss = 0.
        running_corrects = 0
        
#         if epoch == 3: optimizer = optim.SGD(model.parameters(), lr=0.0005, momentum=0.3)
#         if epoch == 5: optimizer = optim.SGD(model.parameters(), lr=0.002, momentum=0.3)
#         if epoch == 6: optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.3)
            
        # 배치 단위로 학습 데이터 불러오기                                ##################features 활용하는 부분 추가
#         for inputs, labels in tqdm(dataloader):
        for inputs, labels, features, _ in tqdm(dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            features = features.to(device)##################features 활용하는 부분 추가

            # 모델에 입력(forward)하고 결과 계산
            optimizer.zero_grad() # 전체 grad 값을 초기화.
#             outputs = model(inputs)
            outputs = model(inputs,features) #안되면 model.forward로 바꿔보기
    
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # 역전파를 통해 기울기(gradient) 계산 및 학습 진행
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(dataset_split['train'])
        epoch_acc = running_corrects / len(dataset_split['train']) * 100.
        
        # validation
        model.eval()
        
        all_labels = []
        all_preds = []
        
        with torch.no_grad():
            running_loss = 0.
            running_corrects = 0
            
#             for inputs, labels in valid_dataloader:
            for inputs, labels, features, _ in valid_dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                features = features.to(device)##################features 활용하는 부분 추가
            
#                 outputs = model(inputs)
                outputs = model(inputs,features)
                _, preds = torch.max(outputs, 1)
        
                all_preds.extend(preds.cpu().numpy())
        
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
        
                all_labels.extend(labels.data.cpu().numpy())
        
            valid_epoch_loss = running_loss / len(dataset_split['val'])
            valid_epoch_acc = running_corrects / len(dataset_split['val']) * 100.
            F1_score = f1_score(all_labels, all_preds, average='macro')
            
        # 학습 과정 중에 결과 출력
        print('#{} Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch, epoch_loss, epoch_acc, time.time() - start_time))
        print(f'Valid Loss: {valid_epoch_loss:.4f} Valid Acc: {valid_epoch_acc:.4f} F1 Score: {F1_score:.4f}')
#         if F1_score > 0.77 and target == 'age' : break
        if F1_score > best_F1_score : best_F1_score, best_model = F1_score, copy.deepcopy(model)   ############최고모델 활용을 위한 코드 추가
        print('best_F1_score : ',best_F1_score) ############최고모델 활용을 위한 코드 추가
        
    if ground_best_F1_score < best_F1_score : ############최고모델 활용을 위한 코드 추가
        print(f"ground_best_F1_score갱신! {ground_best_F1_score} -> {best_F1_score}")
        now = datetime.datetime.now()
        nowDatetime = now.strftime('%Y_%m%d_%H%M')
        torch.save(best_model.state_dict(), os.getcwd()+'/'+f'model_age_{nowDatetime}') ############최고모델 활용을 위한 코드 추가
        
        
    image_dir = os.path.join(test_dir, 'images')
    
    image_paths = [os.path.join(image_dir, img_id) for img_id in sub_df.ImageID]
    
    test_set = TestDatasetAge(image_paths, transforms, submission)  ###################################데이터셋 변경
    
    test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=False, num_workers=4)
    
    all_predictions = []

    best_model.eval()
    all_output = []        ###########################################################
    with torch.no_grad():
#         for inputs in tqdm(test_dataloader):

        for inputs, features in tqdm(test_dataloader):

            inputs = inputs.to(device) 
            features = features.to(device) ##################features 활용하는 부분 추가
            
#             outputs = model(inputs)
            outputs = best_model(inputs,features)
            _, preds = torch.max(outputs, 1)
        
            all_predictions.extend(preds.cpu().numpy())
            all_output.append(outputs)   #############################################
    mask_soft = torch.nn.functional.softmax(torch.cat(all_output,dim=0),dim=1)  ######
    for i in range(len(class_names)):  ###############################################
        sub_df[class_names[i]] = mask_soft[:,i].cpu().numpy()  #######################
    sub_df.to_csv(f'sub_age.csv')  ###################################################
    all_predictions2 = []

    for p in all_predictions:
        all_predictions2.append(class_names[p])
        
    sub_df['age'] = all_predictions2
    
    return sub_df.copy(), max(ground_best_F1_score,best_F1_score)

In [18]:
# def trs(a):
#     a1 = []
#     for i in a:
#         a1 = torch.Tensor()
#         for j in i:
#             if j < 30:
#                 a1 = torch.cat([a1, torch.tensor([[1,0,0]])],dim=0)
#             elif j <59:
#                 a1 = torch.cat([a1, torch.tensor([[0,1,0]])],dim=0)
#             else:
#                 a1 = torch.cat([a1, torch.tensor([[0,0,1]])],dim=0)
#         a2.append(a1)
#     return a2

In [19]:
# def age_model_run(sub_df, test_dir): #####target을 변수로 받지 않고 관련 부분들을 모두 수정 #########################
#     data_dir = f'/opt/ml/input/cropped_v2/train/images_classified_age/'

#     ################## 모델부분 코드 변경 #################
#     model = AgeCustomModel()
#     best_F1_score = 0 
#     #######################################################
#     model = model.to(device)

# #     criterion = nn.CrossEntropyLoss()
#     criterion = nn.MSELoss()
#     optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# #     optimizer = optim.Adam(model.parameters(), lr=0.0001)
    
#     num_epochs = 7
#     model.train()
#     start_time = time.time()

#     # 전체 반복(epoch) 수 만큼 반복하며
#     print('****start epoch****')
#     for epoch in range(num_epochs):
#         running_loss = 0.
#         running_corrects = 0

#         # 배치 단위로 학습 데이터 불러오기                                ##################features 활용하는 부분 추가
# #         for inputs, labels in tqdm(dataloader):
#         for inputs, _ , features, labels in tqdm(dataloader):
#             inputs = inputs.unsqueeze(1).float().to(device)
#             labels = labels.to(device)
            
#             features = features.to(device)##################features 활용하는 부분 추가

#             # 모델에 입력(forward)하고 결과 계산
#             optimizer.zero_grad() # 전체 grad 값을 초기화.
# #             outputs = model(inputs)
#             outputs = model(inputs,features) #안되면 model.forward로 바꿔보기
    
#             _, preds = torch.max(outputs, 1)
#             loss = criterion(outputs, labels)

#             # 역전파를 통해 기울기(gradient) 계산 및 학습 진행
#             loss.backward()
#             optimizer.step()

#             running_loss += loss.item() * inputs.size(0)
#             running_corrects += torch.sum(preds == labels.data)

#         epoch_loss = running_loss / len(dataset_split['train'])
#         epoch_acc = running_corrects / len(dataset_split['train']) * 100.
        
#         # validation
#         model.eval()
        
#         all_labels = []
#         all_preds = []
        
#         with torch.no_grad():
#             running_loss = 0.
#             running_corrects = 0
            
# #             for inputs, labels in valid_dataloader:
#             for inputs, labels, features, _ in valid_dataloader:
#                 inputs = inputs.to(device)
#                 labels = labels.to(device)

#                 features = features.to(device)##################features 활용하는 부분 추가
            
# #                 outputs = model(inputs)
#                 outputs = model(inputs,features)
#                 outputs = trs(outputs)
#                 _, preds = torch.max(outputs, 1)
        
#                 all_preds.extend(preds.cpu().numpy())
        
#                 loss = criterion(outputs, labels)

#                 running_loss += loss.item() * inputs.size(0)
#                 running_corrects += torch.sum(preds == labels.data)
        
#                 all_labels.extend(labels.data.cpu().numpy())
        
#             valid_epoch_loss = running_loss / len(dataset_split['val'])
#             valid_epoch_acc = running_corrects / len(dataset_split['val']) * 100.
#             F1_score = f1_score(all_labels, all_preds, average='macro')
            
#         # 학습 과정 중에 결과 출력
#         print('#{} Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch, epoch_loss, epoch_acc, time.time() - start_time))
#         print(f'Valid Loss: {valid_epoch_loss:.4f} Valid Acc: {valid_epoch_acc:.4f} F1 Score: {F1_score:.4f}')
# #         if F1_score > 0.77 and target == 'age' : break
#         if F1_score > best_F1_score : best_F1_score, best_model = F1_score, copy.deepcopy(model)   ############최고모델 활용을 위한 코드 추가
#         print('best_F1_score : ',best_F1_score) ############최고모델 활용을 위한 코드 추가
        
#     if ground_best_F1_score < best_F1_score : ############최고모델 활용을 위한 코드 추가
#         print(f"ground_best_F1_score갱신! {ground_best_F1_score} -> {best_F1_score}")
#         now = datetime.datetime.now()
#         nowDatetime = now.strftime('%Y_%m%d_%H%M')
#         torch.save(best_model.state_dict(), os.getcwd()+'/'+f'model_age_{nowDatetime}') ############최고모델 활용을 위한 코드 추가
        
        
#     image_dir = os.path.join(test_dir, 'images')
    
#     image_paths = [os.path.join(image_dir, img_id) for img_id in sub_df.ImageID]
    
#     test_set = TestDatasetAge(image_paths, transforms, submission)  ###################################데이터셋 변경
    
#     test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=False, num_workers=4)
    
#     all_predictions = []

#     best_model.eval()
#     all_output = []        ###########################################################
#     with torch.no_grad():
# #         for inputs in tqdm(test_dataloader):

#         for inputs, features in tqdm(test_dataloader):

#             inputs = inputs.to(device) 
#             features = features.to(device) ##################features 활용하는 부분 추가
            
# #             outputs = model(inputs)
#             outputs = best_model(inputs,features)
#             _, preds = torch.max(outputs, 1)
        
#             all_predictions.extend(preds.cpu().numpy())
#             all_output.append(outputs)   #############################################
#     mask_soft = torch.nn.functional.softmax(torch.cat(all_output,dim=0),dim=1)  ######
#     for i in range(len(class_names)):  ###############################################
#         sub_df[class_names[i]] = mask_soft[:,i].cpu().numpy()  #######################
#     sub_df.to_csv(f'sub_age.csv')  ###################################################
#     all_predictions2 = []

#     for p in all_predictions:
#         all_predictions2.append(class_names[p])
        
#     sub_df['age'] = all_predictions2
    
#     return sub_df.copy(), max(ground_best_F1_score,best_F1_score)

In [20]:
submission_after_age, ground_best_F1_score = age_model_run(submission, test_dir)

  0%|          | 0/323 [00:00<?, ?it/s]

****start epoch****


100%|██████████| 323/323 [01:02<00:00,  5.17it/s]
  0%|          | 0/323 [00:00<?, ?it/s]

#0 Loss: 0.4904 Acc: 78.2891% Time: 65.4957s
Valid Loss: 0.7043 Valid Acc: 73.8889 F1 Score: 0.6628
best_F1_score :  0.6627686157616021


100%|██████████| 323/323 [01:01<00:00,  5.25it/s]
  0%|          | 0/323 [00:00<?, ?it/s]

#1 Loss: 0.2863 Acc: 87.7908% Time: 130.0870s
Valid Loss: 1.0418 Valid Acc: 69.8148 F1 Score: 0.5819
best_F1_score :  0.6627686157616021


100%|██████████| 323/323 [01:01<00:00,  5.27it/s]
  0%|          | 0/323 [00:00<?, ?it/s]

#2 Loss: 0.2069 Acc: 91.4042% Time: 194.4426s
Valid Loss: 0.9183 Valid Acc: 73.1018 F1 Score: 0.6398
best_F1_score :  0.6627686157616021


100%|██████████| 323/323 [01:01<00:00,  5.28it/s]
  0%|          | 0/323 [00:00<?, ?it/s]

#3 Loss: 0.1644 Acc: 92.9895% Time: 258.7118s
Valid Loss: 1.4469 Valid Acc: 69.2593 F1 Score: 0.5642
best_F1_score :  0.6627686157616021


100%|██████████| 323/323 [01:01<00:00,  5.26it/s]
  0%|          | 0/323 [00:00<?, ?it/s]

#4 Loss: 0.1217 Acc: 94.9043% Time: 323.1698s
Valid Loss: 1.3952 Valid Acc: 68.9352 F1 Score: 0.5541
best_F1_score :  0.6627686157616021


100%|██████████| 323/323 [01:01<00:00,  5.26it/s]
  0%|          | 0/323 [00:00<?, ?it/s]

#5 Loss: 0.0897 Acc: 96.2477% Time: 387.6764s
Valid Loss: 0.8374 Valid Acc: 77.8704 F1 Score: 0.7238
best_F1_score :  0.7238224238831882


100%|██████████| 323/323 [01:01<00:00,  5.27it/s]


#6 Loss: 0.0780 Acc: 96.7830% Time: 452.0649s
Valid Loss: 1.3250 Valid Acc: 73.8889 F1 Score: 0.6753
best_F1_score :  0.7238224238831882
ground_best_F1_score갱신! 0 -> 0.7238224238831882


100%|██████████| 197/197 [00:14<00:00, 13.87it/s]


In [None]:
ffff = submission_after_age.astype({'age':'int','gender':'int','mask':'int'})
try : ffff['ans'] = ffff['mask'] + ffff['gender'] + ffff['age'] ; ffff = ffff.drop(['mask','gender','age'],axis=1)
except : pass
ffff

In [202]:
now = datetime.datetime.now()
nowDatetime = now.strftime('%m%d_%H%M')
ffff.to_csv(f'sub{nowDatetime}_AgeF1-{ground_best_F1_score:.4f}_epo2.csv', index=False)