In [1]:
#!conda install -n base -c conda-forge jupyterlab_widgets -y

In [2]:
# conda install -c conda-forge ipywidgets

In [3]:
import matplotlib
import matplotlib.pyplot as plt

import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset

import torchvision
from torchvision import datasets, models, transforms

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import time
from tqdm import tqdm

from PIL import Image

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # device 객체

# Set random seed
SEED = 2021
random.seed(SEED)
np.random.seed(SEED)
os.environ["PYTHONHASHSEED"] = str(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)  # type: ignore
torch.backends.cudnn.deterministic = True  # type: ignore
torch.backends.cudnn.benchmark = True  # type: ignore

In [4]:
device

device(type='cuda', index=0)

In [5]:
# 데이터셋을 불러올 때 사용할 변형(transformation) 객체 정의
transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # 정규화(normalization)
])

In [6]:
def train_val_dataset(data):
    
    train_idx = []
    val_idx = []
    
    for i in range(len(data.classes)):
        all_idx = []
        
        for j in tqdm(range(len(data))):
            if i == data[j][1]:
                all_idx.append(j)
        
        split_idx = int(len(all_idx) * 0.8)
        
        train_idx = np.concatenate((train_idx, all_idx[:split_idx]))
        val_idx = np.concatenate((val_idx, all_idx[split_idx:]))
    
    datas = {}
    datas['train'] = Subset(data, list(map(int, train_idx)))
    datas['val'] = Subset(data, list(map(int, val_idx)))
    
    return datas

In [7]:
'''
def train_val_dataset(data, val_split=0.2):
    train_idx, val_idx = train_test_split(list(range(len(data))), test_size=val_split, shuffle=False)
    datas = {}
    datas['train'] = Subset(data, train_idx)
    datas['val'] = Subset(data, val_idx)
    return datas
'''

"\ndef train_val_dataset(data, val_split=0.2):\n    train_idx, val_idx = train_test_split(list(range(len(data))), test_size=val_split, shuffle=False)\n    datas = {}\n    datas['train'] = Subset(data, train_idx)\n    datas['val'] = Subset(data, val_idx)\n    return datas\n"

In [8]:
from torch.utils.data import Dataset, DataLoader

class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)

In [9]:
test_dir = '/opt/ml/input/cropped_v2.1/eval'
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))

In [10]:
def model_run(target, sub_df, test_dir):
    data_dir = f'/opt/ml/input/cropped_v2.1/train/images_classified_{target}/'
    
    dataset = datasets.ImageFolder(os.path.join(data_dir), transforms)
    
    print(f'*****{target}*****')
    print(dataset)
    print()
    
    class_names = dataset.classes
    print(class_names)
    print()
    
    print('****train, valid split****')
    dataset_split = train_val_dataset(dataset)
    
    dataloader = torch.utils.data.DataLoader(dataset_split['train'], batch_size=128, shuffle=True, num_workers=8)
    valid_dataloader = torch.utils.data.DataLoader(dataset_split['val'], batch_size=128, shuffle=False, num_workers=8)
    
    model = models.resnet34(pretrained=True)

    num_features = model.fc.in_features
    # 전이 학습(transfer learning): 모델의 출력 뉴런 수를 18개로 교체하여 마지막 레이어 다시 학습
    model.fc = nn.Linear(num_features, len(class_names)) 
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    
    num_epochs = 10
    model.train()
    start_time = time.time()

    # 전체 반복(epoch) 수 만큼 반복하며
    print('****start epoch****')
    for epoch in range(num_epochs):
        running_loss = 0.
        running_corrects = 0

        # 배치 단위로 학습 데이터 불러오기
        for inputs, labels in tqdm(dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # 모델에 입력(forward)하고 결과 계산
            optimizer.zero_grad() # 전체 grad 값을 초기화.
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            # 역전파를 통해 기울기(gradient) 계산 및 학습 진행
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(dataset_split['train'])
        epoch_acc = running_corrects / len(dataset_split['train']) * 100.
        
        # validation
        model.eval()
        
        all_labels = []
        all_preds = []
        
        with torch.no_grad():
            running_loss = 0.
            running_corrects = 0
            
            for inputs, labels in valid_dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
        
                all_preds.extend(preds.cpu().numpy())
        
                loss = criterion(outputs, labels)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
        
                all_labels.extend(labels.data.cpu().numpy())
        
            valid_epoch_loss = running_loss / len(dataset_split['val'])
            valid_epoch_acc = running_corrects / len(dataset_split['val']) * 100.
            F1_score = f1_score(all_labels, all_preds, average='macro')
        
        # 학습 과정 중에 결과 출력
        print('#{} Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch, epoch_loss, epoch_acc, time.time() - start_time))
        print(f'Valid Loss: {valid_epoch_loss:.4f} Valid Acc: {valid_epoch_acc:.4f} F1 Score: {F1_score:.4f}')
    '''
    valid_dataloader = torch.utils.data.DataLoader(dataset_split['val'], batch_size=128, shuffle=False, num_workers=8)
    
    model.eval()
    start_time = time.time()

    all_labels = []
    all_preds = []

    with torch.no_grad():
        running_loss = 0.
        running_corrects = 0

        for inputs, labels in valid_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
        
            all_preds.extend(preds.cpu().numpy())
        
            loss = criterion(outputs, labels)

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
        
        
            all_labels.extend(labels.data.cpu().numpy())
        
            
            # 한 배치의 첫 번째 이미지에 대하여 결과 시각화
            print(f'[예측 결과: {class_names[preds[0]]}] (실제 정답: {class_names[labels.data[0]]})')
            imshow(inputs.cpu().data[0], title='예측 결과: ' + class_names[preds[0]])
            
            
        epoch_loss = running_loss / len(dataset_split['val'])
        epoch_acc = running_corrects / len(dataset_split['val']) * 100.
        print('[Test Phase] Loss: {:.4f} Acc: {:.4f}% Time: {:.4f}s'.format(epoch_loss, epoch_acc, time.time() - start_time))
    
    F1_score = f1_score(all_labels, all_preds, average='macro')
    
    print(f'*****F1 Score: {F1_score}*****')
    '''
    image_dir = os.path.join(test_dir, 'images')
    
    image_paths = [os.path.join(image_dir, img_id) for img_id in sub_df.ImageID]
    
    test_set = TestDataset(image_paths, transforms)
    
    test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False, num_workers=8)
    
    all_predictions = []

    with torch.no_grad():
        for inputs in tqdm(test_dataloader):
            inputs = inputs.to(device)
        
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
        
            all_predictions.extend(preds.cpu().numpy())
    
    all_predictions2 = []

    for p in all_predictions:
        all_predictions2.append(class_names[p])
        
    sub_df[target] = all_predictions2
    
    return sub_df

In [11]:
submission = model_run('mask', submission, test_dir).copy()

  0%|          | 11/18900 [00:00<02:56, 107.17it/s]

*****mask*****
Dataset ImageFolder
    Number of datapoints: 18900
    Root location: /opt/ml/input/cropped_v2.1/train/images_classified_mask/
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=PIL.Image.BILINEAR)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

['0', '12', '6']

****train, valid split****


100%|██████████| 18900/18900 [00:56<00:00, 333.41it/s]
100%|██████████| 18900/18900 [00:56<00:00, 333.57it/s]
100%|██████████| 18900/18900 [00:56<00:00, 331.90it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

****start epoch****


100%|██████████| 119/119 [00:28<00:00,  4.21it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#0 Loss: 0.1675 Acc: 93.7963% Time: 31.8786s
Valid Loss: 0.04938751205250069 Valid Acc: 98.17459869384766 F1 Score: 0.9701491161397802


100%|██████████| 119/119 [00:27<00:00,  4.31it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#1 Loss: 0.0138 Acc: 99.6230% Time: 63.0363s
Valid Loss: 0.024944811377418104 Valid Acc: 99.10053253173828 F1 Score: 0.9853601176989547


100%|██████████| 119/119 [00:27<00:00,  4.33it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#2 Loss: 0.0050 Acc: 99.8876% Time: 94.2185s
Valid Loss: 0.012671514553210092 Valid Acc: 99.60317993164062 F1 Score: 0.9933975200348962


100%|██████████| 119/119 [00:27<00:00,  4.27it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#3 Loss: 0.0028 Acc: 99.9273% Time: 125.5516s
Valid Loss: 0.03460362219188865 Valid Acc: 98.94180297851562 F1 Score: 0.9837419693606003


100%|██████████| 119/119 [00:27<00:00,  4.26it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#4 Loss: 0.0015 Acc: 99.9537% Time: 157.1026s
Valid Loss: 0.02396445334362354 Valid Acc: 99.23280334472656 F1 Score: 0.9877913858388118


100%|██████████| 119/119 [00:27<00:00,  4.26it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#5 Loss: 0.0005 Acc: 99.9934% Time: 188.6908s
Valid Loss: 0.012563167029693624 Valid Acc: 99.62963104248047 F1 Score: 0.9937761086696605


100%|██████████| 119/119 [00:27<00:00,  4.27it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#6 Loss: 0.0002 Acc: 100.0000% Time: 220.1334s
Valid Loss: 0.014578369163437476 Valid Acc: 99.57672119140625 F1 Score: 0.9930183580714823


100%|██████████| 119/119 [00:27<00:00,  4.33it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#7 Loss: 0.0001 Acc: 100.0000% Time: 251.2650s
Valid Loss: 0.01439959954900948 Valid Acc: 99.60317993164062 F1 Score: 0.9933975200348962


100%|██████████| 119/119 [00:27<00:00,  4.27it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#8 Loss: 0.0001 Acc: 100.0000% Time: 282.8045s
Valid Loss: 0.013314268926698123 Valid Acc: 99.62963104248047 F1 Score: 0.9937761086696605


100%|██████████| 119/119 [00:27<00:00,  4.29it/s]
  0%|          | 0/99 [00:00<?, ?it/s]

#9 Loss: 0.0001 Acc: 100.0000% Time: 314.1588s
Valid Loss: 0.01433407745675785 Valid Acc: 99.60317993164062 F1 Score: 0.9933975200348962


100%|██████████| 99/99 [00:09<00:00, 10.83it/s]


In [12]:
submission = model_run('gender', submission, test_dir).copy()

  0%|          | 34/18900 [00:00<00:56, 333.45it/s]

*****gender*****
Dataset ImageFolder
    Number of datapoints: 18900
    Root location: /opt/ml/input/cropped_v2.1/train/images_classified_gender/
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=PIL.Image.BILINEAR)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

['0', '3']

****train, valid split****


100%|██████████| 18900/18900 [00:57<00:00, 330.67it/s]
100%|██████████| 18900/18900 [00:57<00:00, 327.54it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

****start epoch****


100%|██████████| 119/119 [00:28<00:00,  4.25it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#0 Loss: 0.2090 Acc: 91.5537% Time: 31.4276s
Valid Loss: 0.09345550277275057 Valid Acc: 97.14361572265625 F1 Score: 0.9697486656307006


100%|██████████| 119/119 [00:27<00:00,  4.29it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#1 Loss: 0.0487 Acc: 98.3597% Time: 62.5849s
Valid Loss: 0.24537892599465855 Valid Acc: 90.18778228759766 F1 Score: 0.8907644535987742


100%|██████████| 119/119 [00:27<00:00,  4.28it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#2 Loss: 0.0237 Acc: 99.2129% Time: 93.9504s
Valid Loss: 0.1140575763192821 Valid Acc: 96.32373046875 F1 Score: 0.9608590296050512


100%|██████████| 119/119 [00:27<00:00,  4.31it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#3 Loss: 0.0039 Acc: 99.9206% Time: 124.8230s
Valid Loss: 0.08510232664682066 Valid Acc: 97.5138931274414 F1 Score: 0.9739023713982433


100%|██████████| 119/119 [00:27<00:00,  4.34it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#4 Loss: 0.0011 Acc: 99.9868% Time: 155.5929s
Valid Loss: 0.09832638301191486 Valid Acc: 97.38164520263672 F1 Score: 0.9723949407207202


100%|██████████| 119/119 [00:27<00:00,  4.26it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#5 Loss: 0.0004 Acc: 100.0000% Time: 186.8870s
Valid Loss: 0.09959314730055946 Valid Acc: 97.46099090576172 F1 Score: 0.9732415987808567


100%|██████████| 119/119 [00:27<00:00,  4.33it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#6 Loss: 0.0002 Acc: 100.0000% Time: 217.9572s
Valid Loss: 0.102018944617931 Valid Acc: 97.59323120117188 F1 Score: 0.9746256525816722


100%|██████████| 119/119 [00:27<00:00,  4.31it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#7 Loss: 0.0001 Acc: 100.0000% Time: 249.1100s
Valid Loss: 0.10593025400178273 Valid Acc: 97.5138931274414 F1 Score: 0.9737658011641207


100%|██████████| 119/119 [00:27<00:00,  4.32it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#8 Loss: 0.0001 Acc: 100.0000% Time: 280.0284s
Valid Loss: 0.10552384362529393 Valid Acc: 97.54033660888672 F1 Score: 0.9740679746164341


100%|██████████| 119/119 [00:27<00:00,  4.36it/s]
  0%|          | 0/99 [00:00<?, ?it/s]

#9 Loss: 0.0001 Acc: 100.0000% Time: 310.7987s
Valid Loss: 0.10835693556879653 Valid Acc: 97.56678009033203 F1 Score: 0.9743305238387885


100%|██████████| 99/99 [00:09<00:00, 10.52it/s]


In [13]:
submission = model_run('age', submission, test_dir).copy()

  0%|          | 14/18900 [00:00<02:18, 136.39it/s]

*****age*****
Dataset ImageFolder
    Number of datapoints: 18900
    Root location: /opt/ml/input/cropped_v2.1/train/images_classified_age/
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=PIL.Image.BILINEAR)
               ToTensor()
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           )

['0', '1', '2']

****train, valid split****


100%|██████████| 18900/18900 [00:57<00:00, 328.68it/s]
100%|██████████| 18900/18900 [00:57<00:00, 328.93it/s]
100%|██████████| 18900/18900 [00:57<00:00, 328.72it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

****start epoch****


100%|██████████| 119/119 [00:28<00:00,  4.23it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#0 Loss: 0.3510 Acc: 86.2094% Time: 31.7207s
Valid Loss: 0.4834965151852636 Valid Acc: 82.35916137695312 F1 Score: 0.6464249203652348


100%|██████████| 119/119 [00:27<00:00,  4.32it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#1 Loss: 0.1826 Acc: 93.2138% Time: 62.6200s
Valid Loss: 0.37551701485054484 Valid Acc: 85.40068817138672 F1 Score: 0.7530800164100007


100%|██████████| 119/119 [00:27<00:00,  4.32it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#2 Loss: 0.1186 Acc: 95.3238% Time: 93.7456s
Valid Loss: 1.145969100380245 Valid Acc: 71.64771270751953 F1 Score: 0.6278183500929594


100%|██████████| 119/119 [00:27<00:00,  4.29it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#3 Loss: 0.0628 Acc: 97.6652% Time: 125.0251s
Valid Loss: 0.8173257976094755 Valid Acc: 81.53927612304688 F1 Score: 0.713242641709714


100%|██████████| 119/119 [00:27<00:00,  4.28it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#4 Loss: 0.0291 Acc: 99.0277% Time: 156.4243s
Valid Loss: 1.5521928255411412 Valid Acc: 73.2345962524414 F1 Score: 0.6548322991133234


100%|██████████| 119/119 [00:27<00:00,  4.30it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#5 Loss: 0.0172 Acc: 99.4973% Time: 187.5138s
Valid Loss: 1.465400308268669 Valid Acc: 75.53556823730469 F1 Score: 0.6738135693539937


100%|██████████| 119/119 [00:27<00:00,  4.29it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#6 Loss: 0.0422 Acc: 98.4986% Time: 218.8241s
Valid Loss: 1.0410242339739355 Valid Acc: 81.08966064453125 F1 Score: 0.7163802974085454


100%|██████████| 119/119 [00:27<00:00,  4.35it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#7 Loss: 0.0023 Acc: 99.9735% Time: 249.5428s
Valid Loss: 1.1183644257606327 Valid Acc: 83.12615966796875 F1 Score: 0.7266913411265054


100%|██████████| 119/119 [00:27<00:00,  4.35it/s]
  0%|          | 0/119 [00:00<?, ?it/s]

#8 Loss: 0.0009 Acc: 99.9934% Time: 280.4103s
Valid Loss: 1.0241032073552045 Valid Acc: 84.47501373291016 F1 Score: 0.7345442875403673


100%|██████████| 119/119 [00:27<00:00,  4.34it/s]
  0%|          | 0/99 [00:00<?, ?it/s]

#9 Loss: 0.0004 Acc: 100.0000% Time: 311.1435s
Valid Loss: 1.1406005663536027 Valid Acc: 84.05184173583984 F1 Score: 0.7299647900211793


100%|██████████| 99/99 [00:09<00:00, 10.63it/s]


In [14]:
submission = submission.astype({'age':'int','gender':'int','mask':'int'})

In [15]:
try : submission['ans'] = submission['mask'] + submission['gender'] + submission['age'] ; submission2 = submission.drop(['mask','gender','age'],axis=1)
except : pass
submission2

Unnamed: 0,ImageID,ans
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,13
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,2
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,13
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,13
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,12
...,...,...
12595,d71d4570505d6af8f777690e63edfa8d85ea4476.jpg,2
12596,6cf1300e8e218716728d5820c0bab553306c2cfd.jpg,4
12597,8140edbba31c3a824e817e6d5fb95343199e2387.jpg,9
12598,030d439efe6fb5a7bafda45a393fc19f2bf57f54.jpg,1


In [16]:
submission2.to_csv(os.path.join(test_dir, 'submission.csv'), index=False)