In [1]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import wandb

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize

In [2]:
# Report the visible hardware and choose the device used by every later cell.
# `torch.cuda.device_count()` returns 0 instead of raising when no GPU is
# present, so the availability flag (not an exception handler) drives the
# branch, and `device` falls back to the CPU instead of being hard-coded.
cuda_available = torch.cuda.is_available()
print('CUDA GPU available : {}'.format(cuda_available))
if cuda_available:
    print('{} GPU(s) is(are) allocated'.format(torch.cuda.device_count()))
else:
    print('GPUs are not allocated. Current runtime is on CPU.')
device = torch.device("cuda" if cuda_available else "cpu")

CUDA GPU available : True
1 GPU(s) is(are) allocated


In [3]:
# Dataset locations: set these to the training folder and the test
# (evaluation) folder of the current environment.
train_dir = '/opt/ml/input/data/train'
test_dir = '/opt/ml/input/data/eval'

## Dataset

In [4]:
class TrainDataset(Dataset):
    """Labelled image dataset built from ``train.csv`` and the ``images`` folder.

    ``train.csv`` must have a ``path`` column whose values are sub-folder names
    inside ``<train_dir>/images``. Every non-hidden file in those folders
    becomes one sample. The label is derived purely from the file location:

    * the first character of the file name (``'m'``, ``'i'`` or ``'n'``;
      presumably mask / incorrect / normal -- confirm against the data spec),
    * the second ``_``-separated field of the folder name (``'male'`` or
      ``'female'``),
    * the last ``_``-separated field of the folder name (age in years),
      bucketed into 0 (< 30), 1 (30-59) and 2 (>= 60).

    The three values are combined into a single class id in ``range(18)``.

    Args:
        train_dir: Folder containing ``train.csv`` and ``images/``.
        is_Train: Accepted for interface compatibility; not used by this class.
        transform: Optional callable applied to the PIL image before returning.
    """

    def __init__(self, train_dir, is_Train=True, transform=None):
        super().__init__()

        csv_path = os.path.join(train_dir, 'train.csv')
        csv = pd.read_csv(csv_path)
        self.image_dir = os.path.join(train_dir, 'images')
        self.transform = transform

        self.image_path = []
        for p in csv['path']:
            folder = os.path.join(self.image_dir, p)
            # os.listdir returns entries in arbitrary order; sorting makes the
            # index -> image mapping (and therefore any seeded split) stable.
            for image in sorted(os.listdir(folder)):
                # Skip hidden files such as "._*" resource forks or checkpoints.
                if image.startswith('.'):
                    continue
                self.image_path.append(os.path.join(folder, image))

        # (mask, gender, age_group) -> class id, enumerated mask-major.
        comb = [(m, g, a) for m in ['m', 'i', 'n'] for g in ['male', 'female'] for a in [0, 1, 2]]
        self.comb_dic = {key: i for i, key in enumerate(comb)}

    def __len__(self):
        """Return the number of image files found."""
        return len(self.image_path)

    def _target_from_path(self, image_name):
        """Return the class id encoded in the folder and file name of ``image_name``."""
        # os.path helpers instead of a hard-coded '/' split so the parsing
        # follows the platform's path separator.
        file_name = os.path.basename(image_name)
        fields = os.path.basename(os.path.dirname(image_name)).split('_')

        mask = file_name[0]
        gender = fields[1]
        age = int(fields[-1])

        if age >= 60:
            age_group = 2
        elif age >= 30:
            age_group = 1
        else:
            age_group = 0

        return self.comb_dic[(mask, gender, age_group)]

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        The image is always converted to 3-channel RGB so that grayscale,
        palette or RGBA files do not break a 3-channel ``Normalize`` transform.
        """
        image_name = self.image_path[idx]
        image = Image.open(image_name).convert('RGB')
        target = self._target_from_path(image_name)

        if self.transform:
            image = self.transform(image)

        return image, target

class TestDataset(Dataset):
    """Unlabelled image dataset over an explicit list of file paths.

    Args:
        img_paths: Sequence of image file paths; item ``i`` is loaded from
            ``img_paths[i]``.
        transform: Optional callable applied to the PIL image before it is
            returned. Defaults to ``None`` (return the PIL image itself).
    """

    def __init__(self, img_paths, transform=None):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        """Load image ``index`` as RGB and return it, transformed if requested."""
        # Force 3 channels so grayscale / palette / RGBA files do not break a
        # 3-channel Normalize transform.
        image = Image.open(self.img_paths[index]).convert('RGB')

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        """Return the number of image paths."""
        return len(self.img_paths)

In [5]:
def check_accuracy(loader, model, device):
    """Evaluate top-1 accuracy of ``model`` on ``loader``, print it and log it.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` batches of tensors.
        model: Module mapping a batch of inputs to per-class scores with the
            class dimension at index 1.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Accuracy in percent as a float. ``0.0`` when the loader yields no
        samples (nothing is logged to wandb in that case).

    Side effects:
        Prints a summary line and logs ``"validation acc"`` via ``wandb.log``.
        The model is switched to eval mode for the pass and then put back in
        whatever mode it was in when the function was called.
    """
    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device)
                y = y.to(device=device)

                scores = model(x)
                _, predictions = scores.max(1)
                # .item() keeps the counter a plain int, so the message below
                # prints "Got 3 / 4" rather than "Got tensor(3, device=...)".
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode instead of unconditionally enabling
        # training (which would re-activate dropout on an inference model).
        model.train(was_training)

    if num_samples == 0:
        print('Got 0 / 0 samples; accuracy is undefined for an empty loader')
        return 0.0

    accuracy = float(num_correct) / float(num_samples) * 100
    print(f'Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}')
    wandb.log({"validation acc" : accuracy})
    return accuracy

In [6]:
from torch.utils.data.dataset import random_split

# Resize to the 224x224 network input, convert to a tensor and normalise with
# the per-channel mean / std given below.
tfms = transforms.Compose([transforms.Resize((224, 224)), transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),])

dataset = TrainDataset(train_dir, transform=tfms)

# 80 / 20 split. The validation size is the remainder, so the two lengths
# always sum to len(dataset); int(n*0.8) + int(n*0.2) can fall short of n, in
# which case random_split raises.
n_train = int(len(dataset) * 0.8)
n_val = len(dataset) - n_train
# Fixed seed so the same samples land in the validation set on every run.
split_generator = torch.Generator().manual_seed(42)
# NOTE(review): the split is per image, so images from the same `path` folder
# can end up on both sides of the split -- confirm this is acceptable.
train_dataset, val_dataset = random_split(dataset, [n_train, n_val], generator=split_generator)
print(len(dataset))

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=1, drop_last=False)
val_loader   = DataLoader(dataset=val_dataset, batch_size=32)

54900


## Model

In [7]:
from efficientnet_pytorch import EfficientNet

# Build EfficientNet-B6 from its pretrained weights with an 18-class output,
# then move it to the selected device.
model = EfficientNet.from_pretrained('efficientnet-b6', num_classes=18)
model = model.to(device)

Loaded pretrained weights for efficientnet-b6


In [8]:
import torch.optim as optim

# Cross-entropy loss over the class scores.
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters with a learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [9]:
# Restore previously saved weights into the model.
model_path = '/opt/ml/weights/'
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine also loads on a CPU-only runtime.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
state_dict = torch.load(os.path.join(model_path, 'model.pt'), map_location=device)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [20]:
from sklearn.metrics import confusion_matrix

# Collect true and predicted class ids over the whole validation loader.
model.eval()
y_true = []
y_predicted = []
with torch.no_grad():
    for x, y in val_loader:
        # Targets are only needed as Python ints; no device transfer required.
        y_true += y.tolist()

        scores = model(x.to(device=device))
        _, predictions = scores.max(1)
        y_predicted += predictions.tolist()
# Back to training mode; the trailing ';' keeps the model repr out of the
# cell output.
model.train();

# Pass the label set explicitly so the matrix is always 18 x 18, even when a
# class is missing from both the targets and the predictions.
cm = confusion_matrix(y_true, y_predicted, labels=list(range(18)))

# Per-class F1 = 2*TP / (column sum + row sum), which equals
# 2*precision*recall / (precision + recall) whenever that is defined.
# Classes with a zero denominator score 0 instead of NaN so they cannot turn
# the macro average into NaN.
true_positive = np.diag(cm).astype(float)
f1_denominator = (cm.sum(axis=0) + cm.sum(axis=1)).astype(float)
F1 = np.divide(2 * true_positive, f1_denominator,
               out=np.zeros_like(true_positive), where=f1_denominator > 0).tolist()
macro_F1 = float(np.mean(F1))
macro_F1

KeyboardInterrupt: 

In [37]:
# Macro-averaged F1 from the confusion matrix `cm` (rows are indexed by true
# class, columns by predicted class).
# Per-class F1 = 2*TP / (column sum + row sum), which equals
# 2*precision*recall / (precision + recall) whenever that is defined.
# The class count comes from the matrix itself, and classes with a zero
# denominator score 0 instead of NaN so the mean stays finite.
cm_values = np.asarray(cm, dtype=float)
true_positive = np.diag(cm_values)
f1_denominator = cm_values.sum(axis=0) + cm_values.sum(axis=1)
F1 = np.divide(2 * true_positive, f1_denominator,
               out=np.zeros_like(true_positive), where=f1_denominator > 0).tolist()
macro_F1 = float(np.mean(F1))
# Trailing expression so the cell displays the score.
macro_F1

0.9958244558350012