In [1]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [2]:
import GPUtil
GPUtil.showUtilization()

| ID | GPU | MEM |
------------------
|  0 |  0% |  0% |


In [3]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
#from tqdm.notebook import tqdm
from tqdm import tqdm
import random
import os
from torchmetrics import F1Score

In [4]:
random_seed = 12
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed) # if use multi-GPU
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)
random.seed(random_seed)

In [5]:
train_dir_path = '/opt/ml/input/data/train/'
train_image_path = '/opt/ml/input/data/train/images/'

dt_train = pd.read_csv(train_dir_path+'train.csv')
exp_train = pd.read_csv(train_dir_path+'expanded_train.csv')

def get_age_range(age):
    if age < 30:
        return 0
    elif 30 <= age < 60:
        return 1
    else:
        return 2
dt_train['age_range'] = dt_train['age'].apply(lambda x : get_age_range(x))
dt_train

Unnamed: 0,id,gender,race,age,path,age_range
0,000001,female,Asian,45,000001_female_Asian_45,1
1,000002,female,Asian,52,000002_female_Asian_52,1
2,000004,male,Asian,54,000004_male_Asian_54,1
3,000005,female,Asian,58,000005_female_Asian_58,1
4,000006,female,Asian,59,000006_female_Asian_59,1
...,...,...,...,...,...,...
2695,006954,male,Asian,19,006954_male_Asian_19,0
2696,006955,male,Asian,19,006955_male_Asian_19,0
2697,006956,male,Asian,19,006956_male_Asian_19,0
2698,006957,male,Asian,20,006957_male_Asian_20,0


In [6]:
train_idx, valid_idx = train_test_split(np.arange(len(dt_train)),
                                       test_size=0.2,
                                       shuffle=True,
                                       stratify=dt_train['age_range'])
dt_train.iloc[train_idx].head()

Unnamed: 0,id,gender,race,age,path,age_range
49,77,male,Asian,59,000077_male_Asian_59,1
2141,5420,female,Asian,48,005420_female_Asian_48,1
2029,5106,female,Asian,54,005106_female_Asian_54,1
574,1217,female,Asian,25,001217_female_Asian_25,0
2188,5478,female,Asian,43,005478_female_Asian_43,1


In [7]:
split_exp_train = exp_train[exp_train['PersonID'].isin(list(dt_train.iloc[train_idx]["id"]))]
split_exp_valid = exp_train[exp_train['PersonID'].isin(list(dt_train.iloc[valid_idx]["id"]))]
print(f"index size: {len(train_idx)} == file estimate: {len(train_idx) * 7} == split size: {len(split_exp_train)}")
split_exp_train

index size: 2160 == file estimate: 15120 == split size: 15127


Unnamed: 0.1,Unnamed: 0,PersonID,Filename,Class,Mask,Gender,Age,Age_Class,Has_Face,BBoxX1,...,LE_X,LE_Y,RE_X,RE_Y,N_X,N_Y,LM_X,LM_Y,RM_X,RM_Y
0,0,000001,../input/data/train/images/000001_female_Asian...,4,0,1,45,1,True,112,...,145,230,206,230,176,268,158,296,198,297
1,1,000001,../input/data/train/images/000001_female_Asian...,4,0,1,45,1,True,120,...,150,223,210,219,182,257,165,289,203,287
2,2,000001,../input/data/train/images/000001_female_Asian...,4,0,1,45,1,True,107,...,134,205,193,206,162,243,145,274,184,274
3,3,000001,../input/data/train/images/000001_female_Asian...,4,0,1,45,1,True,110,...,139,232,209,234,171,275,151,307,194,309
4,4,000001,../input/data/train/images/000001_female_Asian...,10,1,1,45,1,True,122,...,152,205,211,202,182,245,164,275,205,273
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18888,18888,006957,../input/data/train/images/006957_male_Asian_2...,0,0,0,20,0,True,111,...,152,235,237,233,195,296,168,332,230,330
18889,18889,006957,../input/data/train/images/006957_male_Asian_2...,0,0,0,20,0,True,101,...,143,235,222,230,185,283,159,319,218,314
18890,18890,006957,../input/data/train/images/006957_male_Asian_2...,6,1,0,20,0,True,107,...,146,234,218,236,181,285,160,322,209,323
18891,18891,006957,../input/data/train/images/006957_male_Asian_2...,0,0,0,20,0,True,103,...,144,220,219,217,185,269,159,304,212,302


In [8]:
train_image = split_exp_train.loc[:,"Filename"]
train_label = split_exp_train.loc[:,"Class"]

valid_image = split_exp_valid.loc[:,"Filename"]
valid_label = split_exp_valid.loc[:,"Class"]

In [9]:
train_data = pd.Series(train_image)
train_label = pd.Series(train_label)

valid_data = pd.Series(valid_image)
valid_label = pd.Series(valid_label)

In [10]:
from torchvision.transforms import Resize, ToTensor, Normalize, Compose, CenterCrop, ColorJitter
from PIL import Image


class Dataset_Mask(Dataset):
    def __init__(self, data, label, encoding=True, midcrop=True, transform=None, is_train=True):
        self.encoding = encoding
        self.midcrop = midcrop
        self.data = data.reset_index(drop=True)
        self.label = label.reset_index(drop=True)
        self.transform = transform
        self.is_train = is_train

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        X = np.load(self.data[idx])
        y = self.label[idx]
        
        if self.midcrop:
            # X = X[64:448]
            source_df = split_exp_train if self.is_train else split_exp_valid
            x1 = source_df.iloc[idx]['BBoxX1']
            y1 = source_df.iloc[idx]['BBoxY1']
            x2 = source_df.iloc[idx]['BBoxX2']
            y2 = source_df.iloc[idx]['BBoxY2']
        X = cv2.resize(X, (224, 224))
        
        if self.transform:
            return self.transform(X), y

        return X, y

In [11]:
mask_train_set = Dataset_Mask(data=train_data, label=train_label, is_train=True, transform=transforms.Compose([
                                    ToTensor(),
                                    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                            ]))

In [12]:
mask_val_set = Dataset_Mask(data=valid_data, label=valid_label, is_train=False, transform = transforms.Compose([
                                    ToTensor(),
                                    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
                            ]))

In [13]:
#t_image = [mask_train_set[i][1] for i in tqdm(range(len(mask_train_set)))]
#v_image = [mask_val_set[i][1] for i in tqdm(range(len(mask_val_set)))]

In [14]:
#t_df = pd.DataFrame(t_image, columns=['counts'])
#v_df = pd.DataFrame(v_image, columns=['counts'])

In [15]:
#import seaborn as sns
#
#fig, axes = plt.subplots(1, 2, figsize=(15, 5))
#
#sns.countplot(x='counts', data=t_df, ax=axes[0])
#axes[0].set_xlabel("train set labels")
#sns.countplot(x='counts', data=v_df, ax=axes[1])
#axes[1].set_xlabel("valid set labels")

In [16]:
#print(f'training data size : {len(mask_train_set)}')
#print(f'validation data size : {len(mask_val_set)}')

In [17]:
batch_size = 256

train_dataloader_mask = DataLoader(dataset = mask_train_set, batch_size=batch_size, num_workers=8)
val_dataloader_mask = DataLoader(dataset = mask_val_set, batch_size=batch_size, num_workers=8)

In [18]:
basemodel_resnet34 = torchvision.models.resnet34(pretrained=True)
print('필요 입력 채널 개수', basemodel_resnet34.conv1.weight.shape[1])
print('네트워크 출력 채널 개수', basemodel_resnet34.fc.weight.shape[0])
print(basemodel_resnet34)

필요 입력 채널 개수 3
네트워크 출력 채널 개수 1000
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  

In [19]:
import math
class_num = 18
basemodel_resnet34.fc = nn.Linear(in_features=512, out_features=class_num, bias=True)
nn.init.xavier_uniform_(basemodel_resnet34.fc.weight)
stdv = 1. / math.sqrt(basemodel_resnet34.fc.weight.size(1))
basemodel_resnet34.fc.bias.data.uniform_(-stdv, stdv)

print('필요 입력 채널 개수', basemodel_resnet34.conv1.weight.shape[1])
print('네트워크 출력 채널 개수', basemodel_resnet34.fc.weight.shape[0])

필요 입력 채널 개수 3
네트워크 출력 채널 개수 18


In [20]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"using {device}")

if torch.cuda.device_count() > 1:
    basemodel_resnet34 = torch.nn.DataParallel(basemodel_resnet34)

basemodel_resnet34.to(device)

#LEARNING_RATE = 0.0001
NUM_EPOCH = 100

criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.Adam(basemodel_resnet34.parameters(), lr=LEARNING_RATE)


lr = 1e-3
betas = (0.9, 0.999)
weight_decay = 0.5e-2
eps = 1e-8
optimizer = torch.optim.AdamW(basemodel_resnet34.parameters(), lr=lr, betas=betas, weight_decay=weight_decay, eps=eps)

#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

using cuda:0


In [21]:
np.set_printoptions(precision=3)
n_param = 0
for p_idx, (param_name, param) in enumerate(basemodel_resnet34.named_parameters()):
    if param.requires_grad:
        param_numpy = param.detach().cpu().numpy()
        n_param += len(param_numpy.reshape(-1))
        print ("[%d] name:[%s] shape:[%s]."%(p_idx,param_name,param_numpy.shape))
        print ("    val:%s"%(param_numpy.reshape(-1)[:5]))
print ("Total number of parameters:[%s]."%(format(n_param,',d')))

[0] name:[conv1.weight] shape:[(64, 3, 7, 7)].
    val:[ 0.005 -0.007  0.008  0.038  0.049]
[1] name:[bn1.weight] shape:[(64,)].
    val:[0.302 0.268 0.26  0.311 0.238]
[2] name:[bn1.bias] shape:[(64,)].
    val:[0.481 0.207 0.331 0.38  0.094]
[3] name:[layer1.0.conv1.weight] shape:[(64, 64, 3, 3)].
    val:[-0.005  0.015 -0.006 -0.06  -0.024]
[4] name:[layer1.0.bn1.weight] shape:[(64,)].
    val:[0.24  0.185 0.216 0.165 0.181]
[5] name:[layer1.0.bn1.bias] shape:[(64,)].
    val:[0.025 0.088 0.082 0.142 0.066]
[6] name:[layer1.0.conv2.weight] shape:[(64, 64, 3, 3)].
    val:[ 0.066 -0.01   0.041  0.033 -0.055]
[7] name:[layer1.0.bn2.weight] shape:[(64,)].
    val:[0.34  0.187 0.252 0.307 0.259]
[8] name:[layer1.0.bn2.bias] shape:[(64,)].
    val:[-0.251  0.196  0.23  -0.114  0.07 ]
[9] name:[layer1.1.conv1.weight] shape:[(64, 64, 3, 3)].
    val:[-0.008 -0.04  -0.054 -0.019  0.011]
[10] name:[layer1.1.bn1.weight] shape:[(64,)].
    val:[0.178 0.373 0.18  0.26  0.246]
[11] name:[layer1.

In [22]:
best_val_acc = 0
best_val_loss = np.inf
patience = 10
cur_count = 0

f1 = F1Score(num_classes=class_num, average='macro').to(device)
best_f1_score = 0

for epoch in range(NUM_EPOCH):
    basemodel_resnet34.train()
    loss_value = 0
    matches = 0
    for train_batch in tqdm(train_dataloader_mask):
        inputs, labels = train_batch
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outs = basemodel_resnet34(inputs)
        preds = torch.argmax(outs, dim=-1)
        loss = criterion(outs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()
        
        if epoch % 10 == 0:
            torch.save(basemodel_resnet34, '../checkpoint/resnet34_with_detect/checkpoint_ep_%d.pth'% epoch)
        
        loss_value += loss.item()
        matches += (preds == labels).sum().item()
        
        train_loss = loss_value / batch_size
        train_acc = matches / batch_size
        
        loss_value = 0
        matches = 0
    print(f"epoch[{epoch}/{NUM_EPOCH}] training loss {train_loss:.3f}, training accuracy {train_acc:.3f}")
        
    with torch.no_grad():
        basemodel_resnet34.eval()
        val_loss_items = []
        val_acc_items = []
        for val_batch in tqdm(val_dataloader_mask):
            inputs, labels = val_batch
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outs = basemodel_resnet34(inputs)
            preds = torch.argmax(outs, dim=-1)
            
            loss_item = criterion(outs, labels).item()
            acc_item = (labels==preds).sum().item()
            val_loss_items.append(loss_item)
            val_acc_items.append(acc_item)
            
        val_loss = np.sum(val_loss_items) / len(val_dataloader_mask)
        val_acc = np.sum(val_acc_items) / len(mask_val_set)

        f1_score = f1(outs, labels)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            
        if f1_score > best_f1_score:
            best_f1_score = f1_score
#             cur_count = 0
            torch.save(basemodel_resnet34, '../checkpoint/resnet34_with_detect/checkpoint_best.pth')
#         else:
#             cur_count += 1
#             if cur_count >= patience:
#                 print("Early Stopping!")
#                 break
            
            
        print(f"[val] acc : {val_acc:.3f}, loss : {val_loss:.3f}, f1 score: {f1_score:.3f}")
        print(f"best acc : {best_val_acc:.3f}, best loss : {best_val_loss:.3f}, best f1 : {best_f1_score:.3f}")

100%|██████████| 60/60 [01:08<00:00,  1.13s/it]
  0%|          | 0/15 [00:00<?, ?it/s]

epoch[0/100] training loss 0.028, training accuracy 0.000


100%|██████████| 15/15 [00:04<00:00,  3.16it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

[val] acc : 0.379, loss : 4.866, f1 score: 0.000
best acc : 0.379, best loss : 4.866, best f1 : 0.000


100%|██████████| 60/60 [00:40<00:00,  1.49it/s]
  0%|          | 0/15 [00:00<?, ?it/s]

epoch[1/100] training loss 0.028, training accuracy 0.000


100%|██████████| 15/15 [00:04<00:00,  3.22it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

[val] acc : 0.379, loss : 4.861, f1 score: 0.000
best acc : 0.379, best loss : 4.861, best f1 : 0.000


100%|██████████| 60/60 [00:40<00:00,  1.48it/s]
  0%|          | 0/15 [00:00<?, ?it/s]

epoch[2/100] training loss 0.028, training accuracy 0.000


100%|██████████| 15/15 [00:04<00:00,  3.18it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

[val] acc : 0.379, loss : 4.861, f1 score: 0.000
best acc : 0.379, best loss : 4.861, best f1 : 0.000


100%|██████████| 60/60 [00:40<00:00,  1.48it/s]
  0%|          | 0/15 [00:00<?, ?it/s]

epoch[3/100] training loss 0.028, training accuracy 0.000


100%|██████████| 15/15 [00:04<00:00,  3.20it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

[val] acc : 0.379, loss : 4.861, f1 score: 0.000
best acc : 0.379, best loss : 4.861, best f1 : 0.000


100%|██████████| 60/60 [00:40<00:00,  1.48it/s]
  0%|          | 0/15 [00:00<?, ?it/s]

epoch[4/100] training loss 0.028, training accuracy 0.000


100%|██████████| 15/15 [00:04<00:00,  3.17it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

[val] acc : 0.379, loss : 4.861, f1 score: 0.000
best acc : 0.379, best loss : 4.861, best f1 : 0.000


100%|██████████| 60/60 [00:40<00:00,  1.48it/s]
  0%|          | 0/15 [00:00<?, ?it/s]

epoch[5/100] training loss 0.028, training accuracy 0.000


100%|██████████| 15/15 [00:04<00:00,  3.17it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

[val] acc : 0.379, loss : 4.861, f1 score: 0.000
best acc : 0.379, best loss : 4.861, best f1 : 0.000


100%|██████████| 60/60 [00:40<00:00,  1.49it/s]
  0%|          | 0/15 [00:00<?, ?it/s]

epoch[6/100] training loss 0.028, training accuracy 0.000


100%|██████████| 15/15 [00:04<00:00,  3.12it/s]
  0%|          | 0/60 [00:00<?, ?it/s]

[val] acc : 0.379, loss : 4.861, f1 score: 0.000
best acc : 0.379, best loss : 4.861, best f1 : 0.000


 63%|██████▎   | 38/60 [00:26<00:15,  1.41it/s]


KeyboardInterrupt: 

In [None]:
# meta 데이터와 이미지 경로를 불러옵니다.
test_dir_path = '/opt/ml/input/data/eval/'
test_image_path = '/opt/ml/input/data/eval/images/'

submission = pd.read_csv(test_dir_path+'info.csv')
submission.head()

In [None]:
image_paths = [os.path.join(test_image_path, img_id) for img_id in submission.ImageID]
test_image = pd.Series(image_paths)

In [None]:
class Test_Dataset(Dataset):
    def __init__(self, midcrop=True, transform=None):
        self.midcrop = midcrop
        self.data = test_image
        self.transform = transform
        
    def __len__(self):
        return len(test_image)
    
    def __getitem__(self, idx):
        img = cv2.cvtColor(cv2.imread(self.data[idx]), cv2.COLOR_BGR2RGB)
        
        if self.midcrop:
            img = img[64:448]
            
        if self.transform:
            img = self.transform(img)
            
        return img

In [None]:
dataset = Test_Dataset(transform = transforms.Compose([
                            transforms.ToTensor()
                        ]))

loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2
)

# 모델을 정의합니다. (학습한 모델이 있다면 torch.load로 모델을 불러주세요!)
device = torch.device('cuda')
model = basemodel_resnet34.to(device)
model.eval()

# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
for images in loader:
    with torch.no_grad():
        images = images.to(device)
        pred = model(images)
        pred = pred.argmax(dim=-1)
        all_predictions.extend(pred.cpu().numpy())
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir_path, 'submission.csv'), index=False)
print('test inference is done!')