In [1]:
!nvidia-smi

Wed Oct 26 08:55:01 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.80.02    Driver Version: 450.80.02    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-PCIE...  Off  | 00000000:00:05.0 Off |                  Off |
| N/A   41C    P0    37W / 250W |      0MiB / 32510MiB |      9%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import random
import os

In [3]:
os.chdir('input/data')
os.getcwd()

'/opt/ml/input/data'

In [30]:
!find . -regex ".*\.\_[a-zA-Z0-9._]+" -delete

In [4]:
os.chdir('../../')
os.getcwd()

'/opt/ml'

In [3]:
random_seed = 12
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed) # if use multi-GPU
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)
random.seed(random_seed)

In [4]:
train_dir_path = '/opt/ml/input/data/train/'
train_image_path = '/opt/ml/input/data/train/images/'

dt_train = pd.read_csv(train_dir_path+'train.csv')
dt_train

Unnamed: 0,id,gender,race,age,path
0,000001,female,Asian,45,000001_female_Asian_45
1,000002,female,Asian,52,000002_female_Asian_52
2,000004,male,Asian,54,000004_male_Asian_54
3,000005,female,Asian,58,000005_female_Asian_58
4,000006,female,Asian,59,000006_female_Asian_59
...,...,...,...,...,...
2695,006954,male,Asian,19,006954_male_Asian_19
2696,006955,male,Asian,19,006955_male_Asian_19
2697,006956,male,Asian,19,006956_male_Asian_19
2698,006957,male,Asian,20,006957_male_Asian_20


In [5]:
whole_image_path = []
whole_target_label = []

for path in dt_train['path']:
    for file_name in [i for i in os.listdir(train_image_path+path) if './' not in i]:
        whole_image_path.append(train_image_path+path+'/'+file_name)
        whole_target_label.append((path.split('_')[1], path.split('_')[3], file_name.split('.')[0]))

In [6]:
def onehot_enc(x):
    def gender(i):
        if i == 'male':
            return 0
        elif i == 'female':
            return 3
    def age(j):
        j = int(j)
        if j < 30:
            return 0
        elif j >= 30 and j < 60:
            return 1
        elif j >= 60:
            return 2
    def mask(k):
        if k == 'normal':
            return 12
        elif 'incorrect' in k:
            return 6
        else:
            return 0
    return gender(x[0]) + age(x[1]) + mask(x[2])

In [7]:
sr_data = pd.Series(whole_image_path)
sr_label = pd.Series(whole_target_label)

In [8]:
class Dataset_Mask(Dataset):
    def __init__(self, encoding=True, midcrop=True, transform=None):
        self.encoding = encoding
        self.midcrop = midcrop
        self.data = sr_data
        self.label = sr_label
        self.transform = transform
        
        if encoding:
            self.label = self.label.apply(onehot_enc)
        
    def __len__(self):
        return len(sr_data)
    
    def __getitem__(self, idx):
        X = cv2.cvtColor(cv2.imread(self.data[idx]), cv2.COLOR_BGR2RGB)
        y = self.label[idx]
        
        if self.midcrop:
            X = X[64:448]
        
        if self.transform:
            return self.transform(X), y
        return X, y

In [9]:
dataset_mask = Dataset_Mask(transform = transforms.Compose([
                                transforms.ToTensor()
                            ]))

In [10]:
train_size = int(len(dataset_mask) * 0.8)
val_size = int(len(dataset_mask) * 0.2)

In [11]:
print(len(dataset_mask))

18900


In [12]:
mask_train_set, mask_val_set = torch.utils.data.random_split(dataset_mask, [train_size, val_size])
print(f'training data size : {len(mask_train_set)}')
print(f'validation data size : {len(mask_val_set)}')

training data size : 15120
validation data size : 3780


In [13]:
batch_size = 32
train_dataloader_mask = DataLoader(dataset = mask_train_set, batch_size=batch_size, shuffle=True, num_workers=2)
val_dataloader_mask = DataLoader(dataset = mask_val_set, batch_size=batch_size, shuffle=True, num_workers=2)

In [14]:
basemodel_resnet101 = torchvision.models.resnet101(pretrained=True)
print('필요 입력 채널 개수', basemodel_resnet101.conv1.weight.shape[1])
print('네트워크 출력 채널 개수', basemodel_resnet101.fc.weight.shape[0])
print(basemodel_resnet101)

필요 입력 채널 개수 3
네트워크 출력 채널 개수 1000
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256

In [15]:
import math
class_num = 18
basemodel_resnet101.fc = nn.Linear(in_features=2048, out_features=class_num, bias=True)
nn.init.xavier_uniform_(basemodel_resnet101.fc.weight)
stdv = 1. / math.sqrt(basemodel_resnet101.fc.weight.size(1))
basemodel_resnet101.fc.bias.data.uniform_(-stdv, stdv)

print('필요 입력 채널 개수', basemodel_resnet101.conv1.weight.shape[1])
print('네트워크 출력 채널 개수', basemodel_resnet101.fc.weight.shape[0])

필요 입력 채널 개수 3
네트워크 출력 채널 개수 18


In [16]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"using {device}")

basemodel_resnet101.to(device)

LEARNING_RATE = 0.0001
NUM_EPOCH = 30

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(basemodel_resnet101.parameters(), lr=LEARNING_RATE)

using cuda:0


In [17]:
np.set_printoptions(precision=3)
n_param = 0
for p_idx, (param_name, param) in enumerate(basemodel_resnet101.named_parameters()):
    if param.requires_grad:
        param_numpy = param.detach().cpu().numpy()
        n_param += len(param_numpy.reshape(-1))
        print ("[%d] name:[%s] shape:[%s]."%(p_idx,param_name,param_numpy.shape))
        print ("    val:%s"%(param_numpy.reshape(-1)[:5]))
print ("Total number of parameters:[%s]."%(format(n_param,',d')))

[0] name:[conv1.weight] shape:[(64, 3, 7, 7)].
    val:[ 0.02  -0.004 -0.018 -0.028 -0.015]
[1] name:[bn1.weight] shape:[(64,)].
    val:[0.261 0.195 0.273 0.425 0.283]
[2] name:[bn1.bias] shape:[(64,)].
    val:[0.197 0.223 0.184 1.066 0.681]
[3] name:[layer1.0.conv1.weight] shape:[(64, 64, 1, 1)].
    val:[-0.187  0.024 -0.022 -0.055  0.022]
[4] name:[layer1.0.bn1.weight] shape:[(64,)].
    val:[2.146e-01 1.453e-01 6.095e-09 1.160e-01 2.530e-01]
[5] name:[layer1.0.bn1.bias] shape:[(64,)].
    val:[ 4.568e-02 -3.505e-02 -3.354e-08  1.901e-01 -3.316e-03]
[6] name:[layer1.0.conv2.weight] shape:[(64, 64, 3, 3)].
    val:[ 0.013 -0.045  0.027  0.028  0.067]
[7] name:[layer1.0.bn2.weight] shape:[(64,)].
    val:[0.139 0.149 0.148 0.132 0.155]
[8] name:[layer1.0.bn2.bias] shape:[(64,)].
    val:[ 7.826e-02 -2.417e-04  3.763e-01  6.085e-02 -3.057e-02]
[9] name:[layer1.0.conv3.weight] shape:[(256, 64, 1, 1)].
    val:[ 3.798e-05  1.875e-03 -2.079e-04 -7.436e-04  2.426e-03]
[10] name:[layer1.0

In [18]:
best_val_acc = 0
best_val_loss = np.inf

for epoch in range(NUM_EPOCH):
    basemodel_resnet101.train()
    loss_value = 0
    matches = 0
    for train_batch in train_dataloader_mask:
        inputs, labels = train_batch
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outs = basemodel_resnet101(inputs)
        preds = torch.argmax(outs, dim=-1)
        loss = criterion(outs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        loss_value += loss.item()
        matches += (preds == labels).sum().item()
        
        train_loss = loss_value / batch_size
        train_acc = matches / batch_size
        print(f"epoch[{epoch}/{NUM_EPOCH}] training loss {train_loss:.4f}, training accuracy {train_acc:.4f}")
        
        loss_value = 0
        matches = 0
        
    with torch.no_grad():
        basemodel_resnet101.eval()
        val_loss_items = []
        val_acc_items = []
        for val_batch in val_dataloader_mask:
            inputs, labels = val_batch
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outs = basemodel_resnet101(inputs)
            preds = torch.argmax(outs, dim=-1)
            
            loss_item = criterion(outs, labels).item()
            acc_item = (labels==preds).sum().item()
            val_loss_items.append(loss_item)
            val_acc_items.append(acc_item)
            
        val_loss = np.sum(val_loss_items) / len(val_dataloader_mask)
        val_acc = np.sum(val_acc_items) / len(mask_val_set)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            
        print(f"[val] acc : {val_acc:.4f}, loss : {val_loss:.4f}")
        print(f"best acc : {best_val_acc:.4f}, best loss : {best_val_loss:.4f}")

epoch[0/30] training loss 0.0929, training accuracy 0.0938
epoch[0/30] training loss 0.0917, training accuracy 0.0000
epoch[0/30] training loss 0.0791, training accuracy 0.2188
epoch[0/30] training loss 0.0738, training accuracy 0.5000
epoch[0/30] training loss 0.0633, training accuracy 0.4375
epoch[0/30] training loss 0.0677, training accuracy 0.4375
epoch[0/30] training loss 0.0667, training accuracy 0.4688
epoch[0/30] training loss 0.0468, training accuracy 0.5625
epoch[0/30] training loss 0.0481, training accuracy 0.5938
epoch[0/30] training loss 0.0504, training accuracy 0.5000
epoch[0/30] training loss 0.0357, training accuracy 0.6875
epoch[0/30] training loss 0.0432, training accuracy 0.5938
epoch[0/30] training loss 0.0260, training accuracy 0.7500
epoch[0/30] training loss 0.0331, training accuracy 0.7188
epoch[0/30] training loss 0.0246, training accuracy 0.8750
epoch[0/30] training loss 0.0294, training accuracy 0.8125
epoch[0/30] training loss 0.0302, training accuracy 0.68

KeyboardInterrupt: 

In [19]:
# meta 데이터와 이미지 경로를 불러옵니다.
test_dir_path = '/opt/ml/input/data/eval/'
test_image_path = '/opt/ml/input/data/eval/images/'

submission = pd.read_csv(test_dir_path+'info.csv')
submission.head()

Unnamed: 0,ImageID,ans
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,0
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,0
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,0
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,0
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,0


In [20]:
image_paths = [os.path.join(test_image_path, img_id) for img_id in submission.ImageID]
test_image = pd.Series(image_paths)

In [21]:
class Test_Dataset(Dataset):
    def __init__(self, midcrop=True, transform=None):
        self.midcrop = midcrop
        self.data = test_image
        self.transform = transform
        
    def __len__(self):
        return len(test_image)
    
    def __getitem__(self, idx):
        img = cv2.cvtColor(cv2.imread(self.data[idx]), cv2.COLOR_BGR2RGB)
        
        if self.midcrop:
            img = img[64:448]
            
        if self.transform:
            img = self.transform(img)
            
        return img

In [22]:
dataset = Test_Dataset(transform = transforms.Compose([
                            transforms.ToTensor()
                        ]))

loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2
)

# 모델을 정의합니다. (학습한 모델이 있다면 torch.load로 모델을 불러주세요!)
device = torch.device('cuda')
model = basemodel_resnet101.to(device)
model.eval()

# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
for images in loader:
    with torch.no_grad():
        images = images.to(device)
        pred = model(images)
        pred = pred.argmax(dim=-1)
        all_predictions.extend(pred.cpu().numpy())
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir_path, 'submission_resnet101.csv'), index=False)
print('test inference is done!')

test inference is done!
