In [None]:
from google.colab import drive

# Mount Google Drive so the dataset and checkpoint paths under /content/drive/ are reachable.
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
import torchsummary
import numpy as np
from PIL import Image
from PIL import ImageFile
import json
import time
import random
import glob

ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
def seed_everything(seed=42):
    """Seed every random number generator used in this notebook.

    Args:
        seed: Integer seed passed, in order, to Python's ``random`` module,
            NumPy's global RNG, PyTorch's RNG, the current CUDA device and
            all CUDA devices.
    """
    seeders = (
        random.seed,                 # Python's built-in random module
        np.random.seed,              # NumPy global RNG
        torch.manual_seed,           # PyTorch RNG
        torch.cuda.manual_seed,      # PyTorch RNG for the current GPU
        torch.cuda.manual_seed_all,  # PyTorch RNG for every GPU (multi-GPU)
    )
    for apply_seed in seeders:
        apply_seed(seed)


seed_everything()

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Dataset pairing ``<data_root>/<dir>/<name>.png`` images with
    ``<label_root>/<dir>/<name>.json`` annotation files.

    Pairs are matched case-insensitively on their path relative to the
    respective root. Images without a matching label file are reported on
    stdout and left out of the dataset.
    """

    def __init__(self, data_root, label_root, transform=None):
        """Index every image that has a matching label file.

        Args:
            data_root: Directory whose immediate sub-directories hold ``*.png`` images.
            label_root: Directory whose immediate sub-directories hold ``*.json`` labels.
            transform: Optional callable applied to the cropped PIL image.
        """
        self.transform = transform
        self.items = []

        # Key every label by its lower-cased path *relative to label_root*, so
        # the lookup works for absolute and relative roots alike.
        label_lookup = {
            os.path.relpath(path, label_root).lower(): path
            for path in glob.glob(os.path.join(label_root, '*/*.json'))
        }

        # Sorted so the sample order does not depend on directory listing order.
        for image_path in sorted(glob.glob(os.path.join(data_root, '*/*.png'))):
            relative_stem = os.path.splitext(os.path.relpath(image_path, data_root))[0]
            expected_label = relative_stem + '.json'

            # Resolve the real (case-preserving) label path via the lower-cased key.
            actual_label_path = label_lookup.get(expected_label.lower())
            if actual_label_path:
                self.items.append((image_path, actual_label_path))
            else:
                print(f"Label file not found for image: {image_path}")
                print(f"Expected label path: {os.path.join(label_root, expected_label)}")

    def __len__(self):
        """Return the number of (image, label) pairs found."""
        return len(self.items)

    def __getitem__(self, index):
        """Load, crop and transform one sample.

        Returns:
            ``(image, label)`` where ``label`` is a dict holding ``'age_past'``
            and, when the label file carries a ``'gender'`` entry, ``'gender'``
            encoded as 0 for ``'male'`` and 1 otherwise. Returns ``None`` when
            the image cannot be opened, so a collate function can drop it.
        """
        image_path, label_path = self.items[index]

        try:
            # The context manager closes the file handle once the pixel data
            # has been copied into the converted image.
            with Image.open(image_path) as raw_image:
                image = raw_image.convert('RGB')
        except Exception as e:
            # Best-effort: skip unreadable images, but say so instead of hiding it.
            print(f"Skipping unreadable image {image_path}: {e}")
            return None

        with open(label_path, 'r') as f:
            label_data = json.load(f)

        age_past = label_data['age_past']
        # Crop to the first annotated box (x, y, w, h) before any transform.
        box = label_data['annotation'][0]['box']
        image = image.crop((box['x'], box['y'], box['x'] + box['w'], box['y'] + box['h']))

        if self.transform:
            image = self.transform(image)

        label = {'age_past': age_past}
        # NOTE(review): assumes gender lives under a top-level 'gender' key with
        # the value 'male' for men - confirm against the label schema. The key
        # is only added when present; mixed presence across files would break
        # default collation.
        gender = label_data.get('gender')
        if gender is not None:
            label['gender'] = 0 if gender == 'male' else 1

        return image, label


In [None]:
def custom_collate_fn(batch):
    """Collate a batch after discarding samples that are ``None``.

    Args:
        batch: List of samples; entries equal to ``None`` are dropped.

    Returns:
        The result of PyTorch's default collate on the remaining samples.
    """
    usable_samples = list(filter(lambda sample: sample is not None, batch))
    return torch.utils.data.dataloader.default_collate(usable_samples)

In [None]:
# Augmentations applied as one group: RandomApply runs the whole list with p=0.5.
_augmentation_ops = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
]
random_transforms = transforms.RandomApply(_augmentation_ops, p=0.5)

# Preprocessing applied to every image.
# NOTE(review): the mean/std values are presumably per-channel statistics of
# this dataset - confirm their origin.
_channel_mean = [0.6284, 0.4901, 0.4325]
_channel_std = [0.1869, 0.1712, 0.1561]
always_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
])

# Training: random augmentation followed by the fixed preprocessing.
transform_train = transforms.Compose([random_transforms, always_transforms])

# Validation: fixed preprocessing only.
transform_val = transforms.Compose([always_transforms])

In [None]:
# Root folder of the face dataset on the mounted Drive; each split has
# 'source' (images) and 'label' (JSON) sub-folders.
_dataset_base = '/content/drive/MyDrive/DL_DATA/DL_Face_REC'

train_dataset = CustomDataset(
    f'{_dataset_base}/train/source',
    f'{_dataset_base}/train/label',
    transform=transform_train,
)
val_dataset = CustomDataset(
    f'{_dataset_base}/val/source',
    f'{_dataset_base}/val/label',
    transform=transform_val,
)

Label file not found for image: /content/drive/MyDrive/DL_DATA/DL_Face_REC/train/source/0622/0622_2002_20_00000021_D.png
Expected label path: None
Label file not found for image: /content/drive/MyDrive/DL_DATA/DL_Face_REC/train/source/0622/0622_2002_21_00000031_D.png
Expected label path: None
Label file not found for image: /content/drive/MyDrive/DL_DATA/DL_Face_REC/train/source/0622/0622_2002_20_00000019_D.png
Expected label path: None
Label file not found for image: /content/drive/MyDrive/DL_DATA/DL_Face_REC/train/source/0622/0622_2002_20_00000020_D.png
Expected label path: None
Label file not found for image: /content/drive/MyDrive/DL_DATA/DL_Face_REC/train/source/0622/0622_2002_20_00000023_D.png
Expected label path: None
Label file not found for image: /content/drive/MyDrive/DL_DATA/DL_Face_REC/train/source/0622/0622_2002_14_00000006_D.png
Expected label path: None
Label file not found for image: /content/drive/MyDrive/DL_DATA/DL_Face_REC/train/source/0622/0622_2002_15_00000008_D.p

In [None]:
batch_size = 128

# Options shared by both loaders; custom_collate_fn drops samples that the
# dataset returned as None.
_loader_options = dict(
    collate_fn=custom_collate_fn,
    batch_size=batch_size,
    num_workers=2,
    prefetch_factor=4,
)

# Only the training loader shuffles.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, shuffle=True, **_loader_options)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, **_loader_options)

In [None]:
# Number of batches per epoch in the training and validation loaders.
len(train_loader), len(val_loader)

(314, 40)

In [None]:
# Pull one batch from the training loader and show the image tensor's shape.
x, y = next(iter(train_loader))
x.shape

torch.Size([128, 3, 224, 224])

In [None]:
# Shapes of the label tensors in the sampled batch.
# NOTE(review): raises KeyError unless the label dict produced by the dataset
# contains a 'gender' entry - confirm.
y['age_past'].shape, y['gender'].shape

(torch.Size([128]), torch.Size([128]))

In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [None]:
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 convolution with padding 1.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride (default 1).
    """
    conv_options = {'kernel_size': 3, 'stride': stride, 'padding': 1, 'bias': False}
    return nn.Conv2d(in_planes, out_planes, **conv_options)

def conv1x1(in_planes, out_planes, stride=1):
    """Build a bias-free 1x1 convolution.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride (default 1).
    """
    conv_options = {'kernel_size': 1, 'stride': stride, 'bias': False}
    return nn.Conv2d(in_planes, out_planes, **conv_options)

In [None]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions (``expansion = 1``).

    Args:
        in_planes: Number of input channels.
        planes: Number of channels produced by both convolutions.
        stride: Stride of the first convolution; the second always uses 1.
        downsample: Optional module applied to the input so the shortcut
            matches the main path's shape.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1, downsample=None):
        super().__init__()
        # First conv carries the stride; second conv keeps the resolution.
        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        # Main path: conv -> bn -> relu -> conv -> bn.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Shortcut: projected through `downsample` when one was supplied,
        # so its shape matches the main path.
        if self.downsample is None:
            identity = x
        else:
            identity = self.downsample(x)

        out += identity
        return self.relu(out)

In [None]:
class Bottleneck(nn.Module):
    """Residual block of 1x1 -> 3x3 -> 1x1 convolutions (``expansion = 4``).

    Args:
        in_planes: Number of input channels.
        planes: Channel width of the first two convolutions; the block
            outputs ``planes * expansion`` channels.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input so the shortcut
            matches the main path's shape.
    """

    expansion = 4

    def __init__(self, in_planes, planes, stride=1, downsample=None):
        super().__init__()
        expanded_planes = planes * self.expansion
        self.conv1 = conv1x1(in_planes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, expanded_planes)
        self.bn3 = nn.BatchNorm2d(expanded_planes)
        self.relu = nn.ReLU(inplace=True)
        self.stride = stride
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        # 1x1 convolution, stride 1.
        out = self.relu(self.bn1(self.conv1(x)))
        # 3x3 convolution carrying the block's stride.
        out = self.relu(self.bn2(self.conv2(out)))
        # 1x1 convolution expanding to planes * expansion channels.
        out = self.bn3(self.conv3(out))

        if self.downsample is None:
            identity = x
        else:
            identity = self.downsample(x)

        out += identity
        return self.relu(out)

In [None]:
class Age_Net(nn.Module):
    """ResNet-style backbone with a single-output linear head.

    Typical configurations:
        * resnet50: ``Bottleneck``, ``[3, 4, 6, 3]``
        * resnet18: ``BasicBlock``, ``[2, 2, 2, 2]``

    Args:
        block: Residual block class exposing an ``expansion`` attribute.
        layers: Four integers giving the number of blocks in each stage.
    """

    def __init__(self, block, layers):
        super().__init__()
        self.inplanes = 64

        # Stem. Shape comments assume a (3, 224, 224) input:
        # conv1 -> (64, 112, 112), maxpool -> (64, 56, 56).
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; stages 2-4 use stride 2.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, 1)

        # Kaiming-normal init for convolutions; BatchNorm starts as identity.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks.

        Only the first block receives ``stride`` and, when the stride is not 1
        or the channel count changes, a 1x1-conv + BatchNorm projection for
        its shortcut. Example with ``Bottleneck``:

            layer1 = [Bottleneck(64, 64, 1, downsample),
                      Bottleneck(256, 64), Bottleneck(256, 64)]
            layer2 = [Bottleneck(256, 128, 2, downsample),
                      Bottleneck(512, 128), Bottleneck(512, 128), Bottleneck(512, 128)]
        """
        out_planes = planes * block.expansion

        # Projection that makes the shortcut match the main path's channels/stride.
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, out_planes, stride),
                nn.BatchNorm2d(out_planes),
            )
        else:
            downsample = None

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes

        # Remaining blocks keep stride 1 and need no projection.
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Run the backbone and return a ``(batch, 1)`` non-negative prediction."""
        backbone = (
            self.conv1, self.bn1, self.relu, self.maxpool,
            self.layer1, self.layer2, self.layer3, self.layer4,
            self.avgpool,  # global average pooling in place of a large fc layer
        )
        for stage in backbone:
            x = stage(x)

        x = x.view(x.size(0), -1)
        x = self.fc(x)
        # The final ReLU clamps the output at zero.
        # NOTE(review): a ReLU on a regression head passes no gradient while
        # the pre-activation is negative - confirm this is intended.
        return self.relu(x)


In [None]:
# def resnet50():
#     model = Age_Net(Bottleneck, [3, 4, 6, 3])
#     return model

In [None]:
def resnet18():
    """Build an ``Age_Net`` from ``BasicBlock`` with the [2, 2, 2, 2] stage layout."""
    return Age_Net(BasicBlock, [2, 2, 2, 2])

In [None]:
# Build the ResNet-18 age model, move it to the selected device, and print a
# per-layer summary for a (3, 224, 224) input.
age_model = resnet18()
age_model = age_model.to(device)
torchsummary.summary(age_model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

In [None]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.optim as optim

# Adam over all model parameters; the scheduler lowers the learning rate when
# the value passed to step() (lower is better, mode='min') stops improving.
_initial_lr = 0.0003
opt_age = optim.Adam(age_model.parameters(), lr=_initial_lr)
age_lr_scheduler = ReduceLROnPlateau(opt_age, mode='min', verbose=True)

In [None]:
class EarlyStopping:
    """Flag when the validation loss has stopped improving.

    Args:
        patience: Number of consecutive non-improving calls before
            ``early_stop`` is set.
        verbose: When true, print ``"Early stopping"`` each time the patience
            threshold is reached.
        delta: Minimum decrease relative to ``best_loss`` that counts as an
            improvement.
    """

    def __init__(self, patience=3, verbose=False, delta=0):
        self.patience, self.verbose, self.delta = patience, verbose, delta
        self.best_loss = np.inf
        self.early_stop = False
        self.counter = 0

    def __call__(self, val_loss):
        """Record one validation loss and update ``counter`` / ``early_stop``."""
        improved = self.best_loss - val_loss > self.delta
        if improved:
            # New best: remember it and restart the patience counter.
            self.best_loss, self.counter = val_loss, 0
            return

        self.counter += 1
        if self.counter < self.patience:
            return

        self.early_stop = True
        if self.verbose:
            print("Early stopping")

In [None]:
# Stop training after 3 consecutive epochs without validation-loss improvement.
early_stopping = EarlyStopping(patience=3, verbose=True)

In [None]:
def train_loop_age(dataloader, model, loss_fn, optimizer, epoch, device=None):
    """Train ``model`` for one epoch on the ``'age_past'`` target.

    Args:
        dataloader: Iterable of ``(x, y)`` batches where ``y['age_past']`` is a
            tensor; must expose ``.dataset`` and ``len()``.
        model: Network producing one value per sample.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar tensor.
        optimizer: Optimizer updating ``model``'s parameters.
        epoch: Zero-based epoch index, used only in log messages.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter, so the function does not rely on a global.

    Returns:
        Mean of the per-batch losses over the epoch.
    """
    model.train()
    if device is None:
        device = next(model.parameters()).device

    size = len(dataloader.dataset)
    total_loss = 0.0
    processed = 0
    start_time = time.time()  # epoch start time

    for batch, (x, y) in enumerate(dataloader):
        batch_start_time = time.time()  # batch start time
        x, y = x.to(device), y['age_past'].float().to(device)
        pred = model(x)
        # Match the prediction to the target's shape explicitly. A bare
        # squeeze() turns a size-1 batch into a scalar, and an unsqueezed
        # (B, 1) prediction would broadcast against the (B,) target.
        loss = loss_fn(pred.reshape(y.shape), y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        batch_process_time = time.time() - batch_start_time

        # Running count, so a short final batch is not over-reported.
        processed += len(x)
        if batch % 10 == 0:
            print(f'Epoch {epoch+1} : [{processed} / {size}] loss : {loss.item()}, Batch time: {batch_process_time:.4f} sec')

    average_loss = total_loss / len(dataloader)
    epoch_time = time.time() - start_time

    print(f"Epoch {epoch+1} finished, Total Epoch time: {epoch_time:.4f} sec")
    return average_loss


In [None]:
def validation_loop_age(dataloader, model, loss_fn, device):
    """Evaluate ``model`` on the ``'age_past'`` target without gradients.

    Args:
        dataloader: Iterable of ``(x, y)`` batches where ``y['age_past']`` is a
            tensor; must support ``len()``.
        model: Network producing one value per sample.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar tensor.
        device: Device the batches are moved to.

    Returns:
        Mean of the per-batch losses.
    """
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y['age_past'].float().to(device)
            pred = model(x)
            # A (B, 1) prediction against a (B,) target would broadcast to
            # (B, B) and yield a wrong loss, so align the shapes first.
            loss = loss_fn(pred.reshape(y.shape), y)
            val_loss += loss.item()
    val_loss /= len(dataloader)
    return val_loss

In [None]:
def save_model(epoch, model, optimizer, path, train_loss=None, val_loss=None):
    """Write a training checkpoint to ``path``.

    Args:
        epoch: Epoch index stored under ``'epoch'``.
        model: Module whose ``state_dict()`` is stored.
        optimizer: Optimizer whose ``state_dict()`` is stored.
        path: Destination file; missing parent directories are created.
        train_loss: Optional training loss to record (``None`` when omitted).
        val_loss: Optional validation loss to record (``None`` when omitted).
    """
    # torch.save does not create directories, so make sure the target exists.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'train_loss': train_loss,
        'val_loss': val_loss
    }, path)

In [None]:
# Train for up to 30 epochs, checkpointing every epoch and stopping early when
# the validation loss stops improving.
start = time.time()
loss_fn = nn.MSELoss()  # one loss module reused for training and validation
for epoch in range(30):
    age_loss = train_loop_age(train_loader, age_model, loss_fn, opt_age, epoch)
    val_loss = validation_loop_age(val_loader, age_model, loss_fn, device)
    age_lr_scheduler.step(val_loss)

    early_stopping(val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered")
        # Pass the losses here too; save_model takes them like the per-epoch call below.
        # NOTE(review): the directory is named 'resnet50' although the model is
        # built by resnet18() - confirm the intended location.
        save_model(epoch, age_model, opt_age,
                   '/content/drive/MyDrive/DL_DATA/DL_Face_REC/Model/resnet50/age_model_checkpoint.pth',
                   age_loss, val_loss)
        break

    save_model(epoch, age_model, opt_age,
               f'/content/drive/MyDrive/DL_DATA/DL_Face_REC/Model/resnet50/age_model_checkpoint_epoch_{epoch+1}_loss_{round(age_loss, 2)}.pth',
               age_loss, val_loss)
    print(f'Epoch : {epoch+1}, Loss : {age_loss}, Val_loss : {val_loss}')

total_time = time.time() - start

# Print the total training time as HH:MM:SS.ss
hours, rem = divmod(total_time, 3600)
minutes, seconds = divmod(rem, 60)
print("Total training time: {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

Epoch 1 : [128 / 40125] loss : 707.385498046875, Batch time: 0.3871 sec
Epoch 1 : [1408 / 40125] loss : 400.73675537109375, Batch time: 0.3934 sec
Epoch 1 : [2688 / 40125] loss : 418.5068359375, Batch time: 0.4172 sec
Epoch 1 : [3968 / 40125] loss : 342.96295166015625, Batch time: 0.4066 sec




Epoch 1 : [5248 / 40125] loss : 322.82373046875, Batch time: 0.4123 sec
Epoch 1 : [6528 / 40125] loss : 330.1068115234375, Batch time: 0.3855 sec
Epoch 1 : [7808 / 40125] loss : 287.55584716796875, Batch time: 0.3943 sec
Epoch 1 : [9088 / 40125] loss : 233.83111572265625, Batch time: 0.3987 sec
Epoch 1 : [10368 / 40125] loss : 149.89833068847656, Batch time: 0.4061 sec
Epoch 1 : [11648 / 40125] loss : 182.09693908691406, Batch time: 0.4047 sec
Epoch 1 : [12928 / 40125] loss : 125.2864990234375, Batch time: 0.3948 sec
Epoch 1 : [14208 / 40125] loss : 153.28518676757812, Batch time: 0.4042 sec
Epoch 1 : [15488 / 40125] loss : 136.0650177001953, Batch time: 0.4079 sec
Epoch 1 : [16768 / 40125] loss : 135.35955810546875, Batch time: 0.4315 sec
Epoch 1 : [18048 / 40125] loss : 161.09902954101562, Batch time: 0.4018 sec
Epoch 1 : [19328 / 40125] loss : 134.00595092773438, Batch time: 0.3900 sec
Epoch 1 : [20608 / 40125] loss : 103.98753356933594, Batch time: 0.4252 sec
Epoch 1 : [21888 / 401



Epoch 1 : [38528 / 40125] loss : 74.56941986083984, Batch time: 0.3831 sec
Epoch 1 : [39808 / 40125] loss : 111.48184204101562, Batch time: 0.3826 sec
Epoch 1 finished, Total Epoch time: 17464.2843 sec


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch : 1, Loss : 179.8350289217226, Val_loss : 257.17293491363523
Epoch 2 : [128 / 40125] loss : 80.04081726074219, Batch time: 0.3995 sec
Epoch 2 : [1408 / 40125] loss : 90.19076538085938, Batch time: 0.3858 sec
Epoch 2 : [2688 / 40125] loss : 79.01805877685547, Batch time: 0.4044 sec


KeyboardInterrupt: 

In [None]:
# # age 모델 이어서 학습
# def load_model(model, optimizer, path):
#     checkpoint = torch.load(path)
#     model.load_state_dict(checkpoint['model_state_dict'])
#     optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
#     epoch = checkpoint['epoch']
#     return model, optimizer, epoch

In [None]:
# # 모델과 옵티마이저 초기화
# age_model = Age_Net().to(device)
# opt_age = optim.Adam(age_model.parameters(), lr=0.0003)

# # 체크포인트 불러오기
# age_model, opt_age, start_epoch = load_model(age_model, opt_age, '/content/drive/MyDrive/DL_DATA/Model/age_model_checkpoint_epoch_11.pth')

# # 학습 재개
# start = time.time()
# for epoch in range(start_epoch + 1, start_epoch + 28):
#     age_loss = train_loop_age(train_loader, age_model, nn.MSELoss(), opt_age, epoch)
#     val_loss = validation_loop_age(val_loader, age_model, nn.MSELoss(), device)
#     age_lr_scheduler.step(val_loss)

#     early_stopping(val_loss)
#     if early_stopping.early_stop:
#         print("Early stopping triggered")
#         save_model(epoch, age_model, opt_age, '/content/drive/MyDrive/DL_DATA/Model/age_model_checkpoint.pth')
#         break

#     save_model(epoch, age_model, opt_age, f'/content/drive/MyDrive/DL_DATA/Model/age_model_checkpoint_epoch_{epoch+1}.pth')
#     print(f'Epoch : {epoch + 1}, Loss : {age_loss}, Val_loss : {val_loss}')

# total_time = time.time() - start

# hours, rem = divmod(total_time, 3600)
# minutes, seconds = divmod(rem, 60)
# print("Total training time: {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

# 기존 모델
- Epoch : 1, Loss : 183.76921428333628, Val_loss : 138.7342578125
- Epoch : 2, Loss : 128.2447030203683, Val_loss : 143.2067679595947
- Epoch : 3, Loss : 122.56000444965977, Val_loss : 96.83059211730956
- Epoch : 4, Loss : 87.70850903081437, Val_loss : 78.38082936604818
- Epoch : 5, Loss : 68.04693739406598, Val_loss : 72.64038126627604
- Epoch : 6, Loss : 57.276853927027304, Val_loss : 68.96054336547851
- Epoch : 7, Loss : 51.59112667961242, Val_loss : 57.317286516825355
- Epoch : 8, Loss : 47.01944582150006, Val_loss : 55.66085670471191
- Epoch : 9, Loss : 42.54086226624803, Val_loss : 53.47876875559489
- Epoch : 10, Loss : 39.80607411198723, Val_loss : 54.47281494140625
- Epoch : 11, Loss : 36.430880546569824, Val_loss : 54.80409914652507
- Epoch : 12, Loss : 33.627891385631195, Val_loss : 48.48928497411028
- Epoch : 13, Loss : 31.059670966142303, Val_loss : 51.898619048203095
- Epoch : 14, Loss : 28.95467392198599, Val_loss : 48.438780072369156
- Epoch : 15, Loss : 26.666760903255195, Val_loss : 49.81154549272755
- Epoch : 16, Loss : 24.51864461989919, Val_loss : 43.89916703067249