In [None]:
# Colab only: mount Google Drive at /content/drive/ so the notebook can read
# and write the files it references under /content/drive/MyDrive/.
from google.colab import drive

drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
import torchsummary
import numpy as np
from PIL import Image
import json
import time
import random
import h5py

In [None]:
def seed_everything(seed=42, deterministic=False):
    """Seed every random number generator used in this notebook.

    Args:
        seed: Integer seed shared by Python's ``random``, NumPy and PyTorch.
        deterministic: When True, also ask cuDNN for deterministic kernels
            and turn off its autotuner. Defaults to False so existing
            ``seed_everything()`` calls keep their previous behaviour.
    """
    random.seed(seed)  # Python's built-in random module
    np.random.seed(seed)  # NumPy global RNG (np.random.*)
    torch.manual_seed(seed)  # PyTorch RNG
    # Seeds the RNG of every GPU; this already includes the current device,
    # so a separate torch.cuda.manual_seed(seed) call is not needed.
    torch.cuda.manual_seed_all(seed)

    if deterministic:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False


seed_everything()

# 데이터셋 정의

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Face-crop dataset read from an HDF5 file.

    The file is expected to contain the groups ``image``, ``age`` and ``box``,
    all indexed by the same keys. Each item is the image cropped to its box
    (and optionally transformed) together with ``{'age_past': age}``.

    The HDF5 file is opened lazily, once per process, instead of in
    ``__init__``: a handle opened in the parent process and then shared with
    forked ``DataLoader`` workers is not safe to read from concurrently.

    Args:
        hdf5: Path to the HDF5 file.
        transform: Optional callable applied to the cropped PIL image.
    """

    def __init__(self, hdf5, transform=None):
        self.transform = transform
        self.hdf5 = hdf5
        self.file_object = None  # opened on first access, see _get_file()
        self._owner_pid = None
        # Only the key list is needed up front; read it with a short-lived handle.
        with h5py.File(hdf5, 'r') as file_object:
            self.keys = list(file_object['image'].keys())

    def _get_file(self):
        """Return a read-only handle that was opened by the current process."""
        pid = os.getpid()
        if self.file_object is None or self._owner_pid != pid:
            self.file_object = h5py.File(self.hdf5, 'r')
            self._owner_pid = pid
        return self.file_object

    def __len__(self):
        return len(self.keys)

    def __getitem__(self, index):
        key = self.keys[index]
        file_object = self._get_file()
        image = file_object['image'][key][:]
        age = file_object['age'][key][()]
        box = file_object['box'][key][()]

        # Convert the array to a PIL image first, then crop it.
        # The box is used as (left, top, width, height).
        image = Image.fromarray(image)
        image = image.crop((box[0], box[1], box[0] + box[2], box[1] + box[3]))

        if self.transform:
            image = self.transform(image)

        label = {'age_past': age}

        return image, label

    def close(self):
        """Close the HDF5 handle if one is open; safe to call more than once."""
        if self.file_object is not None:
            self.file_object.close()
            self.file_object = None


In [None]:
# Oversample the older age group (age >= 60 by default) at a fixed rate
class OverSampleDataset(torch.utils.data.Dataset):
    """HDF5 face-crop dataset that repeats samples of the older age group.

    Keys are split by age into ``age_groups['below_60']`` (age < threshold)
    and ``age_groups['60s_plus']`` (everything else). The index list
    ``over_sampled_keys`` contains every ``below_60`` key once, followed by
    ``int(len(60s_plus) * oversample_rate)`` keys drawn with replacement from
    the ``60s_plus`` group using NumPy's global RNG.

    The HDF5 file is opened lazily, once per process, instead of in
    ``__init__``: a handle opened in the parent process and then shared with
    forked ``DataLoader`` workers is not safe to read from concurrently.

    Args:
        hdf5: Path to the HDF5 file (groups ``image``, ``age``, ``box``).
        transform: Optional callable applied to the cropped PIL image.
        oversample_rate: Multiplier for the size of the older group.
        age_threshold: Ages below this value go to ``below_60``. The group
            names are kept for compatibility even if the threshold changes.
    """

    def __init__(self, hdf5, transform=None, oversample_rate=4, age_threshold=60):
        self.transform = transform
        self.hdf5 = hdf5
        self.file_object = None  # opened on first access, see _get_file()
        self._owner_pid = None

        # Keys and ages are only needed once; read them with a short-lived handle.
        with h5py.File(hdf5, 'r') as file_object:
            self.keys = list(file_object['image'].keys())
            ages = [file_object['age'][key][()] for key in self.keys]

        self.age_groups, self.over_sampled_keys = self._oversample(
            self.keys, ages, oversample_rate, age_threshold
        )

    @staticmethod
    def _oversample(keys, ages, oversample_rate=4, age_threshold=60):
        """Group keys by age and build the oversampled key list.

        Returns:
            ``(age_groups, over_sampled_keys)`` as described on the class.
        """
        age_groups = {'below_60': [], '60s_plus': []}
        for key, age in zip(keys, ages):
            group = 'below_60' if age < age_threshold else '60s_plus'
            age_groups[group].append(key)

        # Start from a copy: extending the group list itself would silently
        # add the oversampled keys to age_groups['below_60'] as well.
        over_sampled_keys = list(age_groups['below_60'])

        older_keys = age_groups['60s_plus']
        n_draws = int(len(older_keys) * oversample_rate)
        if n_draws > 0:
            drawn = np.random.choice(older_keys, n_draws, replace=True).tolist()
            over_sampled_keys.extend(drawn)

        return age_groups, over_sampled_keys

    def _get_file(self):
        """Return a read-only handle that was opened by the current process."""
        pid = os.getpid()
        if self.file_object is None or self._owner_pid != pid:
            self.file_object = h5py.File(self.hdf5, 'r')
            self._owner_pid = pid
        return self.file_object

    def __len__(self):
        return len(self.over_sampled_keys)

    def __getitem__(self, index):
        key = self.over_sampled_keys[index]
        file_object = self._get_file()
        image = file_object['image'][key][:]
        age = file_object['age'][key][()]
        box = file_object['box'][key][()]

        # The box is used as (left, top, width, height).
        image = Image.fromarray(image)
        image = image.crop((box[0], box[1], box[0] + box[2], box[1] + box[3]))

        if self.transform:
            image = self.transform(image)

        label = {'age_past': age}

        return image, label

    def close(self):
        """Close the HDF5 handle if one is open; safe to call more than once."""
        if self.file_object is not None:
            self.file_object.close()
            self.file_object = None


In [None]:
# Augmentations: with probability 0.7 the whole group below is applied,
# otherwise none of it is.
random_transforms = transforms.RandomApply(
    transforms=[
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=20),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    ],
    p=0.7,
)

# Preprocessing applied to every image: resize, tensor conversion, normalisation
always_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.6284, 0.4901, 0.4325],
            std=[0.1869, 0.1712, 0.1561],
        ),
    ]
)

# Training: random augmentation group followed by the fixed preprocessing
transform_train = transforms.Compose([random_transforms, always_transforms])

# Validation: fixed preprocessing only
transform_val = transforms.Compose([always_transforms])

In [None]:
# Training data goes through the oversampling dataset with augmentation;
# validation data is read as-is with the fixed preprocessing only.
train_dataset = OverSampleDataset(
    hdf5='/content/drive/MyDrive/DL_DATA/DL_Face_REC/data_age_train.hdf5',
    transform=transform_train,
)
val_dataset = CustomDataset(
    hdf5='/content/drive/MyDrive/DL_DATA/DL_Face_REC/data_age_val.hdf5',
    transform=transform_val,
)

In [None]:
batch_size = 256

# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    prefetch_factor=8,
)

# Validation batches keep the dataset order.
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
    prefetch_factor=8,
)

In [None]:
# Number of batches per epoch in the training and validation loaders
len(train_loader), len(val_loader)

(160, 20)

In [None]:
# # DataLoader에서 단일 배치 테스트
# def test_single_batch(data_loader):
#     for data, labels in data_loader:
#         print("Data shape:", data.shape)
#         print("Labels shape:", labels['age_past'].shape)
#         break

# # DataLoader 전체 테스트
# def test_full_loader(data_loader):
#     for i, (data, labels) in enumerate(data_loader):
#         if i % 10 == 0:
#             print(f"Batch {i}, Data shape: {data.shape}, Labels shape: {labels['age_past'].shape}")

# # val_loader 테스트 실행
# print("Testing single batch from val_loader:")
# test_single_batch(val_loader)

# print("\nTesting full val_loader:")
# test_full_loader(val_loader)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


# 모델 - Inception ResNet v1
- 사전 학습 모델 사용
- 손실함수 MSE -> MAE

In [None]:
%pip install facenet-pytorch==2.5.3

Collecting facenet-pytorch
  Downloading facenet_pytorch-2.5.3-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: facenet-pytorch
Successfully installed facenet-pytorch-2.5.3


In [None]:
from facenet_pytorch import InceptionResnetV1

def make_age_model():
    """Build the age regressor: a VGGFace2-pretrained InceptionResnetV1 with a 1-unit head.

    All parameters are frozen except those of ``block8``, ``last_linear``,
    ``last_bn`` and the freshly created ``logits`` layer, which stay trainable
    for fine-tuning.

    Returns:
        The model with ``requires_grad`` configured as described above.
    """
    model = InceptionResnetV1(classify=True, pretrained='vggface2', num_classes=1, device=device)
    # Replace the head with a new single-output linear layer of the same input width.
    model.logits = nn.Linear(model.logits.in_features, 1)

    # Freeze everything first ...
    model.requires_grad_(False)

    # ... then unfreeze only the layers that are fine-tuned.
    for trainable_part in (model.block8, model.last_linear, model.last_bn, model.logits):
        trainable_part.requires_grad_(True)

    return model

age_model = make_age_model().to(device)

  0%|          | 0.00/107M [00:00<?, ?B/s]

In [None]:
# torchsummary.summary(age_model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 111, 111]             864
       BatchNorm2d-2         [-1, 32, 111, 111]              64
              ReLU-3         [-1, 32, 111, 111]               0
       BasicConv2d-4         [-1, 32, 111, 111]               0
            Conv2d-5         [-1, 32, 109, 109]           9,216
       BatchNorm2d-6         [-1, 32, 109, 109]              64
              ReLU-7         [-1, 32, 109, 109]               0
       BasicConv2d-8         [-1, 32, 109, 109]               0
            Conv2d-9         [-1, 64, 109, 109]          18,432
      BatchNorm2d-10         [-1, 64, 109, 109]             128
             ReLU-11         [-1, 64, 109, 109]               0
      BasicConv2d-12         [-1, 64, 109, 109]               0
        MaxPool2d-13           [-1, 64, 54, 54]               0
           Conv2d-14           [-1, 80,

# 학습과정

In [None]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.optim as optim

# Adam over all model parameters with a learning rate of 0.0003
opt_age = optim.Adam(params=age_model.parameters(), lr=0.0003)

# Reduce the learning rate when the monitored loss stops decreasing ('min' mode);
# after each reduction, wait 3 epochs before monitoring resumes.
age_lr_scheduler = ReduceLROnPlateau(
    optimizer=opt_age,
    mode='min',
    cooldown=3,
    verbose=True,
)

In [None]:
class EarlyStopping:
    """Flag when a validation loss has stopped improving.

    Call the instance once per epoch with the validation loss. An epoch counts
    as an improvement when the loss drops below the best value seen so far by
    more than ``delta``. After ``patience`` consecutive epochs without
    improvement ``early_stop`` becomes True.

    Args:
        patience: Number of consecutive non-improving epochs tolerated.
        verbose: Print "Early stopping" when the flag is raised.
        delta: Minimum decrease that counts as an improvement.
    """

    def __init__(self, patience=5, verbose=False, delta=0):
        self.patience, self.verbose, self.delta = patience, verbose, delta
        self.best_loss = np.inf
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss):
        improved = (self.best_loss - val_loss) > self.delta
        if improved:
            # New best value: remember it and restart the patience counter.
            self.best_loss = val_loss
            self.counter = 0
            return

        self.counter += 1
        if self.counter < self.patience:
            return

        self.early_stop = True
        if self.verbose:
            print("Early stopping")

In [None]:
# Early-stopping tracker used by the training loop (default patience of 5 epochs);
# verbose=True makes it print a message when it triggers.
early_stopping = EarlyStopping(verbose=True)

In [None]:
def train_loop_age(dataloader, model, loss_fn, optimizer, epoch):
    """Train the age model for one epoch and return the mean of the batch losses.

    Args:
        dataloader: Yields ``(images, labels)`` where ``labels['age_past']``
            holds the target ages; must expose ``.dataset`` with a length.
        model: Network to train; inputs are moved to the device of its parameters.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        optimizer: Optimizer stepping the model's parameters.
        epoch: Zero-based epoch index, used only for log messages.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    # Follow the model's own device instead of relying on a global variable.
    model_device = next(model.parameters()).device
    size = len(dataloader.dataset)
    total_loss = 0.0
    processed = 0  # samples seen so far; stays correct for a short last batch
    start_time = time.time()  # epoch start time

    for batch, (x, y) in enumerate(dataloader):
        batch_start_time = time.time()  # batch start time (data loading excluded)
        x, y = x.to(model_device), y['age_past'].float().to(model_device)
        pred = model(x)
        # Match the target's shape explicitly: a bare squeeze() would turn a
        # batch of one into a scalar and rely on broadcasting in the loss.
        loss = loss_fn(pred.reshape(y.shape), y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        processed += len(x)

        if batch % 10 == 0:
            batch_process_time = time.time() - batch_start_time
            print(f'Epoch {epoch+1} : [{processed} / {size}] loss : {loss.item()}, Batch time: {batch_process_time:.4f} sec')

    average_loss = total_loss / len(dataloader)
    epoch_time = time.time() - start_time

    print(f"Epoch {epoch+1} Finished, Total Epoch time: {epoch_time:.5f} sec, Train Loss: {average_loss:.5f}")
    return average_loss


In [None]:
def validation_loop_age(dataloader, model, loss_fn):
    """Evaluate the age model and return the mean of the batch losses.

    Args:
        dataloader: Yields ``(images, labels)`` where ``labels['age_past']``
            holds the target ages.
        model: Network to evaluate; it is switched to eval mode and inputs are
            moved to the device of its parameters.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    model.eval()
    # Follow the model's own device instead of relying on a global variable.
    model_device = next(model.parameters()).device
    val_loss = 0.0
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(model_device), y['age_past'].float().to(model_device)
            pred = model(x)
            # Match the target's shape explicitly: a bare squeeze() would turn a
            # batch of one into a scalar and rely on broadcasting in the loss.
            loss = loss_fn(pred.reshape(y.shape), y)
            val_loss += loss.item()
    val_loss /= len(dataloader)
    return val_loss

In [None]:
def save_model(epoch, model, optimizer, path, train_loss, val_loss, scheduler):
    """Write a training checkpoint to ``path`` with ``torch.save``.

    The checkpoint is a dict with the keys ``epoch``, ``model_state_dict``,
    ``optimizer_state_dict``, ``train_loss``, ``val_loss`` and ``scheduler``.
    """
    checkpoint = dict(
        epoch=epoch,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        train_loss=train_loss,
        val_loss=val_loss,
        scheduler=scheduler.state_dict(),  # the scheduler state is stored as well
    )
    torch.save(checkpoint, path)

In [None]:
%pip install torchinfo==1.8.0

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [None]:
from torchinfo import summary

# Layer-by-layer summary for an input of 256 three-channel 224x224 images
# (the same batch size and resolution the loaders and transforms use).
summary(age_model, input_size=(256, 3, 224, 224))

Layer (type:depth-idx)                        Output Shape              Param #
InceptionResnetV1                             [256, 1]                  --
├─BasicConv2d: 1-1                            [256, 32, 111, 111]       --
│    └─Conv2d: 2-1                            [256, 32, 111, 111]       (864)
│    └─BatchNorm2d: 2-2                       [256, 32, 111, 111]       (64)
│    └─ReLU: 2-3                              [256, 32, 111, 111]       --
├─BasicConv2d: 1-2                            [256, 32, 109, 109]       --
│    └─Conv2d: 2-4                            [256, 32, 109, 109]       (9,216)
│    └─BatchNorm2d: 2-5                       [256, 32, 109, 109]       (64)
│    └─ReLU: 2-6                              [256, 32, 109, 109]       --
├─BasicConv2d: 1-3                            [256, 64, 109, 109]       --
│    └─Conv2d: 2-7                            [256, 64, 109, 109]       (18,432)
│    └─BatchNorm2d: 2-8                       [256, 64, 109, 109]       (128)

In [None]:
# start = time.time()
# for epoch in range(150):
#     age_loss = train_loop_age(train_loader, age_model, nn.L1Loss(), opt_age, epoch)
#     val_loss = validation_loop_age(val_loader, age_model, nn.L1Loss())
#     age_lr_scheduler.step(val_loss)

#     early_stopping(val_loss)
#     if early_stopping.early_stop:
#         print("Early stopping")
#         save_model(epoch, age_model, opt_age, '/content/drive/MyDrive/DL_DATA/DL_Face_REC/Model/inception_resnet_v1/age_model_checkpoint.pth', age_loss, val_loss, age_lr_scheduler)
#         break

#     save_model(epoch, age_model, opt_age,
#                f'/content/drive/MyDrive/DL_DATA/DL_Face_REC/Model/inception_resnet_v1/age_model_checkpoint_epoch_{epoch+1}_loss_{round(val_loss, 2)}.pth',
#                age_loss, val_loss, age_lr_scheduler)
#     print(f'Epoch : {epoch+1}, Loss : {age_loss:.5f}, Val_loss : {val_loss:.5f}')

# total_time = time.time() - start

# # 전체 학습 시간 출력
# hours, rem = divmod(total_time, 3600)
# minutes, seconds = divmod(rem, 60)
# print("Total training time: {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

Epoch 1 : [256 / 40882] loss : 22.37364959716797, Batch time: 0.7102 sec
Epoch 1 : [2816 / 40882] loss : 22.72926902770996, Batch time: 0.8148 sec
Epoch 1 : [5376 / 40882] loss : 23.503276824951172, Batch time: 0.7009 sec
Epoch 1 : [7936 / 40882] loss : 21.73055076599121, Batch time: 0.7593 sec
Epoch 1 : [10496 / 40882] loss : 23.04936408996582, Batch time: 0.7216 sec
Epoch 1 : [13056 / 40882] loss : 23.541423797607422, Batch time: 0.7979 sec
Epoch 1 : [15616 / 40882] loss : 22.43260955810547, Batch time: 0.7020 sec
Epoch 1 : [18176 / 40882] loss : 22.472015380859375, Batch time: 0.7178 sec
Epoch 1 : [20736 / 40882] loss : 22.748119354248047, Batch time: 0.7874 sec
Epoch 1 : [23296 / 40882] loss : 22.626632690429688, Batch time: 0.7777 sec
Epoch 1 : [25856 / 40882] loss : 23.942699432373047, Batch time: 0.7164 sec
Epoch 1 : [28416 / 40882] loss : 20.647220611572266, Batch time: 0.7717 sec
Epoch 1 : [30976 / 40882] loss : 19.21630096435547, Batch time: 0.7567 sec
Epoch 1 : [33536 / 4088

KeyboardInterrupt: 

# 이어서 학습

In [None]:
# Resume training of the age model from a checkpoint
def load_model(model, optimizer, path, scheduler):
    """Restore model, optimizer and scheduler state from a checkpoint file.

    Args:
        model: Model whose weights are overwritten in place.
        optimizer: Optimizer whose state is overwritten in place.
        path: Checkpoint file containing the keys ``model_state_dict``,
            ``optimizer_state_dict``, ``epoch`` and ``scheduler``.
        scheduler: LR scheduler whose state is overwritten in place.

    Returns:
        ``(model, optimizer, epoch, scheduler)`` where ``epoch`` is the value
        stored in the checkpoint.
    """
    # Remap stored tensors onto the model's device so that a checkpoint saved
    # on a GPU can also be loaded on a CPU-only runtime.
    target_device = next(model.parameters()).device
    checkpoint = torch.load(path, map_location=target_device)

    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler'])

    return model, optimizer, checkpoint['epoch'], scheduler

In [None]:
# Rebuild the model, optimizer and scheduler with the same settings as the first run
age_model = make_age_model().to(device)
opt_age = optim.Adam(age_model.parameters(), lr=0.0003)
age_lr_scheduler = ReduceLROnPlateau(opt_age, mode='min', verbose=True, cooldown=3)

# Load the checkpoint and overwrite their state with it
age_model, opt_age, start_epoch, age_lr_scheduler = load_model(age_model, opt_age,
                                             '/content/drive/MyDrive/DL_DATA/DL_Face_REC/Model/inception_resnet_v1/age_model_checkpoint_epoch_7_loss_4.72.pth',
                                                               age_lr_scheduler)

# Resume training.
# save_model stores the zero-based loop index of the epoch it was called in,
# so training continues with the following index.
start = time.time()
for epoch in range(start_epoch + 1, 150):
    age_loss = train_loop_age(train_loader, age_model, nn.L1Loss(), opt_age, epoch)
    val_loss = validation_loop_age(val_loader, age_model, nn.L1Loss())
    # The scheduler monitors the validation loss
    age_lr_scheduler.step(val_loss)

    # NOTE: `early_stopping` is the instance created earlier in the notebook; its
    # best loss and counter are not part of the checkpoint and are not restored here.
    early_stopping(val_loss)
    if early_stopping.early_stop:
        print("Early stopping triggered")
        # Final checkpoint is written to a fixed file name before leaving the loop
        save_model(epoch, age_model, opt_age, '/content/drive/MyDrive/DL_DATA/DL_Face_REC/Model/inception_resnet_v1/age_model_checkpoint.pth',
                   age_loss, val_loss, age_lr_scheduler)
        break

    # One checkpoint per epoch; the file name carries the 1-based epoch and the rounded validation loss
    save_model(epoch, age_model, opt_age,
               f'/content/drive/MyDrive/DL_DATA/DL_Face_REC/Model/inception_resnet_v1/age_model_checkpoint_epoch_{epoch+1}_loss_{round(val_loss, 2)}.pth',
               age_loss, val_loss, age_lr_scheduler)

    print(f'Epoch : {epoch + 1}, Loss : {age_loss}, Val_loss : {val_loss}')

total_time = time.time() - start

# Report the total wall-clock training time as HH:MM:SS.ss
hours, rem = divmod(total_time, 3600)
minutes, seconds = divmod(rem, 60)
print("Total training time: {:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

Epoch 8 : [256 / 40882] loss : 4.3025126457214355, Batch time: 0.7389 sec
Epoch 8 : [2816 / 40882] loss : 4.3405537605285645, Batch time: 0.7133 sec
Epoch 8 : [5376 / 40882] loss : 4.614660263061523, Batch time: 0.6860 sec
Epoch 8 : [7936 / 40882] loss : 5.0754570960998535, Batch time: 0.7100 sec
Epoch 8 : [10496 / 40882] loss : 3.879998207092285, Batch time: 0.7308 sec
Epoch 8 : [13056 / 40882] loss : 4.335206031799316, Batch time: 0.6968 sec
Epoch 8 : [15616 / 40882] loss : 4.504642486572266, Batch time: 0.7812 sec
Epoch 8 : [18176 / 40882] loss : 3.922011375427246, Batch time: 0.7267 sec
Epoch 8 : [20736 / 40882] loss : 4.035374641418457, Batch time: 0.7830 sec
Epoch 8 : [23296 / 40882] loss : 4.30877685546875, Batch time: 0.7207 sec
Epoch 8 : [25856 / 40882] loss : 3.931520700454712, Batch time: 0.7423 sec
Epoch 8 : [28416 / 40882] loss : 3.8826920986175537, Batch time: 0.7767 sec
Epoch 8 : [30976 / 40882] loss : 4.561410903930664, Batch time: 0.7084 sec
Epoch 8 : [33536 / 40882] l

KeyboardInterrupt: 

# Inception ResNet v1
- Epoch : 1, Loss : 22.03221, Val_loss : 20.58909
- Epoch : 2, Loss : 17.07232, Val_loss : 11.06361
- Epoch : 3, Loss : 8.42461, Val_loss : 5.66821
- Epoch : 4, Loss : 5.18108, Val_loss : 4.88074
- Epoch : 5, Loss : 4.75659, Val_loss : 4.87239
- Epoch : 6, Loss : 4.54383, Val_loss : 4.71759
- Epoch : 7, Loss : 4.37443, Val_loss : 4.71527
- Epoch : 8, Loss : 4.245163451135158, Val_loss : 4.642987310886383
- Epoch : 9, Loss : 4.14693269431591, Val_loss : 4.670093894004822
- Epoch : 10, Loss : 4.080686891078949, Val_loss : 4.573235726356506
- Epoch : 11, Loss : 4.006946535408497, Val_loss : 4.663610517978668

# Resnet18
- Epoch 1 finished, Total Epoch time: 5526.9220 sec
- Epoch : 1, Loss : 230.3899026858579, Val_loss : 181.15303230285645
- Epoch : 2, Loss : 87.90939525434166, Val_loss : 114.64087181091308
- Epoch : 3, Loss : 63.55927043355954, Val_loss : 76.98570957183838
- Epoch : 4, Loss : 50.9931274219683, Val_loss : 54.968890857696536
- Epoch : 5, Loss : 43.18443214513694, Val_loss : 50.175275039672854
- Epoch : 6, Loss : 36.12652318189099, Val_loss : 62.178778648376465
- Epoch : 7, Loss : 32.96918888456503, Val_loss : 45.514417791366576
- Epoch : 8, Loss : 28.73120542392609, Val_loss : 44.439664649963376
- Epoch : 9, Loss : 26.715184278548904, Val_loss : 44.155246591567995
- Epoch : 10, Loss : 24.47770042176459, Val_loss : 45.13270831108093
-> 검증데이터 손실이 감소하지 않아, colorjitter 추가
- transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2)
- Epoch : 11, Loss : 23.683984027546682, Val_loss : 47.31354990005493
- Epoch : 12, Loss : 21.20979366788439, Val_loss : 38.79328966140747
- Epoch : 13, Loss : 21.470603432624962, Val_loss : 38.411310482025144
- Epoch : 14, Loss : 19.810259369528218, Val_loss : 44.32828512191772
- Epoch : 15, Loss : 18.40842961353861, Val_loss : 36.42188596725464
- Epoch : 16, Loss : 17.623453067366484, Val_loss : 35.0176604270935
- Epoch : 17, Loss : 16.51842679795186, Val_loss : 42.73808040618896
- Epoch : 18, Loss : 16.016441867609693, Val_loss : 34.663173627853396
- Epoch : 19, Loss : 14.968255115922089, Val_loss : 37.80206971168518
- Epoch : 20, Loss : 14.573177337646484, Val_loss : 46.45549983978272
- Epoch : 21, Loss : 14.00512559246865, Val_loss : 34.19767136573792
- Epoch : 22, Loss : 11.325439486533973, Val_loss : 39.99099979400635
-> 60대 이상에 대해 오버 샘플링(oversample_rate=4, random_apply 0.6)
- Epoch : 22, Loss : 15.139811009168625, Val_loss : 35.96300897598267
- Epoch : 23, Loss : 14.79718075990677, Val_loss : 33.31015124320984
- Epoch : 24, Loss : 14.064417380094529, Val_loss : 50.18931665420532

In [None]:
# import matplotlib.pyplot as plt

# epochs = list(range(1, 25))
# train_loss = []
# val_loss = []

# plt.figure(figsize=(10, 6))
# plt.plot(epochs, train_loss, label='Training Loss', color='blue')
# plt.plot(epochs, val_loss, label='Validation Loss', color='red')
# plt.title('Training and Validation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.legend()
# plt.grid(True)
# plt.show()

In [None]:
# Close the HDF5 files held by the training and validation datasets
train_loader.dataset.close()
val_loader.dataset.close()

# VGG
- Epoch : 1, Loss : 183.76921428333628, Val_loss : 138.7342578125
- Epoch : 2, Loss : 128.2447030203683, Val_loss : 143.2067679595947
- Epoch : 3, Loss : 122.56000444965977, Val_loss : 96.83059211730956
- Epoch : 4, Loss : 87.70850903081437, Val_loss : 78.38082936604818
- Epoch : 5, Loss : 68.04693739406598, Val_loss : 72.64038126627604
- Epoch : 6, Loss : 57.276853927027304, Val_loss : 68.96054336547851
- Epoch : 7, Loss : 51.59112667961242, Val_loss : 57.317286516825355
- Epoch : 8, Loss : 47.01944582150006, Val_loss : 55.66085670471191
- Epoch : 9, Loss : 42.54086226624803, Val_loss : 53.47876875559489
- Epoch : 10, Loss : 39.80607411198723, Val_loss : 54.47281494140625
- Epoch : 11, Loss : 36.430880546569824, Val_loss : 54.80409914652507
- Epoch : 12, Loss : 33.627891385631195, Val_loss : 48.48928497411028
- Epoch : 13, Loss : 31.059670966142303, Val_loss : 51.898619048203095
- Epoch : 14, Loss : 28.95467392198599, Val_loss : 48.438780072369156
- Epoch : 15, Loss : 26.666760903255195, Val_loss : 49.81154549272755
- Epoch : 16, Loss : 24.51864461989919, Val_loss : 43.89916703067249