In [None]:
# Fetch the VGGFace2-pytorch repository. The later `from models.resnet import resnet50`
# import is presumably resolved from this checkout — TODO confirm.
!git clone https://github.com/cydonia999/VGGFace2-pytorch.git

In [None]:
# Change the kernel's working directory into the cloned repository.
# NOTE(review): re-running this cell without restarting the kernel will look for a nested directory.
%cd VGGFace2-pytorch

In [90]:
#import 부분
import time
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torch
import torch.nn as nn
import pandas as pd
import os
import json
import random
import pickle
from functools import partial
from models.resnet import resnet50
from torch.utils.data import WeightedRandomSampler
#정확도 및 mae 임포트
from torchmetrics import Accuracy, MeanAbsoluteError
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Report the installed PyTorch version and whether a CUDA device is visible.
print("PyTorch 버전:", torch.__version__)
cuda_ready = torch.cuda.is_available()
print("CUDA 사용 가능:", cuda_ready)

if not cuda_ready:
    print("CUDA를 사용할 수 없습니다.")
else:
    print("현재 CUDA 디바이스 인덱스:", torch.cuda.current_device())
    print("CUDA 디바이스 이름:", torch.cuda.get_device_name(0))

In [92]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Show the selected device and, when CUDA is present, the active CUDA device.
print("사용 디바이스:", device)
has_cuda = torch.cuda.is_available()
print("CUDA 사용 가능 여부:", has_cuda)
if has_cuda:
    active_index = torch.cuda.current_device()
    print("현재 CUDA 디바이스 인덱스:", active_index)
    print("CUDA 디바이스 이름:", torch.cuda.get_device_name(active_index))

In [94]:
class DataProcessing(Dataset):
    """Image dataset yielding ``(image, normalised_age, gender)`` samples.

    For each category the label file ``{mode}_crop_{category}.json`` in
    ``label_dir`` is read. Every labelled filename is expanded to all ``.jpg``
    files in ``image_dir/<category>`` whose name starts with the labelled name
    with ``.jpg`` removed, so files sharing that prefix (e.g. augmented copies)
    inherit the same label.

    NOTE(review): prefix matching also picks up unrelated files whose name
    merely begins with the same characters (``x_crop2`` matches
    ``x_crop20.jpg``) — confirm the augmentation naming scheme rules this out.

    Args:
        image_dir: Directory containing one sub-folder per category.
        label_dir: Directory containing the per-category label JSON files.
        categories: Iterable of category names.
        transform: Optional callable applied to the RGB PIL image.
        mode: Prefix of the label file names (``'train'``, ``'val'``, ...).
        age_min: Age mapped to 0.0 by the normalisation.
        age_max: Age mapped to 1.0; rows with ``age >= age_max`` are dropped.

    Raises:
        ValueError: If ``age_max`` is not greater than ``age_min``.
    """

    def __init__(self, image_dir, label_dir, categories, transform=None, mode='train',
                 age_min=10, age_max=60):
        if age_max <= age_min:
            raise ValueError('age_max must be greater than age_min')

        self.datalist = []
        self.transform = transform
        # Category name -> integer index (not used elsewhere in this class).
        self.label_map = {cat: idx for idx, cat in enumerate(categories)}
        self.mode = mode
        self.age_min = age_min
        self.age_max = age_max
        age_span = self.age_max - self.age_min

        for category in categories:
            json_path = os.path.join(label_dir, f'{self.mode}_crop_{category}.json')
            img_folder = os.path.join(image_dir, category)

            with open(json_path, 'r', encoding='utf-8') as f:
                label_data = json.load(f)

            # List the folder once per category instead of once per label row.
            jpg_files = (
                [name for name in os.listdir(img_folder) if name.endswith('.jpg')]
                if label_data else []
            )

            for row in label_data:
                # e.g. 'abc_crop20.jpg' -> 'abc_crop20'
                base_filename = row['filename'].replace('.jpg', '')

                # The original image plus every file sharing its name as a prefix.
                matched_files = [name for name in jpg_files if name.startswith(base_filename)]
                if not matched_files:
                    continue

                age = row.get('age')
                if age is not None and age >= self.age_max:
                    continue

                # A missing age is stored as 0.0, i.e. indistinguishable from age_min.
                age_norm = (age - self.age_min) / age_span if age is not None else 0.0
                gender = row.get('gender')

                for matched_file in matched_files:
                    img_path = os.path.join(img_folder, matched_file)
                    if not os.path.isfile(img_path):
                        continue

                    self.datalist.append({
                        'img_path': img_path,
                        'category': category,
                        'age': age_norm,
                        'raw_age': age,
                        'gender': gender,
                    })

    def __len__(self):
        """Return the number of collected samples."""
        return len(self.datalist)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            A tuple ``(image, age, gender)``: the RGB image (after ``transform``
            when one was given), the normalised age as a float32 tensor, and the
            gender as a long tensor (1 when the label is '남', otherwise 0).
        """
        data_item = self.datalist[idx]

        # Close the file handle right away; convert() returns an independent,
        # fully loaded image, so nothing stays open per sample.
        with Image.open(data_item['img_path']) as img:
            image = img.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        age = torch.tensor(data_item['age'], dtype=torch.float32)
        gender = torch.tensor(1 if data_item['gender'] == '남' else 0, dtype=torch.long)

        return image, age, gender

In [None]:
def get_sampling_weights(dataset):
    """Return one sampling weight per entry of ``dataset.datalist``.

    The weight depends on the sample's age decade (``raw_age`` floored to a
    multiple of 10) and on whether its ``gender`` equals '남'. Samples without
    a ``raw_age``, and decades not listed in the table, get a weight of 1.0.
    """
    # age decade -> (weight when gender == '남', weight for any other gender)
    decade_weights = {
        10: (3.0, 1.3),
        20: (1.4, 1.2),
        40: (1.1, 1.1),
        50: (2.0, 1.5),
    }
    neutral = (1.0, 1.0)

    def weight_for(sample):
        raw_age = sample.get('raw_age')
        if raw_age is None:
            return 1.0
        decade = (raw_age // 10) * 10
        male_weight, other_weight = decade_weights.get(decade, neutral)
        return male_weight if sample.get('gender') == '남' else other_weight

    return [weight_for(sample) for sample in dataset.datalist]

In [96]:
# Preprocessing applied to every image: convert to a tensor, then normalise per channel.
# NOTE(review): no resize is applied here, so batching presumably relies on all
# crops already sharing one size — confirm.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],  # per-channel RGB mean
            std=[0.229, 0.224, 0.225],   # per-channel RGB standard deviation
        ),
    ]
)

In [None]:

# Categories; each one has its own image sub-folder and label JSON.
categories = ['anger', 'happy', 'panic', 'sadness']

# `__file__` is not defined in a plain Jupyter kernel; fall back to the current
# working directory so the cell does not die with a NameError there.
# NOTE(review): the fallback is only correct if the kernel's working directory
# is the folder that holds `augment/` and `CropData2/` — confirm.
try:
    base_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    base_dir = os.getcwd()

train_image_dir = os.path.join(base_dir, 'augment')  # Final/augment
train_label_dir = os.path.join(base_dir, 'CropData2', 'label', 'train')  # Final/CropData2/label/train

val_image_dir = os.path.join(base_dir, 'CropData2', 'img', 'val')  # Final/CropData2/img/val
val_label_dir = os.path.join(base_dir, 'CropData2', 'label', 'val')  # Final/CropData2/label/val

# Build the training and validation datasets with the same preprocessing.
train_data_load = DataProcessing(train_image_dir, train_label_dir, categories, transform=transform, mode='train')
val_data_load = DataProcessing(val_image_dir, val_label_dir, categories, transform=transform, mode='val')

In [None]:
# One weight per training sample, derived from its age decade and gender.
weights = get_sampling_weights(train_data_load)

# Define the sampler: draw as many indices per epoch as there are training
# samples, with replacement, so heavier-weighted samples are seen more often.
sampler = WeightedRandomSampler(
    weights=weights,
    num_samples=len(weights),
    replacement=True,
)

In [None]:
# Options shared by both loaders. pin_memory is recommended when training on a GPU.
# num_workers is left at its default; `num_workers=4` (raised from 2) was tried
# earlier and is currently disabled.
_loader_options = {'batch_size': 64, 'pin_memory': True}

# Training batches are drawn through the weighted sampler.
train_loader = DataLoader(train_data_load, sampler=sampler, **_loader_options)

# Validation is usually not shuffled.
val_loader = DataLoader(val_data_load, shuffle=False, **_loader_options)

In [None]:
#==================== Load the model, 2nd variant: custom v1.2 ==============
def _build_age_gender_head(in_features):
    """Build the replacement ``fc`` head.

    Three Linear -> BatchNorm1d -> ReLU -> Dropout stages (256, 128, 64 units
    with dropout 0.4, 0.3, 0.2) followed by a final 3-unit Linear layer. Later
    cells read output column 0 as the age and columns 1-2 as gender logits.
    """
    stages = ((256, 0.4), (128, 0.3), (64, 0.2))
    layers = []
    width = in_features
    for out_width, drop_p in stages:
        layers.extend([
            nn.Linear(width, out_width),
            nn.BatchNorm1d(out_width),
            nn.ReLU(),
            nn.Dropout(drop_p),
        ])
        width = out_width
    layers.append(nn.Linear(width, 3))
    return nn.Sequential(*layers)


model_v2_2_1_5 = resnet50()

# Swap the backbone's original classifier for the custom head.
model_v2_2_1_5.fc = _build_age_gender_head(model_v2_2_1_5.fc.in_features)

In [None]:
#================ 3. Load the pretrained weights ===============
# `__file__` is not defined in a plain Jupyter kernel; fall back to the current
# working directory instead of failing with a NameError.
try:
    base_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    base_dir = os.getcwd()
weight_path = os.path.join(base_dir, 'resnet50_ft_weight.pkl')

# SECURITY: pickle.load can execute arbitrary code. Only load a weight file
# obtained from a trusted source.
with open(weight_path, 'rb') as f:
    state_dict = pickle.load(f)

# Entries stored as NumPy arrays are converted to tensors in place
# (keys are only reassigned, never added or removed, while iterating).
for key in state_dict:
    if isinstance(state_dict[key], np.ndarray):
        state_dict[key] = torch.from_numpy(state_dict[key])

# strict=False tolerates the replaced `fc` head, but it would equally hide a
# backbone mismatch, so report any missing parameter outside the head.
load_result = model_v2_2_1_5.load_state_dict(state_dict, strict=False)
backbone_missing = [
    name for name in load_result.missing_keys
    if not name.startswith('fc.') and not name.endswith('num_batches_tracked')
]
if backbone_missing:
    print(f'Warning: {len(backbone_missing)} non-head parameters were not found in '
          f'{weight_path}, e.g. {backbone_missing[:5]}')


# 5. Move the model to the selected device.
model_v2_2_1_5 = model_v2_2_1_5.to(device)



In [None]:
#================================================
# Losses: MSE for the (normalised) age output, cross-entropy for the two gender logits.
criterion_age, criterion_gender = nn.MSELoss(), nn.CrossEntropyLoss()

# Adam over every model parameter (backbone and head).
optimizer = torch.optim.Adam(params=model_v2_2_1_5.parameters(), lr=1e-4)

num_epochs = 30  # tunable

In [None]:
def evaluate(model, data_loader, device, criterion_age, criterion_gender):
    """Run one pass over ``data_loader`` with gradients disabled.

    The model is switched to eval mode (and left there). Output column 0 is
    treated as the age prediction and columns 1-2 as gender logits.

    Returns:
        ``(avg_loss, accuracy, mae)``: the mean of the per-batch combined
        losses (age loss + gender loss), the gender accuracy metric value and
        the age mean-absolute-error metric value.
    """
    model.eval()
    gender_accuracy = Accuracy(task='binary').to(device)
    age_mae = MeanAbsoluteError().to(device)
    batch_losses = []

    with torch.no_grad():
        for batch in data_loader:
            images, ages, genders = (tensor.to(device) for tensor in batch)

            outputs = model(images)
            age_pred, gender_logits = outputs[:, 0], outputs[:, 1:3]

            batch_loss = criterion_age(age_pred, ages) + criterion_gender(gender_logits, genders)
            batch_losses.append(batch_loss.item())

            gender_accuracy.update(torch.argmax(gender_logits, dim=1), genders)
            age_mae.update(age_pred, ages)

    # Averaged over batches, not over individual samples.
    mean_loss = sum(batch_losses) / len(data_loader)
    return mean_loss, gender_accuracy.compute(), age_mae.compute()

In [None]:
#============= Empty lists that collect the training history =============
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []
train_maes, val_maes = [], []

In [None]:
#================Early Stopping======================
class EarlyStopping:
    """Track the best validation loss and signal when training should stop.

    Call the instance once per validation pass. When the loss improves, a
    snapshot of the model weights is stored in ``best_model_state``; after
    ``patience`` consecutive calls without improvement ``early_stop`` is set.

    Args:
        patience: Number of consecutive non-improving calls to tolerate.
        verbose: Whether to print a message on every call.
    """

    def __init__(self, patience=5, verbose=False):
        self.patience = patience      # non-improving calls tolerated before stopping
        self.verbose = verbose        # print progress messages
        self.counter = 0              # consecutive calls without improvement
        # float('inf') instead of np.Inf: that alias was removed in NumPy 2.0.
        self.best_loss = float('inf')
        self.early_stop = False       # set once patience is exhausted
        self.best_model_state = None  # snapshot of the best weights seen so far

    def __call__(self, val_loss, model):
        """Record ``val_loss`` and snapshot ``model`` when it is a new best."""
        import copy

        if val_loss < self.best_loss:
            self.best_loss = val_loss
            # state_dict() hands back references to the live parameters, so it
            # must be deep-copied; otherwise the "best" snapshot would keep
            # changing as training continues.
            self.best_model_state = copy.deepcopy(model.state_dict())
            self.counter = 0
            if self.verbose:
                print(f'Validation loss improved to {val_loss:.4f}. Saving model.')
        else:
            self.counter += 1
            if self.verbose:
                print(f'No improvement for {self.counter} epochs.')
            if self.counter >= self.patience:
                if self.verbose:
                    print('Early stopping triggered.')
                self.early_stop = True

In [None]:
# Train for up to `num_epochs` epochs. Validation (and the early-stopping check)
# runs every 3rd epoch and on the final epoch, so `patience=5` means five
# validation passes without improvement, not five epochs.
early_stopping = EarlyStopping(patience=5, verbose=True)

for epoch in range(num_epochs):
    model_v2_2_1_5.train()  # evaluate() leaves the model in eval mode
    epoch_loss = 0

    for images, ages, genders in train_loader:
        images = images.to(device)
        ages = ages.to(device)
        genders = genders.to(device)

        outputs = model_v2_2_1_5(images)
        predicted_age = outputs[:, 0]             # column 0: age prediction
        predicted_gender_logits = outputs[:, 1:3]  # columns 1-2: gender logits

        loss_age = criterion_age(predicted_age, ages)
        loss_gender = criterion_gender(predicted_gender_logits, genders)
        loss = loss_age + loss_gender

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Mean of the per-batch training losses; recorded every epoch, whereas the
    # other history lists below only grow on evaluation epochs.
    avg_loss = epoch_loss / len(train_loader)
    train_losses.append(avg_loss)

    # Evaluate and record metrics every 3 epochs (and on the last epoch).
    if (epoch + 1) % 3 == 0 or epoch == num_epochs - 1:
        train_loss, train_acc, train_mae = evaluate(model_v2_2_1_5, train_loader, device, criterion_age, criterion_gender)
        val_loss, val_acc, val_mae = evaluate(model_v2_2_1_5, val_loader, device, criterion_age, criterion_gender)

        val_losses.append(val_loss)
        train_accuracies.append(train_acc.item())
        val_accuracies.append(val_acc.item())
        train_maes.append(train_mae.item())
        val_maes.append(val_mae.item())

        print(f'Epoch [{epoch+1}/{num_epochs}]')
        print(f'Train Loss : {train_loss:.4f}, Train Gender Accuracy : {train_acc:.4f}, Train AGE MAE : {train_mae:.4f}')
        print(f'Validation Loss : {val_loss:.4f}, Validation Gender Accuracy : {val_acc:.4f}, Validation AGE MAE : {val_mae:.4f}')

        early_stopping(val_loss, model_v2_2_1_5)

        if early_stopping.early_stop:
            print("Early stopping triggered")
            break

    else:
        # On non-evaluation epochs only the training loss is printed.
        print(f'Epoch [{epoch+1}/{num_epochs}] - Training loss: {avg_loss:.4f}')

# Restore the best weights. No snapshot exists when no validation pass ran
# (e.g. num_epochs == 0); load_state_dict(None) would fail in that case.
if early_stopping.best_model_state is not None:
    model_v2_2_1_5.load_state_dict(early_stopping.best_model_state)
else:
    print('No validation pass was run; keeping the current weights.')

Epoch [1/30] - Training loss: 0.5685
Epoch [2/30] - Training loss: 0.2000
Epoch [3/30]
Train Loss : 0.0564, Train Gender Accuracy : 0.9956, Train AGE MAE : 0.1029
Validation Loss : 0.0692, Validation Gender Accuracy : 0.9900, Validation AGE MAE : 0.0996
Validation loss improved to 0.0692. Saving model.
Epoch [4/30] - Training loss: 0.1043
Epoch [5/30] - Training loss: 0.0898
Epoch [6/30]
Train Loss : 0.0363, Train Gender Accuracy : 0.9960, Train AGE MAE : 0.0949
Validation Loss : 0.0581, Validation Gender Accuracy : 0.9875, Validation AGE MAE : 0.0881
Validation loss improved to 0.0581. Saving model.
Epoch [7/30] - Training loss: 0.0680
Epoch [8/30] - Training loss: 0.0636
Epoch [9/30]
Train Loss : 0.0275, Train Gender Accuracy : 0.9964, Train AGE MAE : 0.0797
Validation Loss : 0.0545, Validation Gender Accuracy : 0.9859, Validation AGE MAE : 0.0884
Validation loss improved to 0.0545. Saving model.
Epoch [10/30] - Training loss: 0.0591
Epoch [11/30] - Training loss: 0.0470
Epoch [12/30

<All keys matched successfully>

In [None]:
# `__file__` is not defined in a plain Jupyter kernel; fall back to the current
# working directory instead of failing with a NameError.
try:
    base_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    base_dir = os.getcwd()
pth_save_path = os.path.join(base_dir, 'pth_pkl', 'model_raw_weights_v2_2_1_5.pth')
try:
    # Create the target folder first so the save cannot fail on a missing directory.
    os.makedirs(os.path.dirname(pth_save_path), exist_ok=True)
    torch.save(model_v2_2_1_5.state_dict(), pth_save_path)
    print(f'모델 저장 완료 → {pth_save_path}')
except Exception as e:
    # Best effort: report the failure without aborting the notebook run.
    print(f'모델 저장 실패: {e}')

In [None]:
# Bundle the recorded metric lists under the keys used when saving the history.
history = dict(
    train_losses=train_losses,
    val_losses=val_losses,
    train_accuracies=train_accuracies,
    val_accuracies=val_accuracies,
    train_maes=train_maes,
    val_maes=val_maes,
)

In [None]:
# Save the training history as a pickle file.
# `__file__` is not defined in a plain Jupyter kernel; fall back to the current
# working directory instead of failing with a NameError.
try:
    base_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    base_dir = os.getcwd()
pkl_save_path = os.path.join(base_dir, 'pth_pkl', 'model_raw_v2_2_1_5.pkl')

try:
    # Create the target folder first so the dump cannot fail on a missing directory.
    os.makedirs(os.path.dirname(pkl_save_path), exist_ok=True)
    with open(pkl_save_path, "wb") as f:
        pickle.dump(history, f)
    print(f'학습 기록이 성공적으로 저장되었습니다 : {pkl_save_path}')
except Exception as e:
    # Best effort: report the failure without aborting the notebook run.
    print(f'학습 기록 저장 실패: {e}')