In [75]:
import os
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np
import pandas as pd
from PIL import Image


In [76]:

# 경로 설정
training_image_path = "/home/gyuha_lee/DCC2024/dataset/bg_remove/rembg/90/training_image_no_bg"
validation_image_path = "/home/gyuha_lee/DCC2024/dataset/bg_remove/rembg/90/validation_image_no_bg"
model_path = "/home/gyuha_lee/DCC2024/mission1/WITHOUT_PRETRAINED/resnet18_gender_style_pretrained.pth"  # 1-2에서 학습된 모델 가중치 파일 경로
csv_path = "/home/gyuha_lee/DCC2024/mission2/top_100_preference_data.csv"  # 2-2에서 생성된 CSV 파일 경로


In [77]:

# 1. ResNet-18 모델 로드 및 학습된 가중치 불러오기
model = models.resnet18(pretrained=False)  # 학습된 가중치를 로드할 것이므로 pretrained=False 설정
model.fc = nn.Identity()  # 마지막 FC 레이어를 제거하여 특징 벡터를 추출하도록 설정
model.load_state_dict(torch.load(model_path), strict=False)  # strict=False로 불필요한 키 무시하고 가중치 불러오기
model.eval()  # 평가 모드로 전환

  model.load_state_dict(torch.load(model_path), strict=False)  # strict=False로 불필요한 키 무시하고 가중치 불러오기


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [78]:

# 2. 이미지 전처리 설정
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [79]:

# 3. 특징 벡터 추출 함수 정의
def extract_feature_vector(image_path):
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0)  # 배치 차원 추가
    with torch.no_grad():
        feature_vector = model(image).squeeze().numpy()
    return feature_vector


In [80]:

# 4. CSV 파일 불러오기
csv_data = pd.read_csv(csv_path)


In [81]:

# 5. 사용자별 평균 벡터 계산 및 Validation 데이터 유사도 비교
results = []

for _, row in csv_data.iterrows():
    user_id = row['USER ID']
    training_preferred_files = [img.strip() for img in row['Training prefer'].split(';')]
    training_non_preferred_files = [img.strip() for img in row['Training not prefer'].split(';')]
    validation_preferred_files = [img.strip() for img in row['Validation prefer'].split(';')]
    validation_non_preferred_files = [img.strip() for img in row['Validation not prefer'].split(';')]
    
    # Training 선호 및 비선호 파일의 특징 벡터 추출 및 평균 계산
    preferred_features = [extract_feature_vector(os.path.join(training_image_path, img)) for img in training_preferred_files]
    non_preferred_features = [extract_feature_vector(os.path.join(training_image_path, img)) for img in training_non_preferred_files]
    
    if preferred_features:
        average_preferred_vector = np.mean(preferred_features, axis=0)
    else:
        average_preferred_vector = None

    if non_preferred_features:
        average_non_preferred_vector = np.mean(non_preferred_features, axis=0)
    else:
        average_non_preferred_vector = None

    # Validation 데이터의 특징 벡터 추출 및 유사도 비교
    for val_img in validation_preferred_files:
        val_feature = extract_feature_vector(os.path.join(validation_image_path, val_img))
        if average_preferred_vector is not None:
            similarity = cosine_similarity(val_feature.reshape(1, -1), average_preferred_vector.reshape(1, -1))[0][0]
            predicted_label = 1 if similarity >= 0.5 else 0  # 유사도가 0.5 이상이면 선호로 예측
            results.append({
                'user_id': user_id,
                'validation_file': val_img,
                'true_label': 1,  # 실제 레이블은 선호
                'predicted_label': predicted_label
            })

    for val_img in validation_non_preferred_files:
        val_feature = extract_feature_vector(os.path.join(validation_image_path, val_img))
        if average_non_preferred_vector is not None:
            similarity = cosine_similarity(val_feature.reshape(1, -1), average_non_preferred_vector.reshape(1, -1))[0][0]
            predicted_label = 1 if similarity >= 0.5 else 0  # 유사도가 0.5 이상이면 선호로 예측
            results.append({
                'user_id': user_id,
                'validation_file': val_img,
                'true_label': 0,  # 실제 레이블은 비선호
                'predicted_label': predicted_label
            })


IsADirectoryError: [Errno 21] Is a directory: '/home/gyuha_lee/DCC2024/dataset/bg_remove/rembg/90/validation_image_no_bg'

In [None]:

# 6. 성능 평가
true_labels = [result['true_label'] for result in results]
predicted_labels = [result['predicted_label'] for result in results]

accuracy = accuracy_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)


In [None]:

# 성능 결과 출력
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

# 7. 예측 결과를 데이터프레임으로 변환 및 출력
results_df = pd.DataFrame(results)