In [5]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchmetrics
from PIL import Image
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from torch.utils.data import DataLoader, Dataset, Subset
from torchvision import models, transforms
from tqdm import tqdm


In [35]:
# List of class names (needs to be adjusted to match the actual data)
class_names = [
    "Romantic",
    "Resort",
    "Tomboy",
    "Manish",
    "Genderless",
    "Modern",
    "Sophisticated",
    "Sporty",
    "Country",
    "Classic",
    "Feminine",
]


In [6]:
def create_image_paths(df, column_name, base_path='../data'):
    """Add an 'Image Path' column built from the values of ``column_name``.

    Each value ``x`` becomes ``os.path.join(base_path, f'{x}.jpg')``.

    Args:
        df: DataFrame to extend. It is modified in place.
        column_name: Name of the column whose values are used as file stems.
        base_path: Directory prefix joined in front of every file name.

    Returns:
        The same ``df`` object, now carrying the 'Image Path' column.
    """
    def _jpg_path(stem):
        # Build "<base_path>/<stem>.jpg" for a single value.
        return os.path.join(base_path, f'{stem}.jpg')

    df['Image Path'] = df[column_name].apply(_jpg_path)
    return df

class ClothesDataset(Dataset):
    """Multi-label image dataset backed by a DataFrame.

    The frame must contain an 'Image Path' column; every other column is
    used as a label column.

    Args:
        df: DataFrame with the label columns and an 'Image Path' column.
        transform: Optional callable applied to the loaded RGB PIL image.
        base_path: Directory joined in front of every 'Image Path' value
            before the result is made absolute. Because ``os.path.join``
            is used, an absolute 'Image Path' value ignores ``base_path``.
    """

    def __init__(self, df, transform=None, base_path='../data'):
        self.transform = transform
        self.image_paths = df['Image Path'].apply(
            lambda x: os.path.abspath(os.path.join(base_path, x))
        ).to_numpy()
        # Select labels by column name rather than by position so the result
        # does not depend on 'Image Path' being the last column.
        self.labels = np.array(df.drop(columns=['Image Path']))
        print(f'Labels shape: {self.labels.shape}')

    def __len__(self):
        """Return the number of samples (one per image path)."""
        return len(self.image_paths)

    @staticmethod
    def _to_float_tensor(image):
        """Convert a transformed image into a detached float32 tensor.

        Tensors are cloned, detached and cast to float. Anything else (for
        example a PIL image when no transform or a non-tensor transform is
        used) is converted through numpy, scaled by 1/255 and, when it has
        three dimensions, permuted from HWC to CHW.
        NOTE(review): the 1/255 scaling assumes 8-bit pixel data -- confirm
        if non-tensor transforms that return other ranges are ever used.
        """
        if isinstance(image, torch.Tensor):
            return image.clone().detach().float()

        pixels = np.asarray(image, dtype=np.float32) / 255.0
        tensor = torch.from_numpy(pixels)
        if tensor.ndim == 3:
            tensor = tensor.permute(2, 0, 1)
        return tensor.contiguous()

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(image, label)`` as float32 tensors, or ``(None, None)`` when
            the image cannot be opened (the error is printed).
        """
        img_path = self.image_paths[idx]
        try:
            # The context manager closes the underlying file once the pixel
            # data has been converted to RGB.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert("RGB")
        except Exception as e:
            print(f"Error loading image {img_path}: {e}")
            # NOTE(review): a DataLoader using the default collate function
            # cannot batch None values -- callers need a collate_fn that
            # filters these samples out.
            return None, None

        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return self._to_float_tensor(image), torch.tensor(label, dtype=torch.float32)


In [7]:
def calculate_partial_accuracy(outputs, labels):
    """Fraction of rows for which at least one thresholded prediction hits a label.

    Predictions are ``outputs > 0.5``. A row counts as correct when the sum of
    ``prediction * label`` over dimension 1 is greater than zero.

    Args:
        outputs: Tensor of scores; thresholded at 0.5.
        labels: Tensor multiplied element-wise with the predictions.

    Returns:
        The mean of the per-row correctness flags as a Python float.
    """
    thresholded = outputs.gt(0.5).float()
    overlap_per_row = torch.sum(thresholded * labels, dim=1)
    row_is_hit = overlap_per_row.gt(0).float()
    return row_is_hit.mean().item()

def calculate_metrics(outputs, labels, threshold=0.5):
    """Compute macro-averaged precision, recall and F1 for thresholded outputs.

    Relies on ``precision_score``, ``recall_score`` and ``f1_score`` from
    ``sklearn.metrics`` being imported at the top of the notebook.

    Args:
        outputs: Tensor of scores; values above ``threshold`` become 1.0,
            everything else 0.0.
        labels: Tensor of ground-truth labels.
        threshold: Decision threshold applied to ``outputs`` (default 0.5).

    Returns:
        Tuple ``(precision, recall, f1)``, each macro-averaged.
    """
    preds = (outputs > threshold).float().cpu().numpy()
    labels = labels.cpu().numpy()

    # zero_division=0 yields the same score as sklearn's default ('warn')
    # but without emitting a warning for classes that are never predicted
    # or never present.
    precision = precision_score(labels, preds, average='macro', zero_division=0)
    recall = recall_score(labels, preds, average='macro', zero_division=0)
    f1 = f1_score(labels, preds, average='macro', zero_division=0)

    return precision, recall, f1


In [8]:
def load_model(model_path, device, num_classes):
    """Rebuild the ResNet-50 classifier and restore its weights from a checkpoint.

    The backbone parameters are frozen and ``model.fc`` is replaced by an MLP
    head (1024 -> 512 -> 256 -> ``num_classes``) in which every hidden layer
    is followed by ReLU, BatchNorm1d and Dropout(0.4).

    Args:
        model_path: Path to a ``state_dict`` file readable by ``torch.load``.
        device: Device the checkpoint tensors are mapped to and the model is
            moved to.
        num_classes: Number of output units of the final linear layer. The
            checkpoint must have been saved with the same value, otherwise
            ``load_state_dict`` raises.

    Returns:
        The model on ``device`` with the checkpoint weights loaded. This
        function does not call ``model.eval()``.
    """
    # No pretrained download: the strict load_state_dict below overwrites
    # every parameter and buffer, so the initial weights are never used.
    model = models.resnet50(weights=None)
    for param in model.parameters():
        param.requires_grad = False

    num_ftrs = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_ftrs, 1024),
        nn.ReLU(),
        nn.BatchNorm1d(1024),
        nn.Dropout(0.4),
        nn.Linear(1024, 512),
        nn.ReLU(),
        nn.BatchNorm1d(512),
        nn.Dropout(0.4),
        nn.Linear(512, 256),
        nn.ReLU(),
        nn.BatchNorm1d(256),
        nn.Dropout(0.4),
        # Output width follows the argument instead of a hard-coded 11.
        nn.Linear(256, num_classes)
    )

    model = model.to(device)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    return model


In [29]:
def load_model34(model_path, device, num_classes):
    """Rebuild the ResNet-34 classifier and restore its weights from a checkpoint.

    The backbone parameters are frozen and ``model.fc`` is replaced by
    Linear(in_features, 1024) -> ReLU -> Dropout(0.2) -> Linear(1024, num_classes).

    Args:
        model_path: Path to a ``state_dict`` file readable by ``torch.load``.
        device: Device the checkpoint tensors are mapped to and the model is
            moved to.
        num_classes: Number of output units of the final linear layer.

    Returns:
        The model on ``device`` with the checkpoint weights loaded. This
        function does not call ``model.eval()``.
    """
    # No pretrained download: the strict load_state_dict below overwrites
    # every parameter and buffer, so the initial weights are never used.
    model = models.resnet34(weights=None)
    for param in model.parameters():
        param.requires_grad = False

    num_ftrs = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_ftrs, 1024),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(1024, num_classes)
    )

    model = model.to(device)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    return model


In [30]:
def load_model18(model_path, device, num_classes):
    """Rebuild the ResNet-18 classifier and restore its weights from a checkpoint.

    The backbone parameters are frozen and ``model.fc`` is replaced by
    Linear(in_features, 1024) -> ReLU -> Dropout(0.2) -> Linear(1024, num_classes).

    Args:
        model_path: Path to a ``state_dict`` file readable by ``torch.load``.
        device: Device the checkpoint tensors are mapped to and the model is
            moved to.
        num_classes: Number of output units of the final linear layer.

    Returns:
        The model on ``device`` with the checkpoint weights loaded. This
        function does not call ``model.eval()``.
    """
    # No pretrained download: the strict load_state_dict below overwrites
    # every parameter and buffer, so the initial weights are never used.
    model = models.resnet18(weights=None)
    for param in model.parameters():
        param.requires_grad = False

    num_ftrs = model.fc.in_features
    model.fc = nn.Sequential(
        nn.Linear(num_ftrs, 1024),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(1024, num_classes)
    )

    model = model.to(device)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    return model


In [56]:
def evaluate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` and report the loss plus classification metrics.

    The model is put in eval mode and run under ``torch.no_grad()``. A sigmoid
    is applied to the model output before it is passed to ``criterion`` and to
    ``calculate_metrics``.

    Args:
        model: Module called as ``model(inputs)``.
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            tensor.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(avg_loss, precision, recall, f1)``; the last three are
        whatever ``calculate_metrics`` returns for all outputs and labels.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    weighted_loss = 0.0
    total_samples = 0

    all_labels = []
    all_outputs = []

    with torch.no_grad():
        for inputs, labels in tqdm(loader, desc="Evaluating"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = torch.sigmoid(model(inputs))
            loss = criterion(outputs, labels)

            # Weight each batch by its size so a shorter final batch does not
            # count as much as a full one.
            # NOTE(review): assumes criterion returns the batch mean (the
            # default reduction) -- confirm if a 'sum' reduction is ever used.
            batch_size = labels.size(0)
            weighted_loss += loss.item() * batch_size
            total_samples += batch_size

            all_labels.append(labels.cpu())
            all_outputs.append(outputs.cpu())

    if total_samples == 0:
        raise ValueError("evaluate() received a loader that yielded no samples")

    avg_loss = weighted_loss / total_samples

    precision, recall, f1 = calculate_metrics(torch.cat(all_outputs), torch.cat(all_labels))

    return avg_loss, precision, recall, f1


In [None]:
def evaluate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` and report the loss and partial accuracy.

    The model is put in eval mode and run under ``torch.no_grad()``. A sigmoid
    is applied to the model output before it is passed to ``criterion`` and to
    ``calculate_partial_accuracy``.

    NOTE(review): this notebook defines ``evaluate`` more than once; the other
    definitions return four values ``(loss, precision, recall, f1)`` while
    this one returns two. Whichever cell ran last wins -- confirm which one
    the evaluation cells are meant to call.

    Args:
        model: Module called as ``model(inputs)``.
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            tensor.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(avg_loss, accuracy)``, both averaged over samples.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    weighted_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in tqdm(loader, desc="Evaluating"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = torch.sigmoid(model(inputs))
            loss = criterion(outputs, labels)

            batch_size = inputs.size(0)
            # Weight the batch loss by batch size, matching how accuracy is
            # accumulated below.
            # NOTE(review): assumes criterion returns the batch mean (the
            # default reduction) -- confirm if a 'sum' reduction is ever used.
            weighted_loss += loss.item() * batch_size

            # Accuracy: the only accuracy helper defined in this notebook is
            # calculate_partial_accuracy (the original call targeted an
            # undefined `calculate_accuracy`).
            accuracy = calculate_partial_accuracy(outputs, labels)
            correct += accuracy * batch_size
            total += batch_size

    if total == 0:
        raise ValueError("evaluate() received a loader that yielded no samples")

    avg_loss = weighted_loss / total
    accuracy = correct / total
    return avg_loss, accuracy

def balance_labels(df, sample_size):
    """Sample up to ``sample_size`` positive rows for every label column.

    Every column other than 'Image Path' is treated as a label column. For
    each one, the rows where the label equals 1 are sampled without
    replacement (``random_state=42``) and the per-label samples are
    concatenated. A row that is positive for several labels can therefore
    appear more than once in the result.

    Args:
        df: DataFrame with label columns and an 'Image Path' column.
        sample_size: Maximum number of rows drawn per label.

    Returns:
        A new DataFrame with a fresh 0..n-1 index. Each sampled row keeps its
        own 'Image Path' value.

    Raises:
        KeyError: If ``df`` has no 'Image Path' column.
    """
    if 'Image Path' not in df.columns:
        raise KeyError('Image Path')

    label_columns = [column for column in df.columns if column != 'Image Path']
    if not label_columns:
        # Nothing to balance on; return an empty frame with the same columns.
        return df.iloc[0:0].reset_index(drop=True)

    balanced_dfs = []
    for label in label_columns:
        label_df = df[df[label] == 1]
        sampled_label_df = label_df.sample(n=min(len(label_df), sample_size), random_state=42)
        balanced_dfs.append(sampled_label_df)

    # The sampled rows already carry their own 'Image Path'. Re-assigning the
    # column from the original frame by the *reset* index would pair rows
    # with the wrong images, so no such lookup is done here.
    return pd.concat(balanced_dfs).reset_index(drop=True)

In [9]:
# Use the GPU when one is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the dataframe and drop the 'Image Filename' and 'None' columns
df_raw = pd.read_csv('multi_label_dataset.csv')
df = df_raw.drop(columns=['Image Filename', 'None'])

# Build the image paths from 'Image ID', then drop the remaining helper columns
df = create_image_paths(df, 'Image ID')
df = df.drop(columns=['Unnamed: 0', 'Image ID', '스트리트'])

# Dataset with a resize-to-224x224 + ToTensor pipeline
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)
dataset = ClothesDataset(df=df, transform=transform)

# Index every sample and hold out 10% of the indices as the test split
indices = list(range(len(dataset)))
train_val_indices, test_indices = train_test_split(
    indices,
    test_size=0.1,
    random_state=42,
)
test_dataset = Subset(dataset, test_indices)
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    pin_memory=True,
)

# Loss function
criterion = nn.BCELoss()

# Load the model from the checkpoint file
model_path = 'best_model_integrated_v3.pth'
model = load_model(model_path, device, len(class_names))

# Evaluate the model on the test split and report the results
test_loss, test_precision, test_recall, test_f1 = evaluate(
    model, test_loader, criterion, device
)
print(f'Test Loss: {test_loss:.3f}')
print(f'Test Precision: {test_precision * 100:.2f}%')
print(f'Test Recall: {test_recall * 100:.2f}%')
print(f'Test F1 Score: {test_f1 * 100:.2f}%')


Labels shape: (397555, 11)


NameError: name 'class_names' is not defined

In [51]:
# Use the GPU when one is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the dataframe and drop the 'Image Filename' and 'None' columns
df_raw = pd.read_csv('multi_label_dataset.csv')
df = df_raw.drop(columns=['Image Filename', 'None'])

# Build the image paths from 'Image ID', then drop the remaining helper columns
df = create_image_paths(df, 'Image ID')
df = df.drop(columns=['Unnamed: 0', 'Image ID', '스트리트'])

# Dataset with a resize-to-224x224 + ToTensor pipeline
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
)
dataset = ClothesDataset(df=df, transform=transform)

# Index every sample and hold out 10% of the indices as the test split
indices = list(range(len(dataset)))
train_val_indices, test_indices = train_test_split(
    indices,
    test_size=0.1,
    random_state=42,
)
test_dataset = Subset(dataset, test_indices)
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    pin_memory=True,
)

# Loss function
criterion = nn.BCELoss()

# Load the model from the checkpoint file
model_path = 'best_model_integrated_v4.pth'
model = load_model(model_path, device, len(class_names))

# Evaluate the model on the test split and report the results
test_loss, test_precision, test_recall, test_f1 = evaluate(
    model, test_loader, criterion, device
)
print(f'Test Loss: {test_loss:.3f}')
print(f'Test Precision: {test_precision * 100:.2f}%')
print(f'Test Recall: {test_recall * 100:.2f}%')
print(f'Test F1 Score: {test_f1 * 100:.2f}%')


Labels shape: (397555, 11)


NameError: name 'load_model50' is not defined

In [3]:
import torch
import numpy as np
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score

def calculate_metrics(outputs, labels):
    """Compute precision, recall and F1 for outputs thresholded at 0.5.

    Multi-column (multilabel) targets are macro-averaged, matching the other
    ``calculate_metrics`` definition in this notebook. One-dimensional or
    single-column targets keep the ``average='binary'`` behaviour.

    Args:
        outputs: Tensor or array-like of scores; values above 0.5 count as 1.
        labels: Tensor or array-like of ground-truth labels.

    Returns:
        Tuple ``(precision, recall, f1)``.
    """
    def _to_numpy(values):
        # sklearn needs host arrays; tensors may live on a GPU.
        if isinstance(values, torch.Tensor):
            return values.detach().cpu().numpy()
        return np.asarray(values)

    y_true = _to_numpy(labels)
    # Apply the decision threshold to the scores
    y_pred = (_to_numpy(outputs) > 0.5).astype(int)

    is_multilabel = y_true.ndim == 2 and y_true.shape[1] > 1
    if is_multilabel:
        # average='binary' is rejected by sklearn for multilabel targets.
        average = 'macro'
    else:
        average = 'binary'
        y_true = y_true.ravel()
        y_pred = y_pred.ravel()

    # Compute precision, recall and F1 score
    precision = precision_score(y_true, y_pred, average=average)
    recall = recall_score(y_true, y_pred, average=average)
    f1 = f1_score(y_true, y_pred, average=average)

    return precision, recall, f1

def evaluate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` and report the loss plus classification metrics.

    The model is put in eval mode and run under ``torch.no_grad()``. A sigmoid
    is applied to the model output before it is passed to ``criterion`` and to
    ``calculate_metrics``.

    Args:
        model: Module called as ``model(inputs)``.
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            tensor.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(avg_loss, precision, recall, f1)``; the last three are
        whatever ``calculate_metrics`` returns for all outputs and labels.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    weighted_loss = 0.0
    total_samples = 0

    all_labels = []
    all_outputs = []

    with torch.no_grad():
        for inputs, labels in tqdm(loader, desc="Evaluating"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = torch.sigmoid(model(inputs))
            loss = criterion(outputs, labels)

            # Weight each batch by its size so a shorter final batch does not
            # count as much as a full one.
            # NOTE(review): assumes criterion returns the batch mean (the
            # default reduction) -- confirm if a 'sum' reduction is ever used.
            batch_size = labels.size(0)
            weighted_loss += loss.item() * batch_size
            total_samples += batch_size

            all_labels.append(labels.cpu())
            all_outputs.append(outputs.cpu())

    if total_samples == 0:
        raise ValueError("evaluate() received a loader that yielded no samples")

    avg_loss = weighted_loss / total_samples

    precision, recall, f1 = calculate_metrics(torch.cat(all_outputs), torch.cat(all_labels))

    return avg_loss, precision, recall, f1

# Example usage
# avg_loss, precision, recall, f1 = evaluate(model, test_loader, criterion, device)
# print(f"Test Loss: {avg_loss}, Precision: {precision}, Recall: {recall}, F1 Score: {f1}")


In [4]:
# Run the evaluation and display the returned tuple. This cell does not define
# `model`, `test_loader`, `criterion` or `device`; they must already exist in
# the kernel from an earlier cell.
evaluate(model, test_loader, criterion, device)

NameError: name 'model' is not defined