In [12]:
import torch

# Report the CUDA version PyTorch was built with and whether a GPU is usable.
print("CUDA:", torch.version.cuda)
print("is_available:", torch.cuda.is_available())
# get_device_name(0) raises when no CUDA device is present, so only query it
# when one exists; this keeps the cell runnable on CPU-only machines.
print("Device:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "N/A (CPU only)")


CUDA: 12.1
is_available: True
Device: NVIDIA GeForce RTX 4070


In [13]:
import torch

# Same GPU sanity check with Korean labels; the device name is only queried
# when CUDA is available because get_device_name(0) raises otherwise.
print("CUDA 사용 가능:", torch.cuda.is_available())
print("GPU 이름:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "없음 (CPU 사용)")


CUDA 사용 가능: True
GPU 이름: NVIDIA GeForce RTX 4070


In [14]:
# ✅ 셀 1: 모듈 import 및 경로 설정
import os
import random
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from PIL import Image
import time


# Root folder that holds one sub-directory of images per song.
data_root = r"C:\ts_data_triplet_split\train"

# Keep only sub-directories and sort them so the seeded shuffle below always
# starts from the same ordering.
song_ids = sorted(
    filter(lambda name: os.path.isdir(os.path.join(data_root, name)), os.listdir(data_root))
)

# Seed the global RNG, then shuffle in place.
random.seed(42)
random.shuffle(song_ids)

# The first 80 % of the shuffled songs are used for training, the rest for validation.
split_ratio = 0.8
split_index = int(len(song_ids) * split_ratio)
train_ids, val_ids = song_ids[:split_index], song_ids[split_index:]


In [15]:
# ✅ 셀 2: EfficientNet 임베딩 모델 정의
class EfficientNetEmbedding(nn.Module):
    """EfficientNet-B0 feature extractor (frozen) with a trainable embedding head.

    The backbone parameters have ``requires_grad`` switched off, so only the
    final linear layer receives gradients. Outputs are L2-normalised along
    dim 1.

    Args:
        embedding_size: Number of output features of the linear head
            (default 128).
    """

    def __init__(self, embedding_size=128):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision's multi-weight API;
        # IMAGENET1K_V1 is the weight set that flag used to select.
        self.base_model = models.efficientnet_b0(
            weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1
        )
        for param in self.base_model.parameters():
            param.requires_grad = False
        # `features` aliases the backbone's own sub-module. Both attributes are
        # kept so state_dict keys stay compatible with already-saved checkpoints.
        self.features = self.base_model.features
        self.pool = nn.AdaptiveAvgPool2d(1)
        # The head expects 1280 pooled features per sample.
        self.embedding = nn.Linear(1280, embedding_size)
        self.l2_norm = nn.functional.normalize

    def forward(self, x):
        """Map an image batch to unit-length embeddings.

        Args:
            x: Input batch; assumed to be (N, 3, H, W) float images as produced
                by the notebook's transforms — TODO confirm for other callers.

        Returns:
            Tensor of shape (N, embedding_size), L2-normalised along dim 1.
        """
        x = self.features(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = self.embedding(x)
        x = self.l2_norm(x, dim=1)
        return x


In [16]:
# ✅ 셀 3: Triplet Dataset 클래스 정의
class TripletDataset(Dataset):
    """Dataset that yields random (anchor, positive, negative) image triplets.

    Each sub-directory of ``root_dir`` is treated as one song. Songs with fewer
    than two images are skipped because no anchor/positive pair can be drawn
    from them.

    Args:
        root_dir: Directory containing one sub-directory per song.
        song_ids: Optional list of sub-directory names to use; when ``None``
            every sub-directory of ``root_dir`` is used.
        transform: Optional callable applied to each loaded RGB image.
    """

    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, song_ids=None, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        if song_ids is None:
            song_ids = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
        self.song_dirs = [os.path.join(root_dir, d) for d in song_ids]
        # List of (song_dir, [image file names]) for songs with >= 2 images.
        self.data = []
        for song_dir in self.song_dirs:
            # Sorted so seeded sampling does not depend on directory listing order.
            images = sorted(f for f in os.listdir(song_dir) if f.lower().endswith(self.IMAGE_EXTENSIONS))
            if len(images) >= 2:
                self.data.append((song_dir, images))

    def __len__(self):
        """Return the total number of images across all usable songs."""
        return sum(len(images) for _, images in self.data)

    def _load(self, path):
        """Load one image as RGB, close the file, and apply the transform."""
        with Image.open(path) as img:
            rgb = img.convert('RGB')
        if self.transform:
            rgb = self.transform(rgb)
        return rgb

    def __getitem__(self, idx):
        """Return a randomly sampled triplet; ``idx`` is ignored.

        Raises:
            ValueError: If fewer than two usable songs exist, since a negative
                from a different song cannot be drawn.
        """
        num_songs = len(self.data)
        if num_songs < 2:
            raise ValueError(
                f"TripletDataset needs at least 2 songs with >= 2 images each, found {num_songs}"
            )

        anchor_song_idx = random.randrange(num_songs)
        # Draw from the other num_songs - 1 songs and shift past the anchor:
        # uniform over all songs except the anchor, without a retry loop.
        negative_song_idx = random.randrange(num_songs - 1)
        if negative_song_idx >= anchor_song_idx:
            negative_song_idx += 1

        anchor_song_dir, anchor_images = self.data[anchor_song_idx]
        negative_song_dir, negative_images = self.data[negative_song_idx]

        # Two distinct images of the same song form the anchor/positive pair.
        anchor_img_name, positive_img_name = random.sample(anchor_images, 2)
        negative_img_name = random.choice(negative_images)

        anchor_img = self._load(os.path.join(anchor_song_dir, anchor_img_name))
        positive_img = self._load(os.path.join(anchor_song_dir, positive_img_name))
        negative_img = self._load(os.path.join(negative_song_dir, negative_img_name))
        return anchor_img, positive_img, negative_img


In [17]:
# ✅ 셀 4: 전처리, 데이터셋 및 데이터로더 설정
# Training-time preprocessing: small random shifts/zooms for augmentation,
# then resize, tensor conversion and per-channel normalisation.
transform = transforms.Compose([
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

# Validation preprocessing: the same pipeline without the random augmentation,
# so validation images are not perturbed differently on every pass.
eval_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

train_dataset = TripletDataset(root_dir=data_root, song_ids=train_ids, transform=transform)
val_dataset = TripletDataset(root_dir=data_root, song_ids=val_ids, transform=eval_transform)

# num_workers=0: worker processes cannot reliably import a Dataset class that is
# defined inside a notebook on Windows, so data is loaded in the main process.
# Pinned memory only helps host-to-GPU copies, so enable it only with CUDA.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
    pin_memory=torch.cuda.is_available()
)

val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0,
    pin_memory=torch.cuda.is_available()
)



In [18]:
def train_triplet(model, data_loader, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``data_loader``.

    Each batch is an (anchor, positive, negative) triple. All three are moved
    to ``device``, embedded with the same model and scored with ``loss_fn``.
    Progress is printed at the start and end of every batch.

    Returns:
        The sum of per-batch losses divided by ``len(data_loader)``.
    """
    model.train()
    total_loss = 0
    for i, triplet in enumerate(data_loader):
        anchor, positive, negative = triplet
        print(f"Batch {i+1} 시작")  # announce the batch number
        anchor = anchor.to(device)
        positive = positive.to(device)
        negative = negative.to(device)

        optimizer.zero_grad()
        # Embed in anchor -> positive -> negative order.
        embeddings = [model(images) for images in (anchor, positive, negative)]
        loss = loss_fn(*embeddings)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        total_loss += batch_loss
        print(f"Batch {i+1} 완료, Loss: {batch_loss:.4f}")  # report when the batch is done
    return total_loss / len(data_loader)


def validate_triplet(model, data_loader, loss_fn, device):
    """Evaluate ``model`` on triplet batches without tracking gradients.

    Returns:
        The sum of per-batch losses divided by ``len(data_loader)``.
    """
    with torch.no_grad():
        model.eval()
        total_loss = 0
        for triplet in data_loader:
            anchor, positive, negative = triplet
            anchor = anchor.to(device)
            positive = positive.to(device)
            negative = negative.to(device)
            # Embed in anchor -> positive -> negative order.
            embeddings = [model(images) for images in (anchor, positive, negative)]
            total_loss += loss_fn(*embeddings).item()
        return total_loss / len(data_loader)


In [19]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


Using device: cuda


In [20]:
# Re-check the CUDA setup right before training.
print("CUDA:", torch.version.cuda)
print("is_available:", torch.cuda.is_available())
# get_device_name(0) raises when no CUDA device is present, so guard the call.
print("Device:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "N/A (CPU only)")


CUDA: 12.1
is_available: True
Device: NVIDIA GeForce RTX 4070


In [None]:
# ✅ 셀 6: 학습 실행
# Build the model on the selected device together with its loss and optimiser.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EfficientNetEmbedding(embedding_size=128)
model = model.to(device)
loss_fn = nn.TripletMarginLoss(margin=1.0)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# One training pass and one validation pass per epoch; a checkpoint is written
# after every epoch.
num_epochs = 10
for epoch in range(num_epochs):
    train_loss = train_triplet(model, train_loader, optimizer, loss_fn, device)
    val_loss = validate_triplet(model, val_loader, loss_fn, device)
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
    torch.save(model.state_dict(), f"model_epoch_{epoch+1}.pth")




In [None]:
# ✅ 셀 7: 임베딩 추출 및 추천 함수
def extract_embeddings(model, inputs, device, batch_size=64):
    """Run ``model`` in eval mode without gradients and return NumPy embeddings.

    Args:
        model: Module that maps an input batch to an embedding tensor.
        inputs: Either a ``DataLoader`` whose batches are single tensors, or a
            single already-batched tensor.
        device: Device the inputs are moved to before the forward pass.
        batch_size: Unused; kept so existing callers keep working.

    Returns:
        ``np.ndarray`` with all embeddings stacked along axis 0. An empty
        array is returned when a ``DataLoader`` yields no batches.
    """
    model.eval()
    with torch.no_grad():
        if isinstance(inputs, DataLoader):
            chunks = [model(batch.to(device)).cpu().numpy() for batch in inputs]
            # np.vstack raises on an empty list, so handle the empty loader explicitly.
            if not chunks:
                return np.array([])
            return np.vstack(chunks)
        return model(inputs.to(device)).cpu().numpy()

def recommend_topk(query_embedding, gallery_embeddings, gallery_ids, topk=5):
    """Return the ``topk`` gallery entries most similar to the query.

    Args:
        query_embedding: Array holding one embedding; reshaped to (1, -1).
        gallery_embeddings: 2-D array with one embedding per row.
        gallery_ids: Sequence of labels, one per row of ``gallery_embeddings``.
        topk: Maximum number of results.

    Returns:
        List of ``(gallery_id, cosine_similarity)`` pairs, best match first.

    Raises:
        ValueError: If ``gallery_ids`` and ``gallery_embeddings`` differ in
            length; the row-to-label mapping would otherwise be wrong.
    """
    if len(gallery_ids) != len(gallery_embeddings):
        raise ValueError(
            f"gallery_ids has {len(gallery_ids)} entries but gallery_embeddings has "
            f"{len(gallery_embeddings)} rows; they must match one-to-one"
        )
    sims = cosine_similarity(query_embedding.reshape(1, -1), gallery_embeddings).flatten()
    # argsort is ascending, so reverse before taking the first topk indices.
    topk_idx = sims.argsort()[::-1][:topk]
    return [(gallery_ids[i], sims[i]) for i in topk_idx]


In [None]:
# ✅ 셀 8: 추론 및 추천 실행
# Deterministic preprocessing for inference: no random augmentation, so the
# same image always maps to the same embedding.
inference_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


def _load_inference_tensor(path):
    """Open one image, convert it to RGB and apply the inference preprocessing."""
    with Image.open(path) as img:
        return inference_transform(img.convert('RGB'))


gallery_ids = train_ids + val_ids
# TripletDataset is used only to discover (song_dir, image names). Its
# __getitem__ yields random triplets, which cannot be used as a gallery.
gallery_dataset = TripletDataset(root_dir=data_root, song_ids=gallery_ids, transform=inference_transform)
gallery_paths = [os.path.join(song_dir, name) for song_dir, names in gallery_dataset.data for name in names]
# One label per gallery image so ids line up with the embedding rows.
gallery_id_names = [os.path.basename(song_dir) for song_dir, names in gallery_dataset.data for _ in names]
if not gallery_paths:
    raise FileNotFoundError(f"No gallery images found under {data_root}")

# Embed the gallery in fixed-size chunks to bound memory use.
gallery_batch_size = 64
gallery_chunks = []
for start in range(0, len(gallery_paths), gallery_batch_size):
    chunk_paths = gallery_paths[start:start + gallery_batch_size]
    chunk = torch.stack([_load_inference_tensor(p) for p in chunk_paths])
    gallery_chunks.append(extract_embeddings(model, chunk, device))
gallery_embeddings = np.vstack(gallery_chunks)
np.save("gallery_embeddings.npy", gallery_embeddings)

# The test location is a directory of images; use the first image (by name) as the query.
test_img_dir = r"C:\ts_data_triplet_split\test"
test_img_names = sorted(f for f in os.listdir(test_img_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg')))
if not test_img_names:
    raise FileNotFoundError(f"No test images found under {test_img_dir}")
test_img_path = os.path.join(test_img_dir, test_img_names[0])
test_img_tensor = _load_inference_tensor(test_img_path).unsqueeze(0)
query_embedding = extract_embeddings(model, test_img_tensor, device)

recommendations = recommend_topk(query_embedding, gallery_embeddings, gallery_id_names, topk=5)

print("🎧 추천 결과:")
for i, (song_id, score) in enumerate(recommendations, 1):
    print(f"{i}. 곡 ID: {song_id} (유사도: {score:.4f})")


## **전체 코드... 파일 구조 나눠야함..**

In [None]:
# ✅ 셀 1: 모듈 import 및 경로 설정
import os
import random
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from PIL import Image
import time


# Root folder that holds one sub-directory of images per song.
data_root = r"C:\ts_data_triplet_split\train"

# Keep only sub-directories and sort them so the seeded shuffle below always
# starts from the same ordering.
song_ids = sorted(
    filter(lambda name: os.path.isdir(os.path.join(data_root, name)), os.listdir(data_root))
)

# Seed the global RNG, then shuffle in place.
random.seed(42)
random.shuffle(song_ids)

# The first 80 % of the shuffled songs are used for training, the rest for validation.
split_ratio = 0.8
split_index = int(len(song_ids) * split_ratio)
train_ids, val_ids = song_ids[:split_index], song_ids[split_index:]

# ✅ 셀 2: EfficientNet 임베딩 모델 정의
class EfficientNetEmbedding(nn.Module):
    """EfficientNet-B0 feature extractor (frozen) with a trainable embedding head.

    The backbone parameters have ``requires_grad`` switched off, so only the
    final linear layer receives gradients. Outputs are L2-normalised along
    dim 1.

    Args:
        embedding_size: Number of output features of the linear head
            (default 128).
    """

    def __init__(self, embedding_size=128):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision's multi-weight API;
        # IMAGENET1K_V1 is the weight set that flag used to select.
        self.base_model = models.efficientnet_b0(
            weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1
        )
        for param in self.base_model.parameters():
            param.requires_grad = False
        # `features` aliases the backbone's own sub-module. Both attributes are
        # kept so state_dict keys stay compatible with already-saved checkpoints.
        self.features = self.base_model.features
        self.pool = nn.AdaptiveAvgPool2d(1)
        # The head expects 1280 pooled features per sample.
        self.embedding = nn.Linear(1280, embedding_size)
        self.l2_norm = nn.functional.normalize

    def forward(self, x):
        """Map an image batch to unit-length embeddings.

        Args:
            x: Input batch; assumed to be (N, 3, H, W) float images as produced
                by the notebook's transforms — TODO confirm for other callers.

        Returns:
            Tensor of shape (N, embedding_size), L2-normalised along dim 1.
        """
        x = self.features(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = self.embedding(x)
        x = self.l2_norm(x, dim=1)
        return x


# ✅ 셀 3: Triplet Dataset 클래스 정의
class TripletDataset(Dataset):
    """Dataset that yields random (anchor, positive, negative) image triplets.

    Each sub-directory of ``root_dir`` is treated as one song. Songs with fewer
    than two images are skipped because no anchor/positive pair can be drawn
    from them.

    Args:
        root_dir: Directory containing one sub-directory per song.
        song_ids: Optional list of sub-directory names to use; when ``None``
            every sub-directory of ``root_dir`` is used.
        transform: Optional callable applied to each loaded RGB image.
    """

    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, song_ids=None, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        if song_ids is None:
            song_ids = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
        self.song_dirs = [os.path.join(root_dir, d) for d in song_ids]
        # List of (song_dir, [image file names]) for songs with >= 2 images.
        self.data = []
        for song_dir in self.song_dirs:
            # Sorted so seeded sampling does not depend on directory listing order.
            images = sorted(f for f in os.listdir(song_dir) if f.lower().endswith(self.IMAGE_EXTENSIONS))
            if len(images) >= 2:
                self.data.append((song_dir, images))

    def __len__(self):
        """Return the total number of images across all usable songs."""
        return sum(len(images) for _, images in self.data)

    def _load(self, path):
        """Load one image as RGB, close the file, and apply the transform."""
        with Image.open(path) as img:
            rgb = img.convert('RGB')
        if self.transform:
            rgb = self.transform(rgb)
        return rgb

    def __getitem__(self, idx):
        """Return a randomly sampled triplet; ``idx`` is ignored.

        Raises:
            ValueError: If fewer than two usable songs exist, since a negative
                from a different song cannot be drawn.
        """
        num_songs = len(self.data)
        if num_songs < 2:
            raise ValueError(
                f"TripletDataset needs at least 2 songs with >= 2 images each, found {num_songs}"
            )

        anchor_song_idx = random.randrange(num_songs)
        # Draw from the other num_songs - 1 songs and shift past the anchor:
        # uniform over all songs except the anchor, without a retry loop.
        negative_song_idx = random.randrange(num_songs - 1)
        if negative_song_idx >= anchor_song_idx:
            negative_song_idx += 1

        anchor_song_dir, anchor_images = self.data[anchor_song_idx]
        negative_song_dir, negative_images = self.data[negative_song_idx]

        # Two distinct images of the same song form the anchor/positive pair.
        anchor_img_name, positive_img_name = random.sample(anchor_images, 2)
        negative_img_name = random.choice(negative_images)

        anchor_img = self._load(os.path.join(anchor_song_dir, anchor_img_name))
        positive_img = self._load(os.path.join(anchor_song_dir, positive_img_name))
        negative_img = self._load(os.path.join(negative_song_dir, negative_img_name))
        return anchor_img, positive_img, negative_img

# ✅ 셀 4: 전처리, 데이터셋 및 데이터로더 설정
# Training-time preprocessing: small random shifts/zooms for augmentation,
# then resize, tensor conversion and per-channel normalisation.
transform = transforms.Compose([
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

# Validation preprocessing: the same pipeline without the random augmentation,
# so validation images are not perturbed differently on every pass.
eval_transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

train_dataset = TripletDataset(root_dir=data_root, song_ids=train_ids, transform=transform)
val_dataset = TripletDataset(root_dir=data_root, song_ids=val_ids, transform=eval_transform)

# num_workers=0 loads data in the main process (no parallel workers).
# Pinned memory only helps host-to-GPU copies, so enable it only with CUDA.
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
    pin_memory=torch.cuda.is_available()
)

val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0,
    pin_memory=torch.cuda.is_available()
)

def train_triplet(model, data_loader, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``data_loader``.

    Each batch is an (anchor, positive, negative) triple. All three are moved
    to ``device``, embedded with the same model and scored with ``loss_fn``.

    Returns:
        The sum of per-batch losses divided by ``len(data_loader)``.
    """
    model.train()
    total_loss = 0
    for triplet in data_loader:
        anchor, positive, negative = triplet
        anchor = anchor.to(device)
        positive = positive.to(device)
        negative = negative.to(device)

        optimizer.zero_grad()
        # Embed in anchor -> positive -> negative order.
        embeddings = [model(images) for images in (anchor, positive, negative)]
        loss = loss_fn(*embeddings)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(data_loader)


def validate_triplet(model, data_loader, loss_fn, device):
    """Evaluate ``model`` on triplet batches without tracking gradients.

    Returns:
        The sum of per-batch losses divided by ``len(data_loader)``.
    """
    with torch.no_grad():
        model.eval()
        total_loss = 0
        for triplet in data_loader:
            anchor, positive, negative = triplet
            anchor = anchor.to(device)
            positive = positive.to(device)
            negative = negative.to(device)
            # Embed in anchor -> positive -> negative order.
            embeddings = [model(images) for images in (anchor, positive, negative)]
            total_loss += loss_fn(*embeddings).item()
        return total_loss / len(data_loader)


# ✅ 셀 6: 학습 실행 -- 6번까진 잘 돌아감. 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EfficientNetEmbedding(embedding_size=128).to(device)
loss_fn = nn.TripletMarginLoss(margin=1.0)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

num_epochs = 10
final_checkpoint = f"model_epoch_{num_epochs}.pth"

if os.path.exists(final_checkpoint):
    # Training has already been run: reuse the saved weights instead of retraining.
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state_dict contains.
    model.load_state_dict(torch.load(final_checkpoint, map_location=device, weights_only=True))
else:
    # No final checkpoint yet: train from scratch, saving a checkpoint per epoch.
    for epoch in range(num_epochs):
        train_loss = train_triplet(model, train_loader, optimizer, loss_fn, device)
        val_loss = validate_triplet(model, val_loader, loss_fn, device)
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")
        torch.save(model.state_dict(), f"model_epoch_{epoch+1}.pth")

# Everything below is inference, so leave the model in eval mode.
model.eval()

# ✅ 셀 7: 임베딩 추출 및 추천 함수
def extract_embeddings(model, inputs, device, batch_size=64):
    """Run ``model`` in eval mode without gradients and return NumPy embeddings.

    Progress is reported through ``[DEBUG]`` prints.

    Args:
        model: Module that maps an input batch to an embedding tensor.
        inputs: Either a ``DataLoader`` whose batches are single tensors, or a
            single already-batched tensor.
        device: Device the inputs are moved to before the forward pass.
        batch_size: Unused; kept so existing callers keep working.

    Returns:
        ``np.ndarray`` with all embeddings stacked along axis 0. An empty
        array is returned when the ``DataLoader``'s dataset is empty.
    """
    print("[DEBUG] ▶ 임베딩 추출 시작")
    model.eval()
    embeddings = []
    with torch.no_grad():
        if isinstance(inputs, DataLoader):
            if len(inputs.dataset) == 0:
                print("[ERROR] ⚠️ DataLoader의 Dataset이 비어 있습니다.")
                return np.array([])  # nothing to embed

            print(f"[DEBUG] ▶ inputs 길이: {len(inputs.dataset)}")
            # enumerate supplies the batch index used in the progress messages.
            for i, batch in enumerate(inputs):
                print(f"[DEBUG] ▶ 배치 {i+1} 추출 중...")
                batch = batch.to(device)
                emb = model(batch)
                print(f"[DEBUG] ▶ 배치 {i+1} 임베딩 shape: {emb.shape}")
                embeddings.append(emb.cpu().numpy())
            embeddings = np.vstack(embeddings)
            print(f"[DEBUG] ▶ 전체 임베딩 shape: {embeddings.shape}")
        else:
            print("[DEBUG] ▶ 단일 이미지 임베딩 추출")
            inputs = inputs.to(device)
            emb = model(inputs)
            print(f"[DEBUG] ▶ 단일 임베딩 shape: {emb.shape}")
            embeddings = emb.cpu().numpy()
    print("[DEBUG] ▶ 임베딩 추출 완료")
    return embeddings

def recommend_topk(query_embedding, gallery_embeddings, gallery_ids, topk=5):
    """Return the ``topk`` gallery entries most similar to the query.

    Args:
        query_embedding: Array holding one embedding; reshaped to (1, -1).
        gallery_embeddings: 2-D array with one embedding per row.
        gallery_ids: Sequence of labels, one per row of ``gallery_embeddings``.
        topk: Maximum number of results.

    Returns:
        List of ``(gallery_id, cosine_similarity)`` pairs, best match first.

    Raises:
        ValueError: If ``gallery_ids`` and ``gallery_embeddings`` differ in
            length; the row-to-label mapping would otherwise be wrong.
    """
    if len(gallery_ids) != len(gallery_embeddings):
        raise ValueError(
            f"gallery_ids has {len(gallery_ids)} entries but gallery_embeddings has "
            f"{len(gallery_embeddings)} rows; they must match one-to-one"
        )
    sims = cosine_similarity(query_embedding.reshape(1, -1), gallery_embeddings).flatten()
    # argsort is ascending, so reverse before taking the first topk indices.
    topk_idx = sims.argsort()[::-1][:topk]
    return [(gallery_ids[i], sims[i]) for i in topk_idx]

# ✅ 셀 7,8사이 : 싱글데이터셋 만드는 클래스 필요.
class SingleImageDataset(Dataset):
    """Dataset that yields one preprocessed image per item.

    ``image_paths[k]`` and ``image_ids[k]`` describe the same image: its file
    path and the name of the song directory it was found in.

    Args:
        root_dir: Directory containing one sub-directory per song.
        song_ids: Optional list of sub-directory names to include; when
            ``None`` every sub-directory of ``root_dir`` is used.
        transform: Optional callable applied to each loaded RGB image.
    """

    def __init__(self, root_dir, song_ids=None, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.image_ids = []
        if song_ids is None:
            # The default used to crash when iterated; fall back to all song folders.
            song_ids = [d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
        for song_id in song_ids:
            song_dir = os.path.join(root_dir, song_id)
            # Sorted so item order (and saved embedding rows) is stable between runs.
            for img in sorted(os.listdir(song_dir)):
                if img.lower().endswith(('.png', '.jpg', '.jpeg')):
                    self.image_paths.append(os.path.join(song_dir, img))
                    self.image_ids.append(os.path.basename(song_dir))

    def __len__(self):
        """Return the number of images found."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB, apply the transform and return it."""
        print(f"[DEBUG] ▶ 이미지 로드 시작: {self.image_paths[idx]}")
        # The context manager closes the file once the RGB copy is made.
        with Image.open(self.image_paths[idx]) as raw:
            img = raw.convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img


# ✅ 셀 8: 추론 및 추천 실행
# ✅ 갤러리 데이터 준비 및 임베딩 추출. 한번만 해서 저장해놨다가 추천할 때 사용.
# Deterministic preprocessing for inference: no random augmentation, so gallery
# and query embeddings are reproducible.
inference_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

gallery_ids = train_ids + val_ids
gallery_dataset = SingleImageDataset(root_dir=data_root, song_ids=gallery_ids, transform=inference_transform)

# num_workers=0: worker processes cannot reliably import a Dataset class that is
# defined inside a notebook on Windows, so images are loaded in the main process.
gallery_loader = DataLoader(gallery_dataset, batch_size=64, shuffle=False, num_workers=0)
gallery_embeddings = extract_embeddings(model, gallery_loader, device)
np.save("gallery_embeddings.npy", gallery_embeddings)

# One song id per gallery image, in the same order as the embedding rows.
gallery_id_names = list(gallery_dataset.image_ids)

# Directory holding the query (test) images.
test_img_dir = r"C:\ts_data_triplet_split\test"

# Pick up to 10 test images at random (seeded for repeatability).
all_test_paths = [
    os.path.join(test_img_dir, f)
    for f in os.listdir(test_img_dir)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
]
random.seed(42)
test_img_paths = random.sample(all_test_paths, min(10, len(all_test_paths)))

# Run the recommendation for each selected test image.
for test_img_path in test_img_paths:
    print(f"\n[DEBUG] ▶ 테스트 이미지: {test_img_path}")
    with Image.open(test_img_path) as raw_img:
        test_img = raw_img.convert('RGB')
    test_img_tensor = inference_transform(test_img).unsqueeze(0)  # (1, C, H, W)
    query_embedding = extract_embeddings(model, test_img_tensor, device)

    print("[DEBUG] ▶ 유사도 계산 중...")
    recommendations = recommend_topk(query_embedding, gallery_embeddings, gallery_id_names, topk=5)

    print(f"\n🎧 [{os.path.basename(test_img_path)}]에 대한 추천 결과:")
    for i, (song_id, score) in enumerate(recommendations, 1):
        print(f"{i}. 곡 ID: {song_id} (유사도: {score:.4f})")




Epoch 1/10, Train Loss: 0.8948, Val Loss: 0.8582
Epoch 2/10, Train Loss: 0.8108, Val Loss: 0.7988
Epoch 3/10, Train Loss: 0.7629, Val Loss: 0.7507
Epoch 4/10, Train Loss: 0.7410, Val Loss: 0.7102
Epoch 5/10, Train Loss: 0.7000, Val Loss: 0.6907
Epoch 6/10, Train Loss: 0.6994, Val Loss: 0.7181
Epoch 7/10, Train Loss: 0.6782, Val Loss: 0.6610
Epoch 8/10, Train Loss: 0.6557, Val Loss: 0.6797
Epoch 9/10, Train Loss: 0.6722, Val Loss: 0.6530
Epoch 10/10, Train Loss: 0.6613, Val Loss: 0.6443
