In [6]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import torch

if torch.cuda.is_available():
    print("✅ GPU Available!")
    print("GPU :", torch.cuda.get_device_name(0))

✅ GPU 사용 가능!
GPU 이름: Tesla T4


In [17]:
import os
import torch
from torch.utils.data import Dataset

# 01 = neutral, 02 = calm, 03 = happy, 04 = sad, 05 = angry, 06 = fearful, 07 = disgust, 08 = surprised
label_to_score = {
    0: 0.0,  # neutral
    1: 0.0,  # calm
    2: 0.0,  # happy
    3: 0.4,  # sad
    4: 0.7,  # angry
    5: 1.0,  # fearful
    6: 0.5,  # disgust
    7: 0.3,  # surprised
}

class MelSpectrogramDataset(Dataset):
    def __init__(self, data, use_path_list=False):
        if use_path_list:
            self.file_list = data
        else:
            self.file_list = [
                os.path.join(data, fname)
                for fname in os.listdir(data)
                if fname.endswith('.pt') and 'mel' in fname
            ]
        self.file_list.sort()

    def __len__(self):
        return len(self.file_list)

    def __getitem__(self, idx):
        path = self.file_list[idx]
        mel = torch.load(path)

        if mel.dim() == 2:
            mel = mel.unsqueeze(0)  # (1, H, W)
        elif mel.shape[0] != 1:
            mel = mel[:1, :, :]  # 첫 채널만 가져옴

        # 파일명 예시: mel_00003_4.pt → label은 맨 끝
        label = int(os.path.basename(path).split('_')[-1].replace(".pt", ""))
        label = label_to_score[label]


        return mel, torch.tensor(label, dtype=torch.float32)


In [18]:
from torchvision import models
import torch.nn as nn
class EmotionCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = models.resnet18(pretrained=True)


        self.model.conv1 = nn.Conv2d(
            in_channels=1, out_channels=64,
            kernel_size=7, stride=2, padding=3, bias=False
        )

        in_features = self.model.fc.in_features
        self.model.fc = nn.Sequential(
            nn.Linear(in_features, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

In [19]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
import os
from glob import glob

from tqdm import tqdm


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

all_files = sorted(glob("/content/drive/MyDrive/processed_data/mel_*.pt"))

train_files, val_files = train_test_split(all_files, test_size=0.2, random_state=42)

train_dataset = MelSpectrogramDataset(train_files, use_path_list=True)
val_dataset = MelSpectrogramDataset(val_files, use_path_list=True)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

for mel, label in train_loader:
    print(mel.shape)  # (batch_size, 1, 128, 313)
    break

model = EmotionCNN().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    train_loss = 0

    train_loop = tqdm(train_loader, desc=f"[Epoch {epoch+1}/{num_epochs}] Training", leave=False)

    for mel, label in train_loader:
        mel = mel.to(device)
        label = label.to(device).float().view(-1, 1)

        optimizer.zero_grad()
        output = model(mel)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * mel.size(0)
        train_loop.set_postfix(loss=loss.item())

    train_loss /= len(train_loader.dataset)

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for mel, label in val_loader:
            mel = mel.to(device)
            label = label.to(device).float().view(-1, 1)

            output = model(mel)
            loss = criterion(output, label)
            val_loss += loss.item() * mel.size(0)

    val_loss /= len(val_loader.dataset)

    print(f"[Epoch {epoch+1}] Train Loss: {train_loss:.4f} || Val Loss: {val_loss:.4f}")



Using device: cuda
torch.Size([32, 1, 128, 313])


  mel = torch.load(path)
[Epoch 1/10] Training:   0%|          | 0/36 [00:00<?, ?it/s]
[Epoch 1/10] Training:   0%|          | 0/36 [00:07<?, ?it/s, loss=0.0894]

[Epoch 1] Train Loss: 0.1042 || Val Loss: 0.0839



                                                                          
[Epoch 2/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0576][A
[Epoch 2/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0886][A
[Epoch 2/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.104] [A
[Epoch 2/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.109][A
[Epoch 2/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.064][A
[Epoch 2/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0708][A
[Epoch 2/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0893][A
[Epoch 2/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0683][A
[Epoch 2/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0528][A
[Epoch 2/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0601][A
[Epoch 2/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0732][A
[Epoch 2/10] Training:   0%|          | 0/36 [00:02<?, ?it/s, loss=0

[Epoch 2] Train Loss: 0.0812 || Val Loss: 0.0767


[Epoch 3/10] Training:   0%|          | 0/36 [00:00<?, ?it/s]
[Epoch 3/10] Training:   0%|          | 0/36 [00:06<?, ?it/s, loss=0.0597]

[Epoch 3] Train Loss: 0.0695 || Val Loss: 0.1372



[Epoch 4/10] Training:   0%|          | 0/36 [00:00<?, ?it/s][A

[Epoch 4/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0774][A
[Epoch 4/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0647][A
[Epoch 4/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0709][A
[Epoch 4/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0753][A
[Epoch 4/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0593][A
[Epoch 4/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0668][A
[Epoch 4/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0721][A
[Epoch 4/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0536][A
[Epoch 4/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0945][A
[Epoch 4/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0887][A
[Epoch 4/10] Training:   0%|          | 0/36 [00:02<?, ?it/s, loss=0.0837][A
[Epoch 4/10] Training:   0%|          | 0/36 [00:02<?, ?it/s, loss=0.0714]

[Epoch 4] Train Loss: 0.0713 || Val Loss: 0.1089


[Epoch 5/10] Training:   0%|          | 0/36 [00:00<?, ?it/s]
[Epoch 5/10] Training:   0%|          | 0/36 [00:06<?, ?it/s, loss=0.0524]

[Epoch 5] Train Loss: 0.0499 || Val Loss: 0.0728



                                                                          
[Epoch 6/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0236][A
[Epoch 6/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.024] [A
[Epoch 6/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0153][A
[Epoch 6/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0407][A
[Epoch 6/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0648][A
[Epoch 6/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0511][A
[Epoch 6/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0531][A
[Epoch 6/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.034] [A
[Epoch 6/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0517][A
[Epoch 6/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0303][A
[Epoch 6/10] Training:   0%|          | 0/36 [00:02<?, ?it/s, loss=0.0204][A
[Epoch 6/10] Training:   0%|          | 0/36 [00:02<?, ?it/s, loss

[Epoch 6] Train Loss: 0.0429 || Val Loss: 0.2661


[Epoch 7/10] Training:   0%|          | 0/36 [00:00<?, ?it/s]
[Epoch 7/10] Training:   0%|          | 0/36 [00:06<?, ?it/s, loss=0.0323]

[Epoch 7] Train Loss: 0.0410 || Val Loss: 0.0824



                                                                          
[Epoch 8/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0252][A
[Epoch 8/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0242][A
[Epoch 8/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0269][A
[Epoch 8/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0352][A
[Epoch 8/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0182][A
[Epoch 8/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0338][A
[Epoch 8/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0209][A
[Epoch 8/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0141][A
[Epoch 8/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0365][A
[Epoch 8/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0198][A
[Epoch 8/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0463][A
[Epoch 8/10] Training:   0%|          | 0/36 [00:02<?, ?it/s, loss

[Epoch 8] Train Loss: 0.0347 || Val Loss: 0.2740


[Epoch 9/10] Training:   0%|          | 0/36 [00:00<?, ?it/s]
[Epoch 9/10] Training:   0%|          | 0/36 [00:06<?, ?it/s, loss=0.035] 

[Epoch 9] Train Loss: 0.0266 || Val Loss: 0.1265



                                                                         
[Epoch 10/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0257][A
[Epoch 10/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0143][A
[Epoch 10/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0208][A
[Epoch 10/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0174][A
[Epoch 10/10] Training:   0%|          | 0/36 [00:00<?, ?it/s, loss=0.0222][A
[Epoch 10/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0173][A
[Epoch 10/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0285][A
[Epoch 10/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0341][A
[Epoch 10/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0233][A
[Epoch 10/10] Training:   0%|          | 0/36 [00:01<?, ?it/s, loss=0.0201][A
[Epoch 10/10] Training:   0%|          | 0/36 [00:02<?, ?it/s, loss=0.0141][A
[Epoch 10/10] Training:   0%|          | 0/36 [00:02<?, 

[Epoch 10] Train Loss: 0.0232 || Val Loss: 0.0586
