In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import StratifiedKFold
from transformers import HubertForSequenceClassification, AutoConfig, Wav2Vec2FeatureExtractor
import pandas as pd
import librosa
import numpy as np
from tqdm import tqdm

In [2]:
df = pd.read_csv('../dataset/train.csv')

In [3]:
def convert_labels(label):
    if label == 'fake':
        return 0
    elif label == 'real':
        return 1
    else:
        raise ValueError(f"Unknown label: {label}")

# apply 함수를 사용하여 라벨 변환
df['label'] = df['label'].apply(convert_labels)

In [4]:
# 디바이스 설정
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

# 모델 및 특징 추출기 설정
model_name_or_path = 'facebook/hubert-large-ll60k'
config = AutoConfig.from_pretrained(model_name_or_path, num_labels=2)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name_or_path)
sampling_rate = feature_extractor.sampling_rate



In [5]:
# 음성 파일을 배열로 변환하는 함수
def speech_file_to_array_fn(path):
    audio, _ = librosa.load(path, sr=sampling_rate)
    inputs = feature_extractor(audio, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
    return inputs.input_values.squeeze()

# 모델 클래스 정의
class EModel(nn.Module):
    def __init__(self):
        super(EModel, self).__init__()
        self.backbone = HubertForSequenceClassification.from_pretrained(model_name_or_path, config=config)

    def forward(self, x):
        return self.backbone(x).logits

In [6]:
# Collate 함수 정의
def collate_fn(batch):
    signals, labels = zip(*batch)
    max_length = max([signal.size(0) for signal in signals])
    padded_signals = torch.zeros(len(signals), max_length)
    for i, signal in enumerate(signals):
        padded_signals[i, :signal.size(0)] = signal
    labels = torch.tensor(labels)
    return padded_signals, labels

# Mixup 함수 정의 (필요한 경우)
def mixup_data(x, y, alpha=1.0):
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1
    batch_size = x.size()[0]
    index = torch.randperm(batch_size).to(x.device)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

In [7]:
# Trainer 함수 정의
def trainer(model, train_loader, loss_fn, optimizer, epoch):
    model.train()
    train_loss = 0
    step = 0
    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1} [Training]"):
        try:
            inputs = inputs.to(device)
            labels = labels.to(device)

            if step % 4 == 0:
                x_batch, y_batch_a, y_batch_b, lam = mixup_data(inputs, labels)
                outputs = model(x_batch)
                loss = mixup_criterion(loss_fn, outputs, y_batch_a.to(device), y_batch_b.to(device), lam)
            else:
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.detach().cpu().item()
            step += 1
        except Exception as e:
            print(f"Error during training: {e}")
    print(f'EPOCH : {epoch} | train_loss : {train_loss/len(train_loader):.4f}')

# Validator 함수 정의 (필요한 경우)
def validator(model, valid_loader, loss_fn, epoch, fold, scheduler):
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in tqdm(valid_loader, desc=f"Epoch {epoch+1} [Validation]"):
            try:
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
                val_loss += loss.item()
                
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            except Exception as e:
                print(f"Error during validation: {e}")
    
    val_loss /= len(valid_loader)
    val_accuracy = correct / total
    scheduler.step(val_loss)
    
    torch.save(model.state_dict(), f'modelW_3_0.001_{val_accuracy:.6f}_{val_loss:.6f}.pth')
    print(f'Fold {fold+1}, Epoch {epoch + 1}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')


In [8]:
from transformers import AdamW

# 모델 로드 (가정: 이미 로드된 상태)
model = EModel().to(device)
loss_fn = nn.CrossEntropyLoss()

# 전체 모델의 파라미터를 고정
for param in model.parameters():
    param.requires_grad = False


for param in model.backbone.classifier.parameters():
    param.requires_grad = True

# 학습 가능한 파라미터만 옵티마이저에 전달
optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-5, weight_decay=1e-3)

scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1)


Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-large-ll60k and are newly initialized: ['classifier.bias', 'classifier.weight', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
'''
# 모델, 손실 함수, 옵티마이저, 스케줄러 정의
model = EModel().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1)
'''

'\n# 모델, 손실 함수, 옵티마이저, 스케줄러 정의\nmodel = EModel().to(device)\nloss_fn = nn.CrossEntropyLoss()\noptimizer = torch.optim.Adam(model.parameters(), lr=1e-5)\nscheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1)\n'

In [10]:
# 데이터셋 클래스 정의
class EMDataset(Dataset):
    def __init__(self, df):
        self.df = df

    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, idx):
        path = os.path.join("..", "dataset", self.df.loc[idx, 'path'])
        if not os.path.exists(path):
            raise FileNotFoundError(f"파일을 찾을 수 없습니다: {path}")
        signal = speech_file_to_array_fn(path)        
        label = self.df.loc[idx, 'label']
        return signal, label

In [11]:
# Stratified K-Fold 설정
k_split = StratifiedKFold(n_splits=3, random_state=42, shuffle=True)

In [12]:
# 교차 검증을 통한 모델 훈련
for k, (t_idx, v_idx) in enumerate(k_split.split(df, df['label'])):
    print(f"Fold {k + 1}")
    
    train_df, valid_df = df.loc[t_idx].reset_index(drop=True), df.loc[v_idx].reset_index(drop=True)    

    train_dataset = EMDataset(train_df)
    valid_dataset = EMDataset(valid_df)

    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn,num_workers=24)
    valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn,num_workers=24)

    for epoch in range(10):
        trainer(model, train_loader, loss_fn, optimizer, epoch)
        validator(model, valid_loader, loss_fn, epoch, k, scheduler)

print("Training complete!")

Fold 1


  return F.conv1d(input, weight, bias, self.stride,
Epoch 1 [Training]: 100%|██████████| 2772/2772 [06:03<00:00,  7.64it/s]


EPOCH : 0 | train_loss : 0.6720


Epoch 1 [Validation]: 100%|██████████| 693/693 [00:42<00:00, 16.43it/s]


Fold 1, Epoch 1, Validation Loss: 0.6642, Validation Accuracy: 0.6111


Epoch 2 [Training]: 100%|██████████| 2772/2772 [06:08<00:00,  7.53it/s]


EPOCH : 1 | train_loss : 0.6504


Epoch 2 [Validation]: 100%|██████████| 693/693 [00:42<00:00, 16.34it/s]


Fold 1, Epoch 2, Validation Loss: 0.6500, Validation Accuracy: 0.6769


Epoch 3 [Training]: 100%|██████████| 2772/2772 [06:10<00:00,  7.48it/s]


EPOCH : 2 | train_loss : 0.6362


Epoch 3 [Validation]: 100%|██████████| 693/693 [00:46<00:00, 14.77it/s]


Fold 1, Epoch 3, Validation Loss: 0.6415, Validation Accuracy: 0.7514


Epoch 4 [Training]: 100%|██████████| 2772/2772 [06:07<00:00,  7.53it/s]


EPOCH : 3 | train_loss : 0.6219


Epoch 4 [Validation]: 100%|██████████| 693/693 [00:43<00:00, 16.05it/s]


Fold 1, Epoch 4, Validation Loss: 0.6353, Validation Accuracy: 0.8014


Epoch 5 [Training]: 100%|██████████| 2772/2772 [06:08<00:00,  7.51it/s]


EPOCH : 4 | train_loss : 0.6095


Epoch 5 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.26it/s]


Fold 1, Epoch 5, Validation Loss: 0.6310, Validation Accuracy: 0.7922


Epoch 6 [Training]: 100%|██████████| 2772/2772 [06:10<00:00,  7.48it/s]


EPOCH : 5 | train_loss : 0.5988


Epoch 6 [Validation]: 100%|██████████| 693/693 [00:43<00:00, 15.96it/s]


Fold 1, Epoch 6, Validation Loss: 0.6296, Validation Accuracy: 0.7488


Epoch 7 [Training]: 100%|██████████| 2772/2772 [06:11<00:00,  7.47it/s]


EPOCH : 6 | train_loss : 0.5895


Epoch 7 [Validation]: 100%|██████████| 693/693 [00:43<00:00, 15.93it/s]


Fold 1, Epoch 7, Validation Loss: 0.6289, Validation Accuracy: 0.7020


Epoch 8 [Training]: 100%|██████████| 2772/2772 [06:11<00:00,  7.47it/s]


EPOCH : 7 | train_loss : 0.5797


Epoch 8 [Validation]: 100%|██████████| 693/693 [00:43<00:00, 15.87it/s]


Fold 1, Epoch 8, Validation Loss: 0.6289, Validation Accuracy: 0.6688


Epoch 9 [Training]: 100%|██████████| 2772/2772 [06:13<00:00,  7.42it/s]


EPOCH : 8 | train_loss : 0.5728


Epoch 9 [Validation]: 100%|██████████| 693/693 [00:43<00:00, 15.99it/s]


Fold 1, Epoch 9, Validation Loss: 0.6324, Validation Accuracy: 0.6314


Epoch 10 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.36it/s]


EPOCH : 9 | train_loss : 0.5647


Epoch 10 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.24it/s]


Fold 1, Epoch 10, Validation Loss: 0.6334, Validation Accuracy: 0.6126


Epoch 11 [Training]: 100%|██████████| 2772/2772 [06:10<00:00,  7.49it/s]


EPOCH : 10 | train_loss : 0.5579


Epoch 11 [Validation]: 100%|██████████| 693/693 [00:43<00:00, 15.86it/s]


Fold 1, Epoch 11, Validation Loss: 0.6364, Validation Accuracy: 0.5964


Epoch 12 [Training]: 100%|██████████| 2772/2772 [06:14<00:00,  7.41it/s]


EPOCH : 11 | train_loss : 0.5530


Epoch 12 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.56it/s]


Fold 1, Epoch 12, Validation Loss: 0.6412, Validation Accuracy: 0.5786


Epoch 13 [Training]: 100%|██████████| 2772/2772 [06:14<00:00,  7.40it/s]


EPOCH : 12 | train_loss : 0.5463


Epoch 13 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.57it/s]


Fold 1, Epoch 13, Validation Loss: 0.6491, Validation Accuracy: 0.5599


Epoch 14 [Training]: 100%|██████████| 2772/2772 [06:19<00:00,  7.30it/s]


EPOCH : 13 | train_loss : 0.5413


Epoch 14 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.21it/s]


Fold 1, Epoch 14, Validation Loss: 0.6522, Validation Accuracy: 0.5555


Epoch 15 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.36it/s]


EPOCH : 14 | train_loss : 0.5345


Epoch 15 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.48it/s]


Fold 1, Epoch 15, Validation Loss: 0.6590, Validation Accuracy: 0.5464
Fold 2


  return F.conv1d(input, weight, bias, self.stride,
Epoch 1 [Training]: 100%|██████████| 2772/2772 [06:13<00:00,  7.42it/s]


EPOCH : 0 | train_loss : 0.5336


Epoch 1 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.41it/s]


Fold 2, Epoch 1, Validation Loss: 0.6695, Validation Accuracy: 0.5382


Epoch 2 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.32it/s]


EPOCH : 1 | train_loss : 0.5281


Epoch 2 [Validation]: 100%|██████████| 693/693 [00:46<00:00, 14.95it/s]


Fold 2, Epoch 2, Validation Loss: 0.6767, Validation Accuracy: 0.5322


Epoch 3 [Training]: 100%|██████████| 2772/2772 [06:15<00:00,  7.38it/s]


EPOCH : 2 | train_loss : 0.5232


Epoch 3 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.57it/s]


Fold 2, Epoch 3, Validation Loss: 0.6823, Validation Accuracy: 0.5294


Epoch 4 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.36it/s]


EPOCH : 3 | train_loss : 0.5228


Epoch 4 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.50it/s]


Fold 2, Epoch 4, Validation Loss: 0.6890, Validation Accuracy: 0.5266


Epoch 5 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.33it/s]


EPOCH : 4 | train_loss : 0.5181


Epoch 5 [Validation]: 100%|██████████| 693/693 [00:48<00:00, 14.29it/s]


Fold 2, Epoch 5, Validation Loss: 0.6965, Validation Accuracy: 0.5239


Epoch 6 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.36it/s]


EPOCH : 5 | train_loss : 0.5152


Epoch 6 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.35it/s]


Fold 2, Epoch 6, Validation Loss: 0.7008, Validation Accuracy: 0.5239


Epoch 7 [Training]: 100%|██████████| 2772/2772 [06:15<00:00,  7.38it/s]


EPOCH : 6 | train_loss : 0.5127


Epoch 7 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.53it/s]


Fold 2, Epoch 7, Validation Loss: 0.7103, Validation Accuracy: 0.5202


Epoch 8 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.36it/s]


EPOCH : 7 | train_loss : 0.5074


Epoch 8 [Validation]: 100%|██████████| 693/693 [00:48<00:00, 14.18it/s]


Fold 2, Epoch 8, Validation Loss: 0.7186, Validation Accuracy: 0.5180


Epoch 9 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.36it/s]


EPOCH : 8 | train_loss : 0.5068


Epoch 9 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.59it/s]


Fold 2, Epoch 9, Validation Loss: 0.7277, Validation Accuracy: 0.5151


Epoch 10 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.33it/s]


EPOCH : 9 | train_loss : 0.5021


Epoch 10 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.43it/s]


Fold 2, Epoch 10, Validation Loss: 0.7322, Validation Accuracy: 0.5151


Epoch 11 [Training]: 100%|██████████| 2772/2772 [06:20<00:00,  7.29it/s]


EPOCH : 10 | train_loss : 0.5021


Epoch 11 [Validation]: 100%|██████████| 693/693 [00:46<00:00, 15.01it/s]


Fold 2, Epoch 11, Validation Loss: 0.7383, Validation Accuracy: 0.5138


Epoch 12 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.34it/s]


EPOCH : 11 | train_loss : 0.5005


Epoch 12 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.44it/s]


Fold 2, Epoch 12, Validation Loss: 0.7468, Validation Accuracy: 0.5115


Epoch 13 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.33it/s]


EPOCH : 12 | train_loss : 0.4970


Epoch 13 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.07it/s]


Fold 2, Epoch 13, Validation Loss: 0.7511, Validation Accuracy: 0.5115


Epoch 14 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.32it/s]


EPOCH : 13 | train_loss : 0.4952


Epoch 14 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.21it/s]


Fold 2, Epoch 14, Validation Loss: 0.7619, Validation Accuracy: 0.5090


Epoch 15 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.36it/s]


EPOCH : 14 | train_loss : 0.4925


Epoch 15 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.53it/s]


Fold 2, Epoch 15, Validation Loss: 0.7627, Validation Accuracy: 0.5106
Fold 3


  return F.conv1d(input, weight, bias, self.stride,
Epoch 1 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.33it/s]


EPOCH : 0 | train_loss : 0.4947


Epoch 1 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.14it/s]


Fold 3, Epoch 1, Validation Loss: 0.7661, Validation Accuracy: 0.5111


Epoch 2 [Training]: 100%|██████████| 2772/2772 [06:20<00:00,  7.29it/s]


EPOCH : 1 | train_loss : 0.4915


Epoch 2 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.30it/s]


Fold 3, Epoch 2, Validation Loss: 0.7709, Validation Accuracy: 0.5109


Epoch 3 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.35it/s]


EPOCH : 2 | train_loss : 0.4913


Epoch 3 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.50it/s]


Fold 3, Epoch 3, Validation Loss: 0.7742, Validation Accuracy: 0.5110


Epoch 4 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.34it/s]


EPOCH : 3 | train_loss : 0.4892


Epoch 4 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.64it/s]


Fold 3, Epoch 4, Validation Loss: 0.7799, Validation Accuracy: 0.5109


Epoch 5 [Training]: 100%|██████████| 2772/2772 [06:19<00:00,  7.31it/s]


EPOCH : 4 | train_loss : 0.4891


Epoch 5 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.41it/s]


Fold 3, Epoch 5, Validation Loss: 0.7870, Validation Accuracy: 0.5106


Epoch 6 [Training]: 100%|██████████| 2772/2772 [06:22<00:00,  7.25it/s]


EPOCH : 5 | train_loss : 0.4850


Epoch 6 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.63it/s]


Fold 3, Epoch 6, Validation Loss: 0.7930, Validation Accuracy: 0.5101


Epoch 7 [Training]: 100%|██████████| 2772/2772 [06:19<00:00,  7.31it/s]


EPOCH : 6 | train_loss : 0.4873


Epoch 7 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.53it/s]


Fold 3, Epoch 7, Validation Loss: 0.7940, Validation Accuracy: 0.5107


Epoch 8 [Training]: 100%|██████████| 2772/2772 [06:20<00:00,  7.28it/s]


EPOCH : 7 | train_loss : 0.4831


Epoch 8 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.54it/s]


Fold 3, Epoch 8, Validation Loss: 0.7947, Validation Accuracy: 0.5111


Epoch 9 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.31it/s]


EPOCH : 8 | train_loss : 0.4819


Epoch 9 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.54it/s]


Fold 3, Epoch 9, Validation Loss: 0.8002, Validation Accuracy: 0.5111


Epoch 10 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.32it/s]


EPOCH : 9 | train_loss : 0.4800


Epoch 10 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.39it/s]


Fold 3, Epoch 10, Validation Loss: 0.8053, Validation Accuracy: 0.5109


Epoch 11 [Training]: 100%|██████████| 2772/2772 [06:19<00:00,  7.30it/s]


EPOCH : 10 | train_loss : 0.4819


Epoch 11 [Validation]: 100%|██████████| 693/693 [00:46<00:00, 14.78it/s]


Fold 3, Epoch 11, Validation Loss: 0.8148, Validation Accuracy: 0.5097


Epoch 12 [Training]: 100%|██████████| 2772/2772 [06:19<00:00,  7.30it/s]


EPOCH : 11 | train_loss : 0.4795


Epoch 12 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.46it/s]


Fold 3, Epoch 12, Validation Loss: 0.8173, Validation Accuracy: 0.5098


Epoch 13 [Training]: 100%|██████████| 2772/2772 [06:19<00:00,  7.31it/s]


EPOCH : 12 | train_loss : 0.4757


Epoch 13 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.30it/s]


Fold 3, Epoch 13, Validation Loss: 0.8183, Validation Accuracy: 0.5104


Epoch 14 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.33it/s]


EPOCH : 13 | train_loss : 0.4766


Epoch 14 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.63it/s]


Fold 3, Epoch 14, Validation Loss: 0.8197, Validation Accuracy: 0.5108


Epoch 15 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.32it/s]


EPOCH : 14 | train_loss : 0.4747


Epoch 15 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.58it/s]


Fold 3, Epoch 15, Validation Loss: 0.8244, Validation Accuracy: 0.5107
Fold 4


  return F.conv1d(input, weight, bias, self.stride,
Epoch 1 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.32it/s]


EPOCH : 0 | train_loss : 0.4765


Epoch 1 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.07it/s]


Fold 4, Epoch 1, Validation Loss: 0.8290, Validation Accuracy: 0.5083


Epoch 2 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.33it/s]


EPOCH : 1 | train_loss : 0.4706


Epoch 2 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.32it/s]


Fold 4, Epoch 2, Validation Loss: 0.8328, Validation Accuracy: 0.5084


Epoch 3 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.34it/s]


EPOCH : 2 | train_loss : 0.4718


Epoch 3 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.30it/s]


Fold 4, Epoch 3, Validation Loss: 0.8399, Validation Accuracy: 0.5074


Epoch 4 [Training]: 100%|██████████| 2772/2772 [06:19<00:00,  7.31it/s]


EPOCH : 3 | train_loss : 0.4732


Epoch 4 [Validation]: 100%|██████████| 693/693 [00:49<00:00, 14.12it/s]


Fold 4, Epoch 4, Validation Loss: 0.8409, Validation Accuracy: 0.5085


Epoch 5 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.33it/s]


EPOCH : 4 | train_loss : 0.4714


Epoch 5 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.25it/s]


Fold 4, Epoch 5, Validation Loss: 0.8457, Validation Accuracy: 0.5080


Epoch 6 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.34it/s]


EPOCH : 5 | train_loss : 0.4680


Epoch 6 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.32it/s]


Fold 4, Epoch 6, Validation Loss: 0.8497, Validation Accuracy: 0.5077


Epoch 7 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.33it/s]


EPOCH : 6 | train_loss : 0.4689


Epoch 7 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.23it/s]


Fold 4, Epoch 7, Validation Loss: 0.8524, Validation Accuracy: 0.5080


Epoch 8 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.32it/s]


EPOCH : 7 | train_loss : 0.4667


Epoch 8 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.24it/s]


Fold 4, Epoch 8, Validation Loss: 0.8563, Validation Accuracy: 0.5081


Epoch 9 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.33it/s]


EPOCH : 8 | train_loss : 0.4657


Epoch 9 [Validation]: 100%|██████████| 693/693 [00:46<00:00, 15.02it/s]


Fold 4, Epoch 9, Validation Loss: 0.8573, Validation Accuracy: 0.5089


Epoch 10 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.32it/s]


EPOCH : 9 | train_loss : 0.4667


Epoch 10 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.34it/s]


Fold 4, Epoch 10, Validation Loss: 0.8613, Validation Accuracy: 0.5086


Epoch 11 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.35it/s]


EPOCH : 10 | train_loss : 0.4650


Epoch 11 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.21it/s]


Fold 4, Epoch 11, Validation Loss: 0.8654, Validation Accuracy: 0.5083


Epoch 12 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.36it/s]


EPOCH : 11 | train_loss : 0.4665


Epoch 12 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.26it/s]


Fold 4, Epoch 12, Validation Loss: 0.8698, Validation Accuracy: 0.5081


Epoch 13 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.36it/s]


EPOCH : 12 | train_loss : 0.4648


Epoch 13 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.25it/s]


Fold 4, Epoch 13, Validation Loss: 0.8697, Validation Accuracy: 0.5088


Epoch 14 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.34it/s]


EPOCH : 13 | train_loss : 0.4636


Epoch 14 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.21it/s]


Fold 4, Epoch 14, Validation Loss: 0.8735, Validation Accuracy: 0.5088


Epoch 15 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.35it/s]


EPOCH : 14 | train_loss : 0.4656


Epoch 15 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.32it/s]


Fold 4, Epoch 15, Validation Loss: 0.8788, Validation Accuracy: 0.5082
Fold 5


  return F.conv1d(input, weight, bias, self.stride,
Epoch 1 [Training]: 100%|██████████| 2772/2772 [06:15<00:00,  7.39it/s]


EPOCH : 0 | train_loss : 0.4640


Epoch 1 [Validation]: 100%|██████████| 693/693 [00:46<00:00, 14.82it/s]


Fold 5, Epoch 1, Validation Loss: 0.8698, Validation Accuracy: 0.5107


Epoch 2 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.33it/s]


EPOCH : 1 | train_loss : 0.4627


Epoch 2 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.31it/s]


Fold 5, Epoch 2, Validation Loss: 0.8775, Validation Accuracy: 0.5095


Epoch 3 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.33it/s]


EPOCH : 2 | train_loss : 0.4609


Epoch 3 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.53it/s]


Fold 5, Epoch 3, Validation Loss: 0.8787, Validation Accuracy: 0.5100


Epoch 4 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.34it/s]


EPOCH : 3 | train_loss : 0.4622


Epoch 4 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.41it/s]


Fold 5, Epoch 4, Validation Loss: 0.8798, Validation Accuracy: 0.5100


Epoch 5 [Training]: 100%|██████████| 2772/2772 [06:24<00:00,  7.21it/s]


EPOCH : 4 | train_loss : 0.4593


Epoch 5 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.38it/s]


Fold 5, Epoch 5, Validation Loss: 0.8781, Validation Accuracy: 0.5113


Epoch 6 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.36it/s]


EPOCH : 5 | train_loss : 0.4603


Epoch 6 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.48it/s]


Fold 5, Epoch 6, Validation Loss: 0.8819, Validation Accuracy: 0.5110


Epoch 7 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.34it/s]


EPOCH : 6 | train_loss : 0.4580


Epoch 7 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.52it/s]


Fold 5, Epoch 7, Validation Loss: 0.8803, Validation Accuracy: 0.5121


Epoch 8 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.37it/s]


EPOCH : 7 | train_loss : 0.4586


Epoch 8 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.44it/s]


Fold 5, Epoch 8, Validation Loss: 0.8811, Validation Accuracy: 0.5124


Epoch 9 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.32it/s]


EPOCH : 8 | train_loss : 0.4590


Epoch 9 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.43it/s]


Fold 5, Epoch 9, Validation Loss: 0.8846, Validation Accuracy: 0.5121


Epoch 10 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.35it/s]


EPOCH : 9 | train_loss : 0.4550


Epoch 10 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.22it/s]


Fold 5, Epoch 10, Validation Loss: 0.8838, Validation Accuracy: 0.5129


Epoch 11 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.36it/s]


EPOCH : 10 | train_loss : 0.4590


Epoch 11 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.37it/s]


Fold 5, Epoch 11, Validation Loss: 0.8860, Validation Accuracy: 0.5129


Epoch 12 [Training]: 100%|██████████| 2772/2772 [06:17<00:00,  7.35it/s]


EPOCH : 11 | train_loss : 0.4558


Epoch 12 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.41it/s]


Fold 5, Epoch 12, Validation Loss: 0.8805, Validation Accuracy: 0.5142


Epoch 13 [Training]: 100%|██████████| 2772/2772 [06:16<00:00,  7.36it/s]


EPOCH : 12 | train_loss : 0.4576


Epoch 13 [Validation]: 100%|██████████| 693/693 [00:44<00:00, 15.49it/s]


Fold 5, Epoch 13, Validation Loss: 0.8888, Validation Accuracy: 0.5131


Epoch 14 [Training]: 100%|██████████| 2772/2772 [06:18<00:00,  7.33it/s]


EPOCH : 13 | train_loss : 0.4568


Epoch 14 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.32it/s]


Fold 5, Epoch 14, Validation Loss: 0.8889, Validation Accuracy: 0.5133


Epoch 15 [Training]: 100%|██████████| 2772/2772 [06:19<00:00,  7.31it/s]


EPOCH : 14 | train_loss : 0.4553


Epoch 15 [Validation]: 100%|██████████| 693/693 [00:45<00:00, 15.18it/s]


Fold 5, Epoch 15, Validation Loss: 0.8907, Validation Accuracy: 0.5133
Training complete!


In [13]:
class TestDataset(Dataset):
    def __init__(self, df):
        self.df = df

    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, idx):
        path = os.path.join("..", "dataset", self.df.loc[idx, 'path'])
        if not os.path.exists(path):
            raise FileNotFoundError(f"파일을 찾을 수 없습니다: {path}")     
        signal = speech_file_to_array_fn(path)           
        return signal, -1
        

In [14]:
# 데이터셋 클래스 정의
class EMDataset(Dataset):
    def __init__(self, df):
        self.df = df

    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, idx):
        path = os.path.join("..", "dataset", self.df.loc[idx, 'path'])
        if not os.path.exists(path):
            raise FileNotFoundError(f"파일을 찾을 수 없습니다: {path}")
        signal = speech_file_to_array_fn(path)        
        return signal, -1

In [15]:
test_df = pd.read_csv('../dataset/test.csv', index_col=None)
test_df['path'] = '../dataset/' + test_df['path'].str[1:]
test_dataset = TestDataset(test_df)
test_loader = DataLoader(test_dataset, shuffle=False, num_workers=24, batch_size=16, collate_fn=collate_fn)

In [16]:
def inference(model, test_loader, device):
    model.to(device)
    model = model.eval()
    predictions = []
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader):
            
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            probs = model(inputs)

            probs = torch.sigmoid(probs)
            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
            
    return predictions

In [17]:
preds = inference(model, test_loader, device)

  return F.conv1d(input, weight, bias, self.stride,
100%|██████████| 3125/3125 [01:39<00:00, 31.25it/s]


In [18]:
len(preds)

50000

In [19]:
submit = pd.read_csv('../dataset/sample_submission.csv')
submit.iloc[:, 1:] = preds
submit.head()

  submit.iloc[:, 1:] = preds
  submit.iloc[:, 1:] = preds


Unnamed: 0,id,fake,real
0,TEST_00000,0.259104,0.739687
1,TEST_00001,0.250208,0.747689
2,TEST_00002,0.259978,0.740833
3,TEST_00003,0.484868,0.525856
4,TEST_00004,0.273432,0.726768


In [20]:
submit.to_csv('submission/3_0.001_hubert_submit.csv', index=False)