# Imports

In [3]:
import librosa

import numpy as np
import pandas as pd
import random

import torch
import torchmetrics
import os
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from joblib import Parallel, delayed



In [4]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

### Config

In [5]:
class Config:
    SR = 32000
    N_MFCC = 40
    
    # Dataset
    ROOT_DIR = 'C:/HongBeomsun/Dataset_SSD/FakeVoice'
    
    # Training
    N_CLASSES = 2
    BATCH_SIZE = 96
    N_EPOCHS = 100
    LEARNING_RATE = 1e-3
    
    # Others
    SEED = 42
    
CONFIG = Config()

In [6]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

In [7]:
seed_everything(CONFIG.SEED)

### Data

In [8]:
df = pd.read_csv(os.path.join(CONFIG.ROOT_DIR,'train.csv'))

In [9]:
print(len(df))
df.head()

55438


Unnamed: 0,id,path,label
0,RUNQPNJF,./train/RUNQPNJF.ogg,real
1,JFAWUOGJ,./train/JFAWUOGJ.ogg,fake
2,RDKEKEVX,./train/RDKEKEVX.ogg,real
3,QYHJDOFK,./train/QYHJDOFK.ogg,real
4,RSPQNHAO,./train/RSPQNHAO.ogg,real


In [10]:
df['label'].value_counts()

label
fake    27818
real    27620
Name: count, dtype: int64

In [11]:
train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CONFIG.SEED, stratify=df['label'])

In [12]:
train['label'].value_counts()
val['label'].value_counts()

label
fake    5564
real    5524
Name: count, dtype: int64

### Pre-processing : MFCC & DeltaMFCC

In [15]:
def get_features(df, train_mode=True, use_parallel=False):
    features = []
    labels = []
    for i, (index, row) in enumerate(tqdm(df.iterrows(), total=len(df)), 1):
        try:
            y, sr = librosa.load(os.path.join(CONFIG.ROOT_DIR, row['path']), sr=CONFIG.SR)
            
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC)
            delta_mfcc = librosa.feature.delta(mfcc)
            
            combined = np.vstack([mfcc, delta_mfcc])
            combined_mean = np.mean(combined, axis=1)
            features.append(combined_mean)
            
            if train_mode:
                label = row['label']
                label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
                label_vector[0 if label == 'fake' else 1] = 1
                labels.append(label_vector)
                
        except Exception as e:
            print(f'Error while {index} : {e}')
            continue
    
    if train_mode:
        return features, labels
    return features
        

In [16]:
train_features, train_labels = get_features(train)
val_features, val_labels = get_features(val)

 59%|█████▉    | 26072/44350 [06:13<04:01, 75.75it/s]

Error while 26061 : when mode='interp', width=9 cannot exceed data.shape[axis]=7


 72%|███████▏  | 31955/44350 [07:36<02:43, 75.81it/s]

Error while 31944 : when mode='interp', width=9 cannot exceed data.shape[axis]=8


100%|██████████| 44350/44350 [10:32<00:00, 70.07it/s]
100%|██████████| 11088/11088 [02:14<00:00, 82.32it/s] 


In [17]:
print(train_features[10].shape)
print(len(train_features))
print(len(train_labels))

(80,)
44348
44348


In [18]:
def save_np():
    os.makedirs(os.path.join(CONFIG.ROOT_DIR, 'npy'), exist_ok=True)
    np.save(os.path.join(CONFIG.ROOT_DIR, 'npy/train_features_mean.npy'), train_features)
    np.save(os.path.join(CONFIG.ROOT_DIR, 'npy/train_labels_mean.npy'), train_labels)
    np.save(os.path.join(CONFIG.ROOT_DIR, 'npy/val_features_mean.npy'), val_features)
    np.save(os.path.join(CONFIG.ROOT_DIR, 'npy/val_labels_mean.npy'), val_labels)

In [19]:
def load_np():
    train_features = np.load(os.path.join(CONFIG.ROOT_DIR, 'npy/train_features.npy'))
    train_labels = np.load(os.path.join(CONFIG.ROOT_DIR, 'npy/train_labels.npy'))
    val_features = np.load(os.path.join(CONFIG.ROOT_DIR, 'npy/val_features.npy'))
    val_labels = np.load(os.path.join(CONFIG.ROOT_DIR, 'npy/val_labels.npy'))
    
    return train_features, train_labels, val_features, val_labels

In [20]:
save_np()
# train_features, train_labels, val_features, val_labels = load_np()

In [25]:
train_features = np.array(train_features)
train_labels = np.array(train_labels)
val_features = np.array(val_features)
val_labels = np.array(val_labels)

In [26]:
print(train_features.shape, len(train_labels))
print(val_features.shape, len(val_labels))

(44348, 80) 44348
(11088, 80) 11088


In [27]:
print(train_features[0], train_labels[0])

[-3.6298355e+02  1.2409643e+02 -5.4747303e+01  7.0447746e+01
 -2.6750314e+00  1.0377989e+01  2.1588305e+01 -1.1392265e+01
  1.5144799e+01 -6.1271892e+00  9.7842264e+00  6.8455625e+00
 -2.6896639e+00  1.3830722e+01 -5.1628785e+00  9.7452707e+00
  7.0354652e+00  7.6968670e-02  1.1705402e+01 -7.1903372e-01
  8.0656118e+00  2.2053039e+00 -2.8435019e-01  7.1962514e+00
 -2.3920376e+00  5.1100283e+00 -5.1483619e-01  1.0345262e+00
  4.8815756e+00 -3.1192234e+00  6.9965553e+00  4.8622530e-02
  3.8208070e-01  6.5177312e+00 -6.4668083e-01  4.9323301e+00
  1.3422136e+00  1.8539882e+00  3.2438755e+00 -1.1100441e+00
 -1.5834183e-01  1.3949156e-02 -4.3554106e-03  3.9133374e-02
 -1.1917626e-02  4.1101277e-03  2.5846034e-02  2.5671145e-02
  2.9278807e-02 -2.4162741e-02 -1.5184934e-02 -3.6436853e-03
 -8.2941456e-03 -1.0004562e-02 -2.2768199e-02 -2.4706349e-03
  4.7766133e-03  5.7219812e-03  9.9832593e-03 -1.1366978e-02
 -7.5917068e-04 -1.4376579e-03 -6.7380914e-03  9.1720149e-03
 -3.3423454e-03 -2.46119

### Dataset

In [28]:
class CustomDataset(Dataset):
    def __init__(self, mfcc, label):
        self.mfcc = mfcc
        self.label = label

    def __len__(self):
        return len(self.mfcc)

    def __getitem__(self, index):
        if self.label is not None:
            return self.mfcc[index], self.label[index]
        return self.mfcc[index]

In [29]:
train_dataset = CustomDataset(train_features, train_labels)
val_dataset = CustomDataset(val_features, val_labels)

In [32]:
train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

### Define Model

In [33]:
class MLP(nn.Module):
    def __init__(self, input_dim=80, output_dim=CONFIG.N_CLASSES):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.dropout1 = nn.Dropout(0.3)
        
        self.fc2 = nn.Linear(256, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(0.3)
        
        self.fc3 = nn.Linear(128, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = self.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)
        x = torch.sigmoid(self.fc3(x))
        return x

### Train & Validation

In [34]:
from sklearn.metrics import roc_auc_score
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

In [35]:
def train(model, scheduler, optimizer, train_loader, val_loader, device):
    model.to(device)
    criterion = nn.BCELoss().to(device)
    
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, CONFIG.N_EPOCHS+1):
        model.train()
        train_loss = []
        for features, labels in tqdm(iter(train_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            optimizer.zero_grad()
            
            output = model(features)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val AUC : [{_val_score:.5f}] LEARNING RATE : [{optimizer.param_groups[0]["lr"]:.5f}]')

        scheduler.step(_val_loss)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model

In [36]:
def multiLabel_AUC(y_true, y_scores):
    auc_scores = []
    for i in range(y_true.shape[1]):
        auc = roc_auc_score(y_true[:, i], y_scores[:, i])
        auc_scores.append(auc)
    mean_auc_score = np.mean(auc_scores)
    return mean_auc_score

In [37]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss, all_labels, all_probs = [], [], []
    
    with torch.no_grad():
        for features, labels in tqdm(iter(val_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            probs = model(features)
            
            loss = criterion(probs, labels)

            val_loss.append(loss.item())

            all_labels.append(labels.cpu().numpy())
            all_probs.append(probs.cpu().numpy())
        
        _val_loss = np.mean(val_loss)

        all_labels = np.concatenate(all_labels, axis=0)
        all_probs = np.concatenate(all_probs, axis=0)
        
        # Calculate AUC score
        auc_score = multiLabel_AUC(all_labels, all_probs)
    
    return _val_loss, auc_score

### Run

In [38]:
model = MLP()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CONFIG.LEARNING_RATE)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True)

infer_model = train(model, scheduler, optimizer, train_loader, val_loader, device)

100%|██████████| 462/462 [00:13<00:00, 33.72it/s] 
100%|██████████| 116/116 [00:00<00:00, 555.33it/s]


Epoch [1], Train Loss : [0.14385] Val Loss : [0.03472] Val AUC : [0.99927] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 321.30it/s]
100%|██████████| 116/116 [00:00<00:00, 583.29it/s]


Epoch [2], Train Loss : [0.04731] Val Loss : [0.02429] Val AUC : [0.99951] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 341.89it/s]
100%|██████████| 116/116 [00:00<00:00, 620.19it/s]


Epoch [3], Train Loss : [0.03444] Val Loss : [0.02116] Val AUC : [0.99967] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 357.88it/s]
100%|██████████| 116/116 [00:00<00:00, 626.93it/s]


Epoch [4], Train Loss : [0.02541] Val Loss : [0.01293] Val AUC : [0.99990] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 331.60it/s]
100%|██████████| 116/116 [00:00<00:00, 581.15it/s]


Epoch [5], Train Loss : [0.02310] Val Loss : [0.01280] Val AUC : [0.99985] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 320.68it/s]
100%|██████████| 116/116 [00:00<00:00, 547.29it/s]


Epoch [6], Train Loss : [0.01982] Val Loss : [0.00959] Val AUC : [0.99992] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 313.48it/s]
100%|██████████| 116/116 [00:00<00:00, 595.92it/s]


Epoch [7], Train Loss : [0.01865] Val Loss : [0.00709] Val AUC : [0.99996] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 315.45it/s]
100%|██████████| 116/116 [00:00<00:00, 545.86it/s]


Epoch [8], Train Loss : [0.01647] Val Loss : [0.00737] Val AUC : [0.99996] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 308.75it/s]
100%|██████████| 116/116 [00:00<00:00, 596.16it/s]


Epoch [9], Train Loss : [0.01508] Val Loss : [0.00708] Val AUC : [0.99996] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 314.74it/s]
100%|██████████| 116/116 [00:00<00:00, 582.18it/s]


Epoch [10], Train Loss : [0.01477] Val Loss : [0.00642] Val AUC : [0.99997] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 323.09it/s]
100%|██████████| 116/116 [00:00<00:00, 542.37it/s]


Epoch [11], Train Loss : [0.01441] Val Loss : [0.00808] Val AUC : [0.99995] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 323.98it/s]
100%|██████████| 116/116 [00:00<00:00, 587.27it/s]


Epoch [12], Train Loss : [0.01296] Val Loss : [0.00742] Val AUC : [0.99996] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 331.31it/s]
100%|██████████| 116/116 [00:00<00:00, 594.62it/s]


Epoch [13], Train Loss : [0.01344] Val Loss : [0.00805] Val AUC : [0.99996] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 324.29it/s]
100%|██████████| 116/116 [00:00<00:00, 592.57it/s]


Epoch [14], Train Loss : [0.01181] Val Loss : [0.00648] Val AUC : [0.99997] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 323.29it/s]
100%|██████████| 116/116 [00:00<00:00, 591.74it/s]


Epoch [15], Train Loss : [0.01042] Val Loss : [0.00593] Val AUC : [0.99997] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 316.97it/s]
100%|██████████| 116/116 [00:00<00:00, 552.31it/s]


Epoch [16], Train Loss : [0.01049] Val Loss : [0.00654] Val AUC : [0.99997] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 314.38it/s]
100%|██████████| 116/116 [00:00<00:00, 583.12it/s]


Epoch [17], Train Loss : [0.00940] Val Loss : [0.00600] Val AUC : [0.99997] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 300.30it/s]
100%|██████████| 116/116 [00:00<00:00, 600.96it/s]


Epoch [18], Train Loss : [0.00940] Val Loss : [0.00713] Val AUC : [0.99997] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 298.50it/s]
100%|██████████| 116/116 [00:00<00:00, 582.20it/s]


Epoch [19], Train Loss : [0.00960] Val Loss : [0.00668] Val AUC : [0.99997] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 316.94it/s]
100%|██████████| 116/116 [00:00<00:00, 616.92it/s]


Epoch [20], Train Loss : [0.00901] Val Loss : [0.00711] Val AUC : [0.99997] LEARNING RATE : [0.00100]


100%|██████████| 462/462 [00:01<00:00, 304.51it/s]
100%|██████████| 116/116 [00:00<00:00, 579.92it/s]


Epoch [21], Train Loss : [0.00770] Val Loss : [0.00674] Val AUC : [0.99997] LEARNING RATE : [0.00100]
Epoch 00021: reducing learning rate of group 0 to 1.0000e-04.


100%|██████████| 462/462 [00:01<00:00, 327.91it/s]
100%|██████████| 116/116 [00:00<00:00, 607.25it/s]


Epoch [22], Train Loss : [0.00556] Val Loss : [0.00496] Val AUC : [0.99998] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 304.79it/s]
100%|██████████| 116/116 [00:00<00:00, 534.76it/s]


Epoch [23], Train Loss : [0.00569] Val Loss : [0.00464] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 298.20it/s]
100%|██████████| 116/116 [00:00<00:00, 575.34it/s]


Epoch [24], Train Loss : [0.00474] Val Loss : [0.00399] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 301.71it/s]
100%|██████████| 116/116 [00:00<00:00, 582.81it/s]


Epoch [25], Train Loss : [0.00493] Val Loss : [0.00421] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 292.56it/s]
100%|██████████| 116/116 [00:00<00:00, 568.53it/s]


Epoch [26], Train Loss : [0.00445] Val Loss : [0.00386] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 317.89it/s]
100%|██████████| 116/116 [00:00<00:00, 618.32it/s]


Epoch [27], Train Loss : [0.00383] Val Loss : [0.00461] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 324.58it/s]
100%|██████████| 116/116 [00:00<00:00, 560.67it/s]


Epoch [28], Train Loss : [0.00434] Val Loss : [0.00481] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 317.76it/s]
100%|██████████| 116/116 [00:00<00:00, 554.93it/s]


Epoch [29], Train Loss : [0.00376] Val Loss : [0.00387] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 313.58it/s]
100%|██████████| 116/116 [00:00<00:00, 574.18it/s]


Epoch [30], Train Loss : [0.00476] Val Loss : [0.00397] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 312.47it/s]
100%|██████████| 116/116 [00:00<00:00, 579.90it/s]


Epoch [31], Train Loss : [0.00419] Val Loss : [0.00421] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 304.77it/s]
100%|██████████| 116/116 [00:00<00:00, 545.77it/s]


Epoch [32], Train Loss : [0.00383] Val Loss : [0.00383] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 324.65it/s]
100%|██████████| 116/116 [00:00<00:00, 556.36it/s]


Epoch [33], Train Loss : [0.00348] Val Loss : [0.00390] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 314.10it/s]
100%|██████████| 116/116 [00:00<00:00, 560.30it/s]


Epoch [34], Train Loss : [0.00396] Val Loss : [0.00401] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 314.41it/s]
100%|██████████| 116/116 [00:00<00:00, 518.90it/s]


Epoch [35], Train Loss : [0.00299] Val Loss : [0.00376] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 337.71it/s]
100%|██████████| 116/116 [00:00<00:00, 521.22it/s]


Epoch [36], Train Loss : [0.00411] Val Loss : [0.00359] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 331.01it/s]
100%|██████████| 116/116 [00:00<00:00, 630.25it/s]


Epoch [37], Train Loss : [0.00311] Val Loss : [0.00363] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 333.36it/s]
100%|██████████| 116/116 [00:00<00:00, 553.87it/s]


Epoch [38], Train Loss : [0.00350] Val Loss : [0.00374] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 314.32it/s]
100%|██████████| 116/116 [00:00<00:00, 515.46it/s]


Epoch [39], Train Loss : [0.00297] Val Loss : [0.00374] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 308.57it/s]
100%|██████████| 116/116 [00:00<00:00, 607.23it/s]


Epoch [40], Train Loss : [0.00368] Val Loss : [0.00358] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 332.94it/s]
100%|██████████| 116/116 [00:00<00:00, 604.34it/s]


Epoch [41], Train Loss : [0.00268] Val Loss : [0.00390] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 320.17it/s]
100%|██████████| 116/116 [00:00<00:00, 523.62it/s]


Epoch [42], Train Loss : [0.00303] Val Loss : [0.00341] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 321.83it/s]
100%|██████████| 116/116 [00:00<00:00, 594.78it/s]


Epoch [43], Train Loss : [0.00307] Val Loss : [0.00396] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 332.74it/s]
100%|██████████| 116/116 [00:00<00:00, 557.64it/s]


Epoch [44], Train Loss : [0.00358] Val Loss : [0.00373] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 319.55it/s]
100%|██████████| 116/116 [00:00<00:00, 562.95it/s]


Epoch [45], Train Loss : [0.00253] Val Loss : [0.00460] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 289.82it/s]
100%|██████████| 116/116 [00:00<00:00, 582.80it/s]


Epoch [46], Train Loss : [0.00303] Val Loss : [0.00466] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 313.89it/s]
100%|██████████| 116/116 [00:00<00:00, 498.81it/s]


Epoch [47], Train Loss : [0.00339] Val Loss : [0.00453] Val AUC : [0.99999] LEARNING RATE : [0.00010]


100%|██████████| 462/462 [00:01<00:00, 272.85it/s]
100%|██████████| 116/116 [00:00<00:00, 550.42it/s]


Epoch [48], Train Loss : [0.00282] Val Loss : [0.00404] Val AUC : [0.99999] LEARNING RATE : [0.00010]
Epoch 00048: reducing learning rate of group 0 to 1.0000e-05.


100%|██████████| 462/462 [00:01<00:00, 305.11it/s]
100%|██████████| 116/116 [00:00<00:00, 539.46it/s]


Epoch [49], Train Loss : [0.00343] Val Loss : [0.00430] Val AUC : [0.99999] LEARNING RATE : [0.00001]


100%|██████████| 462/462 [00:01<00:00, 300.64it/s]
100%|██████████| 116/116 [00:00<00:00, 552.03it/s]


Epoch [50], Train Loss : [0.00344] Val Loss : [0.00395] Val AUC : [0.99999] LEARNING RATE : [0.00001]


100%|██████████| 462/462 [00:01<00:00, 289.23it/s]
100%|██████████| 116/116 [00:00<00:00, 533.81it/s]


Epoch [51], Train Loss : [0.00302] Val Loss : [0.00389] Val AUC : [0.99999] LEARNING RATE : [0.00001]


100%|██████████| 462/462 [00:01<00:00, 304.06it/s]
100%|██████████| 116/116 [00:00<00:00, 580.10it/s]


Epoch [52], Train Loss : [0.00365] Val Loss : [0.00375] Val AUC : [0.99999] LEARNING RATE : [0.00001]


100%|██████████| 462/462 [00:01<00:00, 297.16it/s]
100%|██████████| 116/116 [00:00<00:00, 554.94it/s]


Epoch [53], Train Loss : [0.00245] Val Loss : [0.00423] Val AUC : [0.99999] LEARNING RATE : [0.00001]


100%|██████████| 462/462 [00:01<00:00, 304.16it/s]
100%|██████████| 116/116 [00:00<00:00, 630.35it/s]


Epoch [54], Train Loss : [0.00276] Val Loss : [0.00399] Val AUC : [0.99999] LEARNING RATE : [0.00001]
Epoch 00054: reducing learning rate of group 0 to 1.0000e-06.


100%|██████████| 462/462 [00:01<00:00, 304.46it/s]
100%|██████████| 116/116 [00:00<00:00, 529.54it/s]


Epoch [55], Train Loss : [0.00281] Val Loss : [0.00424] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 307.98it/s]
100%|██████████| 116/116 [00:00<00:00, 630.35it/s]


Epoch [56], Train Loss : [0.00270] Val Loss : [0.00428] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 324.95it/s]
100%|██████████| 116/116 [00:00<00:00, 611.37it/s]


Epoch [57], Train Loss : [0.00353] Val Loss : [0.00425] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 300.26it/s]
100%|██████████| 116/116 [00:00<00:00, 597.84it/s]


Epoch [58], Train Loss : [0.00260] Val Loss : [0.00421] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 309.76it/s]
100%|██████████| 116/116 [00:00<00:00, 591.61it/s]


Epoch [59], Train Loss : [0.00324] Val Loss : [0.00395] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 309.40it/s]
100%|██████████| 116/116 [00:00<00:00, 559.33it/s]


Epoch [60], Train Loss : [0.00291] Val Loss : [0.00412] Val AUC : [0.99999] LEARNING RATE : [0.00000]
Epoch 00060: reducing learning rate of group 0 to 1.0000e-07.


100%|██████████| 462/462 [00:01<00:00, 292.18it/s]
100%|██████████| 116/116 [00:00<00:00, 474.34it/s]


Epoch [61], Train Loss : [0.00220] Val Loss : [0.00415] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 299.14it/s]
100%|██████████| 116/116 [00:00<00:00, 536.91it/s]


Epoch [62], Train Loss : [0.00270] Val Loss : [0.00401] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 319.69it/s]
100%|██████████| 116/116 [00:00<00:00, 571.59it/s]


Epoch [63], Train Loss : [0.00286] Val Loss : [0.00419] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 310.86it/s]
100%|██████████| 116/116 [00:00<00:00, 471.45it/s]


Epoch [64], Train Loss : [0.00268] Val Loss : [0.00391] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 329.50it/s]
100%|██████████| 116/116 [00:00<00:00, 628.65it/s]


Epoch [65], Train Loss : [0.00284] Val Loss : [0.00389] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 329.46it/s]
100%|██████████| 116/116 [00:00<00:00, 564.94it/s]


Epoch [66], Train Loss : [0.00258] Val Loss : [0.00382] Val AUC : [0.99999] LEARNING RATE : [0.00000]
Epoch 00066: reducing learning rate of group 0 to 1.0000e-08.


100%|██████████| 462/462 [00:01<00:00, 327.29it/s]
100%|██████████| 116/116 [00:00<00:00, 559.38it/s]


Epoch [67], Train Loss : [0.00222] Val Loss : [0.00388] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 316.52it/s]
100%|██████████| 116/116 [00:00<00:00, 561.69it/s]


Epoch [68], Train Loss : [0.00236] Val Loss : [0.00401] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 316.82it/s]
100%|██████████| 116/116 [00:00<00:00, 587.63it/s]


Epoch [69], Train Loss : [0.00270] Val Loss : [0.00396] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 324.39it/s]
100%|██████████| 116/116 [00:00<00:00, 540.67it/s]


Epoch [70], Train Loss : [0.00237] Val Loss : [0.00429] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 327.73it/s]
100%|██████████| 116/116 [00:00<00:00, 569.71it/s]


Epoch [71], Train Loss : [0.00305] Val Loss : [0.00409] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 320.16it/s]
100%|██████████| 116/116 [00:00<00:00, 594.77it/s]


Epoch [72], Train Loss : [0.00268] Val Loss : [0.00413] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 327.43it/s]
100%|██████████| 116/116 [00:00<00:00, 633.34it/s]


Epoch [73], Train Loss : [0.00284] Val Loss : [0.00389] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 313.40it/s]
100%|██████████| 116/116 [00:00<00:00, 568.53it/s]


Epoch [74], Train Loss : [0.00306] Val Loss : [0.00413] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 319.74it/s]
100%|██████████| 116/116 [00:00<00:00, 591.74it/s]


Epoch [75], Train Loss : [0.00265] Val Loss : [0.00428] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 345.58it/s]
100%|██████████| 116/116 [00:00<00:00, 611.46it/s]


Epoch [76], Train Loss : [0.00214] Val Loss : [0.00370] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 331.87it/s]
100%|██████████| 116/116 [00:00<00:00, 579.91it/s]


Epoch [77], Train Loss : [0.00304] Val Loss : [0.00431] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 315.79it/s]
100%|██████████| 116/116 [00:00<00:00, 590.00it/s]


Epoch [78], Train Loss : [0.00241] Val Loss : [0.00463] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 343.87it/s]
100%|██████████| 116/116 [00:00<00:00, 644.36it/s]


Epoch [79], Train Loss : [0.00253] Val Loss : [0.00404] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 309.32it/s]
100%|██████████| 116/116 [00:00<00:00, 582.82it/s]


Epoch [80], Train Loss : [0.00342] Val Loss : [0.00423] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 310.93it/s]
100%|██████████| 116/116 [00:00<00:00, 616.91it/s]


Epoch [81], Train Loss : [0.00253] Val Loss : [0.00414] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 307.85it/s]
100%|██████████| 116/116 [00:00<00:00, 633.77it/s]


Epoch [82], Train Loss : [0.00261] Val Loss : [0.00433] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 326.17it/s]
100%|██████████| 116/116 [00:00<00:00, 486.95it/s]


Epoch [83], Train Loss : [0.00231] Val Loss : [0.00384] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 324.79it/s]
100%|██████████| 116/116 [00:00<00:00, 578.93it/s]


Epoch [84], Train Loss : [0.00272] Val Loss : [0.00403] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 319.40it/s]
100%|██████████| 116/116 [00:00<00:00, 618.61it/s]


Epoch [85], Train Loss : [0.00338] Val Loss : [0.00388] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 324.53it/s]
100%|██████████| 116/116 [00:00<00:00, 597.12it/s]


Epoch [86], Train Loss : [0.00305] Val Loss : [0.00403] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 322.19it/s]
100%|██████████| 116/116 [00:00<00:00, 568.53it/s]


Epoch [87], Train Loss : [0.00247] Val Loss : [0.00413] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 311.93it/s]
100%|██████████| 116/116 [00:00<00:00, 597.84it/s]


Epoch [88], Train Loss : [0.00325] Val Loss : [0.00415] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 305.78it/s]
100%|██████████| 116/116 [00:00<00:00, 594.77it/s]


Epoch [89], Train Loss : [0.00300] Val Loss : [0.00408] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 307.54it/s]
100%|██████████| 116/116 [00:00<00:00, 613.65it/s]


Epoch [90], Train Loss : [0.00323] Val Loss : [0.00403] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 332.99it/s]
100%|██████████| 116/116 [00:00<00:00, 630.32it/s]


Epoch [91], Train Loss : [0.00251] Val Loss : [0.00407] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 334.55it/s]
100%|██████████| 116/116 [00:00<00:00, 579.90it/s]


Epoch [92], Train Loss : [0.00264] Val Loss : [0.00454] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 333.50it/s]
100%|██████████| 116/116 [00:00<00:00, 600.95it/s]


Epoch [93], Train Loss : [0.00260] Val Loss : [0.00428] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 322.70it/s]
100%|██████████| 116/116 [00:00<00:00, 552.28it/s]


Epoch [94], Train Loss : [0.00287] Val Loss : [0.00388] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 329.76it/s]
100%|██████████| 116/116 [00:00<00:00, 562.75it/s]


Epoch [95], Train Loss : [0.00303] Val Loss : [0.00394] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 326.81it/s]
100%|██████████| 116/116 [00:00<00:00, 620.25it/s]


Epoch [96], Train Loss : [0.00275] Val Loss : [0.00429] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 322.02it/s]
100%|██████████| 116/116 [00:00<00:00, 555.68it/s]


Epoch [97], Train Loss : [0.00243] Val Loss : [0.00415] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 312.39it/s]
100%|██████████| 116/116 [00:00<00:00, 604.09it/s]


Epoch [98], Train Loss : [0.00294] Val Loss : [0.00416] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 329.00it/s]
100%|██████████| 116/116 [00:00<00:00, 594.77it/s]


Epoch [99], Train Loss : [0.00340] Val Loss : [0.00382] Val AUC : [0.99999] LEARNING RATE : [0.00000]


100%|██████████| 462/462 [00:01<00:00, 329.91it/s]
100%|██████████| 116/116 [00:00<00:00, 604.85it/s]

Epoch [100], Train Loss : [0.00302] Val Loss : [0.00384] Val AUC : [0.99999] LEARNING RATE : [0.00000]





### Inference

In [39]:
def get_features_without_e(df, train_mode=True, use_parallel=False):
    features = []
    labels = []
    for i, (index, row) in enumerate(tqdm(df.iterrows(), total=len(df)), 1):
        y, sr = librosa.load(os.path.join(CONFIG.ROOT_DIR, row['path']), sr=CONFIG.SR)
        
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC)
        delta_mfcc = librosa.feature.delta(mfcc)
        
        combined = np.vstack([mfcc, delta_mfcc])
        combined_mean = np.mean(combined, axis=1)
        features.append(combined_mean)
        
        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            label_vector[0 if label == 'fake' else 1] = 1
            labels.append(label_vector)
    
    if train_mode:
        return features, labels
    return features

In [40]:
test = pd.read_csv(os.path.join(CONFIG.ROOT_DIR, 'test.csv'))
test_mfcc = get_features_without_e(test, False)
test_dataset = CustomDataset(test_mfcc, None)
test_loader = DataLoader(
    test_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

100%|██████████| 50000/50000 [15:33<00:00, 53.56it/s]


In [41]:
# np.save(os.path.join(CONFIG.ROOT_DIR, 'npy/test_mfcc.npy'), test_mfcc)
# test_mfcc = np.load(os.path.join(CONFIG.ROOT_DIR, 'npy/test_mfcc.npy'))

In [42]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for features in tqdm(iter(test_loader)):
            features = features.float().to(device)
            
            probs = model(features)

            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
    return predictions

In [43]:
preds = inference(infer_model, test_loader, device)

100%|██████████| 521/521 [00:00<00:00, 774.58it/s]


## Submission

In [45]:
submit = pd.read_csv(os.path.join(CONFIG.ROOT_DIR,'./sample_submission.csv'))
submit.iloc[:, 1:] = preds
submit.head()

Unnamed: 0,id,fake,real
0,TEST_00000,0.9944219,0.005529
1,TEST_00001,0.9809787,0.018809
2,TEST_00002,0.9812275,0.018559
3,TEST_00003,0.000114517,0.999887
4,TEST_00004,8.947465e-07,0.999999


In [47]:
submit.to_csv(f'./output/submit_MLP_SR{CONFIG.SR}_N_MFCC{CONFIG.N_MFCC}_EPOCH{CONFIG.N_EPOCHS}_BATCHSIZE{CONFIG.BATCH_SIZE}.csv', index=False)