# Imports

In [1]:
import librosa

import numpy as np
import pandas as pd
import random

import torch
import torchmetrics
import os
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from joblib import Parallel, delayed

import tensorflow as tf



In [2]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

### Config

In [3]:
class Config:
    SR = 32000
    N_MFCC = 40
    
    # Dataset
    ROOT_DIR = 'C:/HongBeomsun/Dataset_SSD/FakeVoice'
    
    # Training
    N_CLASSES = 2
    BATCH_SIZE = 64
    N_EPOCHS = 70
    LEARNING_RATE = 3e-4
    
    # Others
    SEED = 42
    
CONFIG = Config()

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

In [5]:
seed_everything(CONFIG.SEED)

### Data

In [6]:
df = pd.read_csv(os.path.join(CONFIG.ROOT_DIR,'train.csv'))

In [7]:
print(len(df))
df.head()

55438


Unnamed: 0,id,path,label
0,RUNQPNJF,./train/RUNQPNJF.ogg,real
1,JFAWUOGJ,./train/JFAWUOGJ.ogg,fake
2,RDKEKEVX,./train/RDKEKEVX.ogg,real
3,QYHJDOFK,./train/QYHJDOFK.ogg,real
4,RSPQNHAO,./train/RSPQNHAO.ogg,real


In [8]:
df['label'].value_counts()

label
fake    27818
real    27620
Name: count, dtype: int64

### Data Argumentation

### Train test split

In [9]:
train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CONFIG.SEED, stratify=df['label'])

In [10]:
train['label'].value_counts()
val['label'].value_counts()

label
fake    5564
real    5524
Name: count, dtype: int64

### Feature Extraction

In [11]:
def noise(data):
    noise_amp = 0.01*np.random.uniform()*np.amax(data)
    data = data + noise_amp * np.random.normal(size=data.shape[0])
    return data

def stretch(data, rate=0.8):
    stretch_data = librosa.effects.time_stretch(data, rate=rate)
    return stretch_data

def pitch(data, sampling_rate, pitch_factor=0.7):
    pitch_data = librosa.effects.pitch_shift(y=data, sr=sampling_rate, n_steps=pitch_factor)
    return pitch_data

In [12]:
def normalize_volume(y, target_dB=-20):
    rms = np.sqrt(np.mean(y**2))
    loudness = 20 * np.log10(rms)
    loudness_change_dB = target_dB - loudness
    y_normalized = y * (10 ** (loudness_change_dB / 20))
    return y_normalized

def load_audio(file_path, sr):
    y, sr = librosa.load(file_path, sr=sr)
    y = normalize_volume(y)
    return y, sr

def extract_features(y, sr):
    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC).T, axis=0)
    return mfcc

def augment_data(y, sr):
    augmented_data = []
    augmented_data.append(noise(y))
    augmented_data.append(stretch(y))
    augmented_data.append(pitch(y, sr))
    return augmented_data

def create_void_data(data, sr):
    void_data = np.zeros_like(data)
    void_data = noise(void_data)
    return void_data

def create_duo_data(data1, data2, sr):
    if len(data1) > len(data2):
        data2 = np.pad(data2, (0, len(data1)-len(data2)), 'constant')
    else:
        data1 = np.pad(data1, (0, len(data2)-len(data1)), 'constant')
        
    duo_data = data1 + data2
    max_val = np.max(np.abs(duo_data))
    if max_val > 1:
        duo_data = duo_data / max_val
    
    return duo_data

def mix_two_random_data(df, sr):
    idx1, idx2 = random.sample(range(len(df)), 2)
    y1, _ = load_audio(os.path.join(CONFIG.ROOT_DIR, df.iloc[idx1]['path']), sr)
    y2, _ = load_audio(os.path.join(CONFIG.ROOT_DIR, df.iloc[idx2]['path']), sr)
    y_duo = create_duo_data(y1, y2, sr)
    label_y1 = df.iloc[idx1]['label']
    label_y2 = df.iloc[idx2]['label']
    
    label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
    label_vector[0 if label_y1 == 'fake' else 1] = 1
    label_vector[0 if label_y2 == 'fake' else 1] = 1
    
    return y_duo, label_vector

In [13]:
def get_features(df, train_mode=True, augment=False):
    features = []
    labels = []
    total = len(df)
    
    for i, (index, row) in enumerate(tqdm(df.iterrows(), total=total), 1):
        y, sr = load_audio(os.path.join(CONFIG.ROOT_DIR, row['path']), CONFIG.SR)
        
        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            label_vector[0 if label == 'fake' else 1] = 1
            labels.append(label_vector)
        
        features.append(extract_features(y, sr))
    
    if augment:
        augmented_features = []
        augmented_labels = []
        
        num_mixed_samples = int(total * 0.1)
        for _ in range(num_mixed_samples):
            try:
                y_duo, y_duo_label = mix_two_random_data(df, CONFIG.SR)
                augmented_features.append(extract_features(y_duo, CONFIG.SR))
                augmented_labels.append(y_duo_label)
            except Exception as e:
                print(f'Error during data augmentation: {e}')
                continue
        
        num_augmented_samples = int(total * 0.2)
        original_features = list(features)
        original_labels = list(labels)
        for idx in range(num_augmented_samples):
            try:
                augmented_data = augment_data(original_features[idx], CONFIG.SR)
                for aug_y in augmented_data:
                    augmented_features.append(extract_features(aug_y, CONFIG.SR))
                    augmented_labels.append(original_labels[idx])
            except Exception as e:
                print(f'Error during augmentation: {e}')
                continue
        
        features.extend(augmented_features)
        labels.extend(augmented_labels)
    
    if train_mode:
        return np.array(features), np.array(labels)
    return np.array(features)

In [14]:
train_features, train_labels = get_features(train, train_mode=True, augment=True)
val_features, val_labels = get_features(val, train_mode=True, augment=False)

100%|██████████| 44350/44350 [10:17<00:00, 71.83it/s]
100%|██████████| 11088/11088 [01:54<00:00, 96.53it/s] 


In [None]:
def save_np():
    os.makedirs(os.path.join(CONFIG.ROOT_DIR, 'npy'), exist_ok=True)
    np.save(os.path.join(CONFIG.ROOT_DIR, 'npy/train_features_VariousFeatures_1000.npy'), train_features)
    np.save(os.path.join(CONFIG.ROOT_DIR, 'npy/train_labels_VariousFeatures_1000.npy'), train_labels)
    np.save(os.path.join(CONFIG.ROOT_DIR, 'npy/val_features_VariousFeatures_1000.npy'), val_features)
    np.save(os.path.join(CONFIG.ROOT_DIR, 'npy/val_labels_VariousFeatures_1000.npy'), val_labels)

In [None]:
def load_np():
    train_features = np.load(os.path.join(CONFIG.ROOT_DIR, 'npy/train_features_VariousFeatures_1000.npy'))
    train_labels = np.load(os.path.join(CONFIG.ROOT_DIR, 'npy/train_labels_VariousFeatures_1000.npy'))
    val_features = np.load(os.path.join(CONFIG.ROOT_DIR, 'npy/val_features_VariousFeatures_1000.npy'))
    val_labels = np.load(os.path.join(CONFIG.ROOT_DIR, 'npy/val_labels_VariousFeatures_1000.npy'))
    
    return train_features, train_labels, val_features, val_labels

In [None]:
# save_np()
# train_features, train_labels, val_features, val_labels = load_np()

In [16]:
train_features = np.array(train_features)
train_labels = np.array(train_labels)
val_features = np.array(val_features)
val_labels = np.array(val_labels)

In [17]:
print(train_features.shape, len(train_labels))
print(val_features.shape, len(val_labels))

(75395, 40) 75395
(11088, 40) 11088


### Dataset

In [18]:
class CustomDataset(Dataset):
    def __init__(self, mfcc, label):
        self.mfcc = mfcc
        self.label = label

    def __len__(self):
        return len(self.mfcc)

    def __getitem__(self, index):
        if self.label is not None:
            return self.mfcc[index], self.label[index]
        return self.mfcc[index]

In [19]:
train_dataset = CustomDataset(train_features, train_labels)
val_dataset = CustomDataset(val_features, val_labels)

In [20]:
train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

### Define Model

In [22]:
class MLP(nn.Module):
    def __init__(self, input_dim=40, output_dim=CONFIG.N_CLASSES):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.dropout1 = nn.Dropout(0.3)
        
        self.fc2 = nn.Linear(128, 128)
        self.bn2 = nn.BatchNorm1d(128)
        self.dropout2 = nn.Dropout(0.3)
        
        self.fc3 = nn.Linear(128, 64)
        self.bn3 = nn.BatchNorm1d(64)
        self.dropout3 = nn.Dropout(0.3)
        
        self.fc4 = nn.Linear(64, output_dim)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = self.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)
        x = self.relu(self.bn3(self.fc3(x)))
        x = self.dropout3(x)
        x = self.fc4(x)
        x = self.sigmoid(x)
        return x

### MLFlow

In [28]:
import mlflow
import mlflow.pytorch

mlflow.set_experiment('FakeVoice')

def mlflow_run_decorator(run_name=None):
    def decorator(func):
        def wrapper(*args, **kwargs):
            mlflow.start_run(run_name=run_name)
            try:
                result = func(*args, **kwargs)
                mlflow.set_tag("Status", "SUCCEESS")
            except Exception as e:
                mlflow.log_param("Exception", e)
                mlflow.set_tag("Status", "FAIL")
                raise e
            finally:
                mlflow.end_run()
            return result
        return wrapper
    return decorator

### Train & Validation

In [29]:
from sklearn.metrics import roc_auc_score
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

In [30]:
@mlflow_run_decorator(run_name='MLP_Augment')
def training(model, scheduler, optimizer, train_loader, val_loader, device):
    mlflow.log_params(vars(CONFIG))
    
    model.to(device)
    criterion = nn.BCELoss().to(device)
    
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, CONFIG.N_EPOCHS+1):
        model.train()
        train_loss = []
        for features, labels in tqdm(iter(train_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            optimizer.zero_grad()
            
            output = model(features)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val AUC : [{_val_score:.5f}] LEARNING RATE : [{optimizer.param_groups[0]["lr"]:.5f}]')
    
        mlflow.log_metrics({'train_loss': _train_loss, 'val_loss': _val_loss, 'val_auc': _val_score}, step=epoch)
        scheduler.step(_val_loss)
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model

In [31]:
def multiLabel_AUC(y_true, y_scores):
    auc_scores = []
    for i in range(y_true.shape[1]):
        auc = roc_auc_score(y_true[:, i], y_scores[:, i])
        auc_scores.append(auc)
    mean_auc_score = np.mean(auc_scores)
    return mean_auc_score

In [32]:
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss, all_labels, all_probs = [], [], []
    
    with torch.no_grad():
        for features, labels in tqdm(iter(val_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            probs = model(features)
            
            loss = criterion(probs, labels)

            val_loss.append(loss.item())

            all_labels.append(labels.cpu().numpy())
            all_probs.append(probs.cpu().numpy())
        
        _val_loss = np.mean(val_loss)

        all_labels = np.concatenate(all_labels, axis=0)
        all_probs = np.concatenate(all_probs, axis=0)
        
        # Calculate AUC score
        auc_score = multiLabel_AUC(all_labels, all_probs)
    
    return _val_loss, auc_score

### Run

In [33]:
model = MLP()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CONFIG.LEARNING_RATE)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True)

infer_model = training(model, scheduler, optimizer, train_loader, val_loader, device)

100%|██████████| 1179/1179 [00:04<00:00, 246.18it/s]
100%|██████████| 174/174 [00:00<00:00, 717.80it/s]


Epoch [1], Train Loss : [0.46845] Val Loss : [0.15661] Val AUC : [0.98742] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:03<00:00, 303.48it/s]
100%|██████████| 174/174 [00:00<00:00, 637.59it/s]


Epoch [2], Train Loss : [0.32698] Val Loss : [0.09368] Val AUC : [0.99671] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:03<00:00, 298.52it/s]
100%|██████████| 174/174 [00:00<00:00, 655.23it/s]


Epoch [3], Train Loss : [0.28771] Val Loss : [0.06184] Val AUC : [0.99841] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 294.40it/s]
100%|██████████| 174/174 [00:00<00:00, 676.97it/s]


Epoch [4], Train Loss : [0.26030] Val Loss : [0.05236] Val AUC : [0.99893] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 285.54it/s]
100%|██████████| 174/174 [00:00<00:00, 694.10it/s]


Epoch [5], Train Loss : [0.24668] Val Loss : [0.05133] Val AUC : [0.99928] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 281.47it/s]
100%|██████████| 174/174 [00:00<00:00, 680.92it/s]


Epoch [6], Train Loss : [0.23054] Val Loss : [0.05261] Val AUC : [0.99938] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 286.25it/s]
100%|██████████| 174/174 [00:00<00:00, 584.97it/s]


Epoch [7], Train Loss : [0.21901] Val Loss : [0.04160] Val AUC : [0.99959] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 267.25it/s]
100%|██████████| 174/174 [00:00<00:00, 587.74it/s]


Epoch [8], Train Loss : [0.21041] Val Loss : [0.03653] Val AUC : [0.99957] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 288.44it/s]
100%|██████████| 174/174 [00:00<00:00, 544.68it/s]


Epoch [9], Train Loss : [0.20971] Val Loss : [0.03743] Val AUC : [0.99959] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:03<00:00, 294.89it/s]
100%|██████████| 174/174 [00:00<00:00, 625.48it/s]


Epoch [10], Train Loss : [0.19702] Val Loss : [0.03282] Val AUC : [0.99969] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 280.95it/s]
100%|██████████| 174/174 [00:00<00:00, 610.86it/s]


Epoch [11], Train Loss : [0.19000] Val Loss : [0.03400] Val AUC : [0.99981] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 286.41it/s]
100%|██████████| 174/174 [00:00<00:00, 634.74it/s]


Epoch [12], Train Loss : [0.18692] Val Loss : [0.03011] Val AUC : [0.99981] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:03<00:00, 302.75it/s]
100%|██████████| 174/174 [00:00<00:00, 632.47it/s]


Epoch [13], Train Loss : [0.18153] Val Loss : [0.03077] Val AUC : [0.99965] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 291.10it/s]
100%|██████████| 174/174 [00:00<00:00, 613.67it/s]


Epoch [14], Train Loss : [0.17884] Val Loss : [0.03248] Val AUC : [0.99977] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 274.49it/s]
100%|██████████| 174/174 [00:00<00:00, 602.28it/s]


Epoch [15], Train Loss : [0.17258] Val Loss : [0.02558] Val AUC : [0.99979] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 284.65it/s]
100%|██████████| 174/174 [00:00<00:00, 582.58it/s]


Epoch [16], Train Loss : [0.17011] Val Loss : [0.02463] Val AUC : [0.99988] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 275.64it/s]
100%|██████████| 174/174 [00:00<00:00, 566.64it/s]


Epoch [17], Train Loss : [0.17109] Val Loss : [0.03026] Val AUC : [0.99967] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 269.92it/s]
100%|██████████| 174/174 [00:00<00:00, 648.82it/s]


Epoch [18], Train Loss : [0.16296] Val Loss : [0.02968] Val AUC : [0.99985] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 285.40it/s]
100%|██████████| 174/174 [00:00<00:00, 628.02it/s]


Epoch [19], Train Loss : [0.16355] Val Loss : [0.03257] Val AUC : [0.99988] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 293.07it/s]
100%|██████████| 174/174 [00:00<00:00, 637.00it/s]


Epoch [20], Train Loss : [0.16153] Val Loss : [0.02346] Val AUC : [0.99989] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 285.85it/s]
100%|██████████| 174/174 [00:00<00:00, 636.11it/s]


Epoch [21], Train Loss : [0.15979] Val Loss : [0.02483] Val AUC : [0.99983] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 292.47it/s]
100%|██████████| 174/174 [00:00<00:00, 643.00it/s]


Epoch [22], Train Loss : [0.16040] Val Loss : [0.02424] Val AUC : [0.99987] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:03<00:00, 304.77it/s]
100%|██████████| 174/174 [00:00<00:00, 631.75it/s]


Epoch [23], Train Loss : [0.15651] Val Loss : [0.02556] Val AUC : [0.99991] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:03<00:00, 299.55it/s]
100%|██████████| 174/174 [00:00<00:00, 649.76it/s]


Epoch [24], Train Loss : [0.15330] Val Loss : [0.02562] Val AUC : [0.99990] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 275.55it/s]
100%|██████████| 174/174 [00:00<00:00, 600.10it/s]


Epoch [25], Train Loss : [0.15437] Val Loss : [0.03164] Val AUC : [0.99987] LEARNING RATE : [0.00030]


100%|██████████| 1179/1179 [00:04<00:00, 281.51it/s]
100%|██████████| 174/174 [00:00<00:00, 609.36it/s]


Epoch [26], Train Loss : [0.15138] Val Loss : [0.02807] Val AUC : [0.99983] LEARNING RATE : [0.00030]
Epoch 00026: reducing learning rate of group 0 to 3.0000e-05.


100%|██████████| 1179/1179 [00:04<00:00, 279.61it/s]
100%|██████████| 174/174 [00:00<00:00, 618.65it/s]


Epoch [27], Train Loss : [0.14144] Val Loss : [0.02615] Val AUC : [0.99986] LEARNING RATE : [0.00003]


100%|██████████| 1179/1179 [00:04<00:00, 266.40it/s]
100%|██████████| 174/174 [00:00<00:00, 565.76it/s]


Epoch [28], Train Loss : [0.13877] Val Loss : [0.02805] Val AUC : [0.99989] LEARNING RATE : [0.00003]


100%|██████████| 1179/1179 [00:04<00:00, 279.11it/s]
100%|██████████| 174/174 [00:00<00:00, 614.76it/s]


Epoch [29], Train Loss : [0.13909] Val Loss : [0.02635] Val AUC : [0.99984] LEARNING RATE : [0.00003]


100%|██████████| 1179/1179 [00:03<00:00, 302.09it/s]
100%|██████████| 174/174 [00:00<00:00, 608.31it/s]


Epoch [30], Train Loss : [0.13787] Val Loss : [0.02473] Val AUC : [0.99990] LEARNING RATE : [0.00003]


100%|██████████| 1179/1179 [00:04<00:00, 285.71it/s]
100%|██████████| 174/174 [00:00<00:00, 627.57it/s]


Epoch [31], Train Loss : [0.13753] Val Loss : [0.02637] Val AUC : [0.99992] LEARNING RATE : [0.00003]


100%|██████████| 1179/1179 [00:04<00:00, 290.02it/s]
100%|██████████| 174/174 [00:00<00:00, 580.88it/s]


Epoch [32], Train Loss : [0.13600] Val Loss : [0.02660] Val AUC : [0.99985] LEARNING RATE : [0.00003]
Epoch 00032: reducing learning rate of group 0 to 3.0000e-06.


100%|██████████| 1179/1179 [00:03<00:00, 306.14it/s]
100%|██████████| 174/174 [00:00<00:00, 631.48it/s]


Epoch [33], Train Loss : [0.13824] Val Loss : [0.02313] Val AUC : [0.99992] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:03<00:00, 299.26it/s]
100%|██████████| 174/174 [00:00<00:00, 591.28it/s]


Epoch [34], Train Loss : [0.13624] Val Loss : [0.02478] Val AUC : [0.99987] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 272.14it/s]
100%|██████████| 174/174 [00:00<00:00, 589.64it/s]


Epoch [35], Train Loss : [0.13722] Val Loss : [0.02478] Val AUC : [0.99992] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 282.01it/s]
100%|██████████| 174/174 [00:00<00:00, 610.25it/s]


Epoch [36], Train Loss : [0.13505] Val Loss : [0.02257] Val AUC : [0.99991] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 280.47it/s]
100%|██████████| 174/174 [00:00<00:00, 589.70it/s]


Epoch [37], Train Loss : [0.13298] Val Loss : [0.02502] Val AUC : [0.99989] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 270.20it/s]
100%|██████████| 174/174 [00:00<00:00, 606.16it/s]


Epoch [38], Train Loss : [0.13884] Val Loss : [0.02327] Val AUC : [0.99989] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 276.82it/s]
100%|██████████| 174/174 [00:00<00:00, 558.49it/s]


Epoch [39], Train Loss : [0.13684] Val Loss : [0.02553] Val AUC : [0.99991] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:03<00:00, 303.97it/s]
100%|██████████| 174/174 [00:00<00:00, 636.10it/s]


Epoch [40], Train Loss : [0.13659] Val Loss : [0.02600] Val AUC : [0.99989] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 291.70it/s]
100%|██████████| 174/174 [00:00<00:00, 633.78it/s]


Epoch [41], Train Loss : [0.13354] Val Loss : [0.02527] Val AUC : [0.99979] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 285.44it/s]
100%|██████████| 174/174 [00:00<00:00, 603.22it/s]


Epoch [42], Train Loss : [0.13478] Val Loss : [0.02359] Val AUC : [0.99989] LEARNING RATE : [0.00000]
Epoch 00042: reducing learning rate of group 0 to 3.0000e-07.


100%|██████████| 1179/1179 [00:03<00:00, 304.11it/s]
100%|██████████| 174/174 [00:00<00:00, 601.68it/s]


Epoch [43], Train Loss : [0.13571] Val Loss : [0.02236] Val AUC : [0.99993] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 294.27it/s]
100%|██████████| 174/174 [00:00<00:00, 624.60it/s]


Epoch [44], Train Loss : [0.13402] Val Loss : [0.02618] Val AUC : [0.99990] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 275.07it/s]
100%|██████████| 174/174 [00:00<00:00, 605.13it/s]


Epoch [45], Train Loss : [0.13392] Val Loss : [0.02591] Val AUC : [0.99983] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 284.13it/s]
100%|██████████| 174/174 [00:00<00:00, 620.78it/s]


Epoch [46], Train Loss : [0.13403] Val Loss : [0.02695] Val AUC : [0.99992] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 284.30it/s]
100%|██████████| 174/174 [00:00<00:00, 633.78it/s]


Epoch [47], Train Loss : [0.13474] Val Loss : [0.02492] Val AUC : [0.99992] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 272.22it/s]
100%|██████████| 174/174 [00:00<00:00, 580.86it/s]


Epoch [48], Train Loss : [0.13860] Val Loss : [0.02675] Val AUC : [0.99985] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 276.38it/s]
100%|██████████| 174/174 [00:00<00:00, 635.83it/s]


Epoch [49], Train Loss : [0.13479] Val Loss : [0.02594] Val AUC : [0.99982] LEARNING RATE : [0.00000]
Epoch 00049: reducing learning rate of group 0 to 3.0000e-08.


100%|██████████| 1179/1179 [00:03<00:00, 305.24it/s]
100%|██████████| 174/174 [00:00<00:00, 581.61it/s]


Epoch [50], Train Loss : [0.13566] Val Loss : [0.02218] Val AUC : [0.99993] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:03<00:00, 299.96it/s]
100%|██████████| 174/174 [00:00<00:00, 606.78it/s]


Epoch [51], Train Loss : [0.13726] Val Loss : [0.02463] Val AUC : [0.99989] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 289.69it/s]
100%|██████████| 174/174 [00:00<00:00, 609.56it/s]


Epoch [52], Train Loss : [0.13681] Val Loss : [0.02511] Val AUC : [0.99982] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:03<00:00, 300.33it/s]
100%|██████████| 174/174 [00:00<00:00, 617.16it/s]


Epoch [53], Train Loss : [0.13552] Val Loss : [0.02374] Val AUC : [0.99990] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:03<00:00, 295.54it/s]
100%|██████████| 174/174 [00:00<00:00, 597.48it/s]


Epoch [54], Train Loss : [0.13822] Val Loss : [0.02484] Val AUC : [0.99992] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 271.22it/s]
100%|██████████| 174/174 [00:00<00:00, 603.02it/s]


Epoch [55], Train Loss : [0.13493] Val Loss : [0.02843] Val AUC : [0.99992] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 271.74it/s]
100%|██████████| 174/174 [00:00<00:00, 635.24it/s]


Epoch [56], Train Loss : [0.13507] Val Loss : [0.02509] Val AUC : [0.99991] LEARNING RATE : [0.00000]
Epoch 00056: reducing learning rate of group 0 to 3.0000e-09.


100%|██████████| 1179/1179 [00:04<00:00, 291.59it/s]
100%|██████████| 174/174 [00:00<00:00, 591.67it/s]


Epoch [57], Train Loss : [0.13620] Val Loss : [0.02906] Val AUC : [0.99981] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 274.60it/s]
100%|██████████| 174/174 [00:00<00:00, 592.76it/s]


Epoch [58], Train Loss : [0.13565] Val Loss : [0.02843] Val AUC : [0.99991] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 280.65it/s]
100%|██████████| 174/174 [00:00<00:00, 534.43it/s]


Epoch [59], Train Loss : [0.13641] Val Loss : [0.02422] Val AUC : [0.99992] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:03<00:00, 301.95it/s]
100%|██████████| 174/174 [00:00<00:00, 635.27it/s]


Epoch [60], Train Loss : [0.13438] Val Loss : [0.02726] Val AUC : [0.99990] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:03<00:00, 297.48it/s]
100%|██████████| 174/174 [00:00<00:00, 608.20it/s]


Epoch [61], Train Loss : [0.13657] Val Loss : [0.02274] Val AUC : [0.99991] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 288.35it/s]
100%|██████████| 174/174 [00:00<00:00, 639.63it/s]


Epoch [62], Train Loss : [0.13465] Val Loss : [0.02083] Val AUC : [0.99992] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:03<00:00, 297.98it/s]
100%|██████████| 174/174 [00:00<00:00, 601.75it/s]


Epoch [63], Train Loss : [0.14009] Val Loss : [0.02261] Val AUC : [0.99994] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 292.18it/s]
100%|██████████| 174/174 [00:00<00:00, 588.69it/s]


Epoch [64], Train Loss : [0.13404] Val Loss : [0.02646] Val AUC : [0.99991] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 274.24it/s]
100%|██████████| 174/174 [00:00<00:00, 624.69it/s]


Epoch [65], Train Loss : [0.13407] Val Loss : [0.02557] Val AUC : [0.99991] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 270.18it/s]
100%|██████████| 174/174 [00:00<00:00, 611.49it/s]


Epoch [66], Train Loss : [0.13511] Val Loss : [0.02844] Val AUC : [0.99988] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 283.48it/s]
100%|██████████| 174/174 [00:00<00:00, 622.28it/s]


Epoch [67], Train Loss : [0.13409] Val Loss : [0.02388] Val AUC : [0.99990] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 274.70it/s]
100%|██████████| 174/174 [00:00<00:00, 626.45it/s]


Epoch [68], Train Loss : [0.13506] Val Loss : [0.02446] Val AUC : [0.99991] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 280.02it/s]
100%|██████████| 174/174 [00:00<00:00, 567.48it/s]


Epoch [69], Train Loss : [0.13334] Val Loss : [0.02322] Val AUC : [0.99991] LEARNING RATE : [0.00000]


100%|██████████| 1179/1179 [00:04<00:00, 294.43it/s]
100%|██████████| 174/174 [00:00<00:00, 623.60it/s]


Epoch [70], Train Loss : [0.13665] Val Loss : [0.02707] Val AUC : [0.99970] LEARNING RATE : [0.00000]


### Inference

In [15]:
test = pd.read_csv(os.path.join(CONFIG.ROOT_DIR, 'test.csv'))
test_mfcc = get_features(test, train_mode=False, augment=False)

100%|██████████| 50000/50000 [11:06<00:00, 75.05it/s]


In [34]:
test_dataset = CustomDataset(test_mfcc, None)
test_loader = DataLoader(
    test_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

In [35]:
# np.save(os.path.join(CONFIG.ROOT_DIR, 'npy/test_VariousFeatures_1000.npy'), test_mfcc)
# test_mfcc = np.load(os.path.join(CONFIG.ROOT_DIR, 'npy/test_VariousFeatures_1000.npy'))

In [36]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for features in tqdm(iter(test_loader)):
            features = features.float().to(device)
            
            probs = model(features)

            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
    return predictions

In [37]:
preds = inference(infer_model, test_loader, device)

100%|██████████| 782/782 [00:01<00:00, 656.64it/s]


### Submission

In [38]:
submit = pd.read_csv(os.path.join(CONFIG.ROOT_DIR,'./sample_submission.csv'))
submit.iloc[:, 1:] = preds
submit.head()

Unnamed: 0,id,fake,real
0,TEST_00000,0.714056,0.352337
1,TEST_00001,0.521525,0.493831
2,TEST_00002,0.520113,0.539029
3,TEST_00003,0.133667,0.959455
4,TEST_00004,0.244149,0.993702


In [40]:
submit.to_csv(f'./output/submit_MLP_Augment.csv', index=False)

### AfterTest

In [None]:
print(model(torch.tensor(train_features).float().to(device)).cpu().detach().numpy()[:10])
print(train_labels[:10])

In [None]:
np.where((train_labels[:, 0] == 1) & (train_labels[:, 1] == 1))[0]