In [7]:
!pip install librosa
!pip install torchmetrics



In [8]:
import copy
import os
import random

import librosa
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torchmetrics
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from torch import nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [9]:
import warnings

# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [29]:
class Config:
    """Constants shared by the feature extraction, model and training cells."""

    # Audio / features
    SR = 32000      # sampling rate passed to librosa.load
    N_MFCC = 13     # number of MFCC coefficients; also the default MLP input width
    # Dataset
    # Folder containing train.csv, test.csv and sample_submission.csv. It can be
    # overridden with the SW_DATA_ROOT environment variable so the notebook is not
    # tied to one machine; the default is the original local path.
    ROOT_FOLDER = os.environ.get("SW_DATA_ROOT", r"C:\Users\KimDongyoung\Downloads\SW중심대학")
    N_CLASSES = 2   # length of the one-hot label vector (index 0 = fake, 1 = real)
    BATCH_SIZE = 96
    N_EPOCHS = 10   # upper bound on training epochs
    LR = 1e-4       # learning rate handed to the optimizer
    DROPOUT_RATE = 0.3
    # Others
    SEED = 42       # seed for the RNGs and the train/validation split

CONFIG = Config()

In [30]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and configures cuDNN for deterministic behaviour.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # cover every GPU, not only the current one
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different algorithms from run to run, which
    # undermines the determinism requested above, so it has to be disabled.
    torch.backends.cudnn.benchmark = False

seed_everything(CONFIG.SEED) # Fix the seed

In [31]:
# Load the labelled table and hold out 20% of it for validation.
# NOTE(review): the name `train` is rebound by `def train(...)` further down the
# notebook, so this DataFrame is no longer reachable after that cell has run.
df = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'train.csv'))
train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CONFIG.SEED)

# Take explicit copies before adding columns: the split results are slices of `df`,
# and assigning into them directly is a chained assignment (the resulting pandas
# warning is hidden because all warnings are silenced above).
train = train.copy()
val = val.copy()

# Update the 'path' column to have the full path
train['path'] = train['path'].apply(lambda x: os.path.join(CONFIG.ROOT_FOLDER, x))
val['path'] = val['path'].apply(lambda x: os.path.join(CONFIG.ROOT_FOLDER, x))

# Ensure test paths are also updated
test = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'test.csv'))
test['path'] = test['path'].apply(lambda x: os.path.join(CONFIG.ROOT_FOLDER, x))

In [32]:
# Display the validation split (the notebook renders it truncated).
val

Unnamed: 0,id,path,label
49798,PUOXNOKJ,C:\Users\KimDongyoung\Downloads\SW중심대학\./train...,real
54292,GXOIPDJP,C:\Users\KimDongyoung\Downloads\SW중심대학\./train...,fake
40359,FOEQKPPR,C:\Users\KimDongyoung\Downloads\SW중심대학\./train...,fake
50441,IYASAVDT,C:\Users\KimDongyoung\Downloads\SW중심대학\./train...,real
37723,VLWIXPTC,C:\Users\KimDongyoung\Downloads\SW중심대학\./train...,real
...,...,...,...
39831,WQMWFZRS,C:\Users\KimDongyoung\Downloads\SW중심대학\./train...,fake
10242,KYLYAJSQ,C:\Users\KimDongyoung\Downloads\SW중심대학\./train...,fake
21979,AEFBUARF,C:\Users\KimDongyoung\Downloads\SW중심대학\./train...,real
17086,VDPZMHZX,C:\Users\KimDongyoung\Downloads\SW중심대학\./train...,fake


In [33]:
def get_mfcc_feature(df, train_mode=True):
    """Extract one time-averaged MFCC vector per audio file listed in ``df``.

    Args:
        df: DataFrame with a 'path' column (audio file path) and, when
            ``train_mode`` is true, a 'label' column.
        train_mode: when true, also build a one-hot label vector per file
            (index 0 for 'fake', index 1 for any other label).

    Returns:
        ``(features, labels)`` when ``train_mode`` is true, otherwise
        ``features``. Both are Python lists of NumPy arrays.

    Note:
        Rows whose file does not exist are reported and skipped, so the result
        can be shorter than ``df``. In inference mode that would shift the
        predictions relative to the rows of ``df``.
    """
    features = []
    labels = []
    # iterrows() is a generator without a length; passing the total explicitly
    # lets the progress bar show percentage and remaining time.
    for _, row in tqdm(df.iterrows(), total=len(df)):
        file_path = row['path']
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue
        # Load audio file using librosa
        y, sr = librosa.load(file_path, sr=CONFIG.SR)

        # Extract MFCC features using librosa, then average each coefficient
        # over all frames to get a fixed-length vector.
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC)
        mfcc = np.mean(mfcc.T, axis=0)
        features.append(mfcc)

        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            label_vector[0 if label == 'fake' else 1] = 1
            labels.append(label_vector)

    if train_mode:
        return features, labels
    return features

# Extract MFCC features and one-hot labels for both splits.
train_mfcc, train_labels = get_mfcc_feature(train, train_mode=True)
val_mfcc, val_labels = get_mfcc_feature(val, train_mode=True)

44350it [08:48, 83.93it/s] 
11088it [02:11, 84.20it/s]


In [38]:
class CustomDataset(Dataset):
    """Dataset pairing each feature vector with its label, when labels exist.

    Args:
        mfcc: indexable, sized collection of feature vectors.
        label: indexable collection of labels aligned with ``mfcc``, or
            ``None`` for unlabeled data.
    """

    def __init__(self, mfcc, label):
        self.mfcc = mfcc
        self.label = label

    def __len__(self):
        """Return the number of feature vectors."""
        return len(self.mfcc)

    def __getitem__(self, index):
        """Return ``(feature, label)`` when labels exist, else just the feature."""
        sample = self.mfcc[index]
        if self.label is None:
            return sample
        return sample, self.label[index]

# Wrap the extracted features and labels of each split in a dataset.
train_dataset = CustomDataset(mfcc=train_mfcc, label=train_labels)
val_dataset = CustomDataset(mfcc=val_mfcc, label=val_labels)

In [39]:
# Training batches are reshuffled every epoch; validation batches keep their order.
train_loader = DataLoader(train_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

In [40]:
class MLP(nn.Module):
    """Three-layer perceptron with ReLU, dropout and a sigmoid output.

    Args:
        input_dim: size of each input feature vector.
        hidden_dim: width of the two hidden layers.
        output_dim: number of output units.
        dropout_rate: dropout probability applied after each hidden layer.
    """

    def __init__(self, input_dim=CONFIG.N_MFCC, hidden_dim=256, output_dim=CONFIG.N_CLASSES, dropout_rate=CONFIG.DROPOUT_RATE):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Map a batch of feature vectors to per-output values in (0, 1)."""
        # Hidden block 1: linear -> ReLU -> dropout
        hidden = self.dropout(self.relu(self.fc1(x)))
        # Hidden block 2: linear -> ReLU -> dropout
        hidden = self.dropout(self.relu(self.fc2(hidden)))
        # Output layer squashed independently per unit by a sigmoid
        return torch.sigmoid(self.fc3(hidden))

In [41]:
def train(model, optimizer, train_loader, val_loader, device, patience=5):
    """Train ``model`` with BCE loss and return it with its best-validation weights.

    Runs up to ``CONFIG.N_EPOCHS`` epochs. After each epoch the model is scored
    with ``validation``; whenever the validation AUC improves, a copy of the
    weights is stored. Training stops early once the AUC has failed to improve
    for ``patience`` consecutive epochs.

    Args:
        model: network whose outputs lie in [0, 1] (required by ``nn.BCELoss``).
        optimizer: optimizer already bound to ``model``'s parameters.
        train_loader: iterable of ``(features, labels)`` training batches.
        val_loader: iterable of ``(features, labels)`` validation batches.
        device: device on which the model and batches are placed.
        patience: number of consecutive non-improving epochs tolerated.

    Returns:
        ``model`` itself, reloaded with the weights from the epoch that had the
        highest validation AUC, or ``None`` if no epoch scored above 0.
    """
    model.to(device)
    criterion = nn.BCELoss().to(device)

    best_val_score = 0
    # Snapshot of the best weights. Keeping a plain reference to `model` would
    # not work: later epochs keep updating the same object in place, so the
    # "best" model would silently become the last-epoch model.
    best_state = None
    early_stop_count = 0

    for epoch in range(1, CONFIG.N_EPOCHS+1):
        model.train()
        train_loss = []
        for features, labels in tqdm(iter(train_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)

            optimizer.zero_grad()

            output = model(features)
            loss = criterion(output, labels)

            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())

        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val AUC : [{_val_score:.5f}]')

        if best_val_score < _val_score:
            best_val_score = _val_score
            # state_dict() returns references to the live tensors, so it must be
            # deep-copied to freeze the weights of this epoch.
            best_state = copy.deepcopy(model.state_dict())
            early_stop_count = 0  # Reset early stopping counter
        else:
            early_stop_count += 1

        if early_stop_count >= patience:
            print("Early stopping")
            break

    if best_state is None:
        # No epoch ever improved on the initial score of 0.
        return None

    # Roll the model back to its best epoch before handing it to the caller.
    model.load_state_dict(best_state)
    return model

In [42]:
def multiLabel_AUC(y_true, y_scores):
    """Return the mean of the per-column ROC AUC scores.

    Args:
        y_true: 2-D array of ground-truth labels, one column per label.
        y_scores: 2-D array of predicted scores with the same layout.

    Returns:
        The mean of ``roc_auc_score`` computed column by column.
    """
    n_labels = y_true.shape[1]
    per_label_auc = [
        roc_auc_score(y_true[:, col], y_scores[:, col])
        for col in range(n_labels)
    ]
    return np.mean(per_label_auc)
    
def validation(model, criterion, val_loader, device):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Args:
        model: network to evaluate; it is switched to eval mode.
        criterion: loss function called as ``criterion(outputs, labels)``.
        val_loader: iterable of ``(features, labels)`` batches.
        device: device on which the batches are placed.

    Returns:
        ``(mean_batch_loss, auc)`` where ``auc`` comes from ``multiLabel_AUC``
        applied to the labels and outputs of all batches.
    """
    model.eval()
    batch_losses = []
    label_chunks = []
    prob_chunks = []

    with torch.no_grad():
        for features, labels in tqdm(iter(val_loader)):
            inputs = features.float().to(device)
            targets = labels.float().to(device)

            outputs = model(inputs)
            batch_losses.append(criterion(outputs, targets).item())

            # Move results to host memory so they can be concatenated with NumPy.
            label_chunks.append(targets.cpu().numpy())
            prob_chunks.append(outputs.cpu().numpy())

    mean_loss = np.mean(batch_losses)
    stacked_labels = np.concatenate(label_chunks, axis=0)
    stacked_probs = np.concatenate(prob_chunks, axis=0)

    # Calculate AUC score over the whole validation set
    return mean_loss, multiLabel_AUC(stacked_labels, stacked_probs)

# Build the network and its Adam optimizer, then run the training loop.
model = MLP()
optimizer = torch.optim.Adam(model.parameters(), lr=CONFIG.LR)

infer_model = train(
    model=model,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
)

100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 271.46it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 755.89it/s]


Epoch [1], Train Loss : [1.57565] Val Loss : [0.60982] Val AUC : [0.80286]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 281.85it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 796.03it/s]


Epoch [2], Train Loss : [0.64231] Val Loss : [0.55794] Val AUC : [0.85903]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 277.35it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 784.84it/s]


Epoch [3], Train Loss : [0.55462] Val Loss : [0.46383] Val AUC : [0.88851]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 278.19it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 744.89it/s]


Epoch [4], Train Loss : [0.50365] Val Loss : [0.40456] Val AUC : [0.90546]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 259.07it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 769.08it/s]


Epoch [5], Train Loss : [0.45766] Val Loss : [0.37759] Val AUC : [0.92030]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 267.50it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 739.38it/s]


Epoch [6], Train Loss : [0.42645] Val Loss : [0.35719] Val AUC : [0.92754]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 257.77it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 796.43it/s]


Epoch [7], Train Loss : [0.40500] Val Loss : [0.33645] Val AUC : [0.93449]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 265.24it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 744.82it/s]


Epoch [8], Train Loss : [0.38986] Val Loss : [0.33193] Val AUC : [0.93799]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 260.76it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 765.09it/s]


Epoch [9], Train Loss : [0.37579] Val Loss : [0.31293] Val AUC : [0.94089]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 260.19it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 779.73it/s]


Epoch [10], Train Loss : [0.36468] Val Loss : [0.31752] Val AUC : [0.94455]


In [43]:
# Extract features for the unlabeled test set and serve them in file order.
test_mfcc = get_mfcc_feature(test, train_mode=False)
test_dataset = CustomDataset(mfcc=test_mfcc, label=None)
test_loader = DataLoader(test_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

50000it [14:11, 58.72it/s]


In [44]:
def inference(model, test_loader, device):
    """Run ``model`` over every batch of ``test_loader`` and collect its outputs.

    Args:
        model: network to evaluate; moved to ``device`` and set to eval mode.
        test_loader: iterable yielding batches of features only.
        device: device on which the batches are placed.

    Returns:
        A list with one entry per sample, each a list of the model's outputs
        for that sample, in loader order.
    """
    model.to(device)
    model.eval()
    collected = []
    with torch.no_grad():
        for batch in tqdm(iter(test_loader)):
            outputs = model(batch.float().to(device))
            # Convert to nested Python lists on the host before accumulating.
            collected.extend(outputs.cpu().detach().numpy().tolist())
    return collected

# Predict probabilities for the test set with the model returned by train().
preds = inference(model=infer_model, test_loader=test_loader, device=device)

100%|███████████████████████████████████████████████████████████████████████████████| 521/521 [00:00<00:00, 575.94it/s]


In [45]:
# Load the submission template and overwrite every column after the first with
# the predicted probabilities, then preview the first rows.
# NOTE(review): this relies on the template having one row per prediction and its
# value columns being ordered (fake, real) like the label encoding — confirm that
# no test file was skipped during feature extraction.
submit = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'sample_submission.csv'))
submit.iloc[:, 1:] = preds
submit.head()

Unnamed: 0,id,fake,real
0,TEST_00000,0.140271,0.858046
1,TEST_00001,0.04027,0.960261
2,TEST_00002,0.16937,0.833355
3,TEST_00003,0.084579,0.915713
4,TEST_00004,0.469002,0.531368


In [46]:
# Write the filled-in submission next to the input data, without the index column.
submit.to_csv(
    path_or_buf=os.path.join(CONFIG.ROOT_FOLDER, '0701(2)_submit.csv'),
    index=False,
)