In [1]:
!pip install librosa
!pip install torchmetrics
!pip install tensorflow



In [2]:
import librosa

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import random

from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import torch
import torchmetrics
import os

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LSTM, TimeDistributed, Activation
from tensorflow.keras.models import Model

from sklearn.metrics import roc_auc_score

In [3]:
import warnings
warnings.filterwarnings('ignore')
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
class Config:
    SR = 32000
    N_MFCC = 13
    # Dataset
    ROOT_FOLDER = r"C:\Users\KimDongyoung\Downloads\SW중심대학"
    N_CLASSES = 2
    BATCH_SIZE = 128
    N_EPOCHS = 5
    LR = 1e-4
    DROPOUT_RATE = 0.3
    # Others
    SEED = 42
    
CONFIG = Config()

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CONFIG.SEED) # Seed 고정

In [5]:
# Load your DataFrame
train_df = pd.read_csv('C:/Users/KimDongyoung/Downloads/SW중심대학/train.csv')
test_df = pd.read_csv('C:/Users/KimDongyoung/Downloads/SW중심대학/test.csv')
submission_df = pd.read_csv('C:/Users/KimDongyoung/Downloads/SW중심대학/sample_submission.csv')

# Split the training data into train and validation sets
train, val = train_test_split(train_df, test_size=0.2, random_state=CONFIG.SEED)

# Ensure paths are updated
def update_path(df, root_folder):
    df['path'] = df['path'].apply(lambda x: os.path.join(root_folder, x))
    return df

train = update_path(train, CONFIG.ROOT_FOLDER)
val = update_path(val, CONFIG.ROOT_FOLDER)
test_df = update_path(test_df, CONFIG.ROOT_FOLDER)

print("Train DataFrame:")
print(train.head())

print("\nValidation DataFrame:")
print(val.head())

print("\nTest DataFrame:")
print(test_df.head())

Train DataFrame:
             id                                               path label
6804   SNGJTJQG  C:\Users\KimDongyoung\Downloads\SW중심대학\./train...  fake
3734   LIYTDJZZ  C:\Users\KimDongyoung\Downloads\SW중심대학\./train...  fake
55413  HAMPQOIN  C:\Users\KimDongyoung\Downloads\SW중심대학\./train...  real
10741  UCJMLYVH  C:\Users\KimDongyoung\Downloads\SW중심대학\./train...  fake
33027  EUKZRQPD  C:\Users\KimDongyoung\Downloads\SW중심대학\./train...  real

Validation DataFrame:
             id                                               path label
49798  PUOXNOKJ  C:\Users\KimDongyoung\Downloads\SW중심대학\./train...  real
54292  GXOIPDJP  C:\Users\KimDongyoung\Downloads\SW중심대학\./train...  fake
40359  FOEQKPPR  C:\Users\KimDongyoung\Downloads\SW중심대학\./train...  fake
50441  IYASAVDT  C:\Users\KimDongyoung\Downloads\SW중심대학\./train...  real
37723  VLWIXPTC  C:\Users\KimDongyoung\Downloads\SW중심대학\./train...  real

Test DataFrame:
           id                                               path
0 

In [6]:
# Define existing functions
def load_audio(file_path, sr=CONFIG.SR):
    y, sr = librosa.load(file_path, sr=sr)
    return y, sr

def extract_features(y, sr, n_mfcc=CONFIG.N_MFCC):
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    return mfcc.T  # Transpose to get time steps as rows

def get_mfcc_feature(df, train_mode=True):
    features = []
    labels = []
    max_len = 0  # To keep track of the maximum sequence length for padding
    
    for _, row in tqdm(df.iterrows()):
        file_path = row['path']
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue
        y, sr = load_audio(file_path)
        mfcc = extract_features(y, sr)
        features.append(mfcc)
        if mfcc.shape[0] > max_len:
            max_len = mfcc.shape[0]

        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            label_vector[0 if label == 'fake' else 1] = 1
            labels.append(label_vector)
    
    # Pad sequences to have the same length
    features = [np.pad(f, ((0, max_len - f.shape[0]), (0, 0)), mode='constant') for f in features]

    if train_mode:
        return features, labels
    return features

train_mfcc, train_labels = get_mfcc_feature(train, True)
val_mfcc, val_labels = get_mfcc_feature(val, True)

44350it [11:21, 65.08it/s]
11088it [02:39, 69.39it/s]


In [7]:
class CustomDataset(Dataset):
    def __init__(self, mfcc, label):
        self.mfcc = mfcc
        self.label = label

    def __len__(self):
        return len(self.mfcc)

    def __getitem__(self, index):
        if self.label is not None:
            return self.mfcc[index], self.label[index]
        return self.mfcc[index]

train_dataset = CustomDataset(train_mfcc, train_labels)
val_dataset = CustomDataset(val_mfcc, val_labels)

In [8]:
train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

In [9]:
# Define RNN Model with multiple LSTM layers and batch normalization
class RNNModel(nn.Module):
    def __init__(self, input_dim=CONFIG.N_MFCC, hidden_dim=128, num_layers=2, output_dim=CONFIG.N_CLASSES, dropout_rate=CONFIG.DROPOUT_RATE):
        super(RNNModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers, batch_first=True, dropout=dropout_rate)
        self.batch_norm = nn.BatchNorm1d(hidden_dim)
        self.fc1 = nn.Linear(hidden_dim, 64)
        self.fc2 = nn.Linear(64, output_dim)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.batch_norm(x[:, -1, :])  # Apply batch normalization to the output of the last time step
        x = self.dropout(torch.relu(self.fc1(x)))
        x = self.fc2(x)
        return torch.sigmoid(x)

def train(model, optimizer, train_loader, val_loader, device, patience=5):
    model.to(device)
    criterion = nn.BCELoss().to(device)
    
    best_val_score = 0
    best_model = None
    early_stop_count = 0

    # Learning rate scheduler
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=2, verbose=True)
    
    for epoch in range(1, CONFIG.N_EPOCHS + 1):
        model.train()
        train_loss = []
        for features, labels in tqdm(train_loader):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            optimizer.zero_grad()
            
            output = model(features)
            loss = criterion(output, labels)
            
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
            optimizer.step()
            
            train_loss.append(loss.item())
        
        val_loss, val_score = validate(model, criterion, val_loader, device)
        train_loss_mean = np.mean(train_loss)
        
        print(f'Epoch [{epoch}], Train Loss: [{train_loss_mean:.5f}], Val Loss: [{val_loss:.5f}], Val AUC: [{val_score:.5f}]')
        
        scheduler.step(val_loss)  # Adjust learning rate based on validation loss
        
        if val_score > best_val_score:
            best_val_score = val_score
            best_model = model
            early_stop_count = 0
        else:
            early_stop_count += 1
        
        if early_stop_count >= patience:
            print("Early stopping")
            break
    
    return best_model

In [10]:
def validate(model, criterion, val_loader, device):
    model.eval()
    val_loss, all_labels, all_probs = [], [], []
    
    with torch.no_grad():
        for features, labels in val_loader:
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            probs = model(features)
            loss = criterion(probs, labels)
            
            val_loss.append(loss.item())
            all_labels.append(labels.cpu().numpy())
            all_probs.append(probs.cpu().numpy())
    
    val_loss_mean = np.mean(val_loss)
    all_labels = np.concatenate(all_labels, axis=0)
    all_probs = np.concatenate(all_probs, axis=0)
    
    auc_score = roc_auc_score(all_labels, all_probs, average='macro')
    
    return val_loss_mean, auc_score

def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    
    with torch.no_grad():
        for features in test_loader:
            features = features.float().to(device)
            probs = model(features)
            predictions.extend(probs.cpu().numpy())
    
    return predictions

In [15]:
import torch.optim as optim

In [16]:
# Initialize and train the model
model = RNNModel()
optimizer = torch.optim.Adam(model.parameters(), lr=CONFIG.LR)

best_model = train(model, optimizer, train_loader, val_loader, device)

100%|████████████████████████████████████████████████████████████████████████████████| 347/347 [21:42<00:00,  3.75s/it]


Epoch [1], Train Loss: [0.69661], Val Loss: [0.69339], Val AUC: [0.50232]


100%|████████████████████████████████████████████████████████████████████████████████| 347/347 [21:51<00:00,  3.78s/it]


Epoch [2], Train Loss: [0.69486], Val Loss: [0.69348], Val AUC: [0.50336]


100%|████████████████████████████████████████████████████████████████████████████████| 347/347 [21:51<00:00,  3.78s/it]


Epoch [3], Train Loss: [0.69459], Val Loss: [0.69334], Val AUC: [0.51276]


100%|████████████████████████████████████████████████████████████████████████████████| 347/347 [21:41<00:00,  3.75s/it]


Epoch [4], Train Loss: [0.69418], Val Loss: [0.69328], Val AUC: [0.53636]


100%|████████████████████████████████████████████████████████████████████████████████| 347/347 [21:15<00:00,  3.68s/it]


Epoch [5], Train Loss: [0.69431], Val Loss: [0.69324], Val AUC: [0.50942]


In [17]:
# Prepare the test dataset
test_mfcc = get_mfcc_feature(test_df, False)
test_dataset = CustomDataset(test_mfcc, None)
test_loader = DataLoader(test_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

50000it [28:27, 29.28it/s]


In [18]:
# Run inference on the test set
predictions = inference(best_model, test_loader, device)

# Prepare the submission file
submission_df['label'] = predictions

In [19]:
submit = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'sample_submission.csv'))
submit.iloc[:, 1:] = predictions
submit.head()

Unnamed: 0,id,fake,real
0,TEST_00000,0.082641,0.81187
1,TEST_00001,0.405881,0.874013
2,TEST_00002,0.483285,0.890418
3,TEST_00003,0.060824,0.992021
4,TEST_00004,0.140666,0.928351


In [20]:
submit.to_csv(os.path.join(CONFIG.ROOT_FOLDER, '0707_submit.csv'), index=False)