In [21]:
!pip install librosa
!pip install torchmetrics



In [22]:
import librosa

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import random

from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import torch
import torchmetrics
import os

from sklearn.metrics import roc_auc_score

import warnings
warnings.filterwarnings('ignore')
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [23]:
class Config:
    SR = 32000
    N_MFCC = 13
    # Dataset
    ROOT_FOLDER = r"C:\Users\KimDongyoung\Downloads\SW중심대학"
    N_CLASSES = 2
    BATCH_SIZE = 128
    N_EPOCHS = 3
    LR = 1e-4
    DROPOUT_RATE = 0.3
    # Others
    SEED = 42
    
CONFIG = Config()

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CONFIG.SEED) # Seed 고정

# Load your DataFrame
df = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'train.csv'))
train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CONFIG.SEED)

# Update the 'path' column to have the full path
train['path'] = train['path'].apply(lambda x: os.path.join(CONFIG.ROOT_FOLDER, x))
val['path'] = val['path'].apply(lambda x: os.path.join(CONFIG.ROOT_FOLDER, x))

# Ensure test paths are also updated
test = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'test.csv'))
test['path'] = test['path'].apply(lambda x: os.path.join(CONFIG.ROOT_FOLDER, x))

In [24]:
def get_mfcc_feature(df, train_mode=True):
    features = []
    labels = []
    for _, row in tqdm(df.iterrows()):
        file_path = row['path']
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue
        # Load audio file using librosa
        y, sr = librosa.load(file_path, sr=CONFIG.SR)
        
        # Extract MFCC features using librosa
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC)
        mfcc = np.mean(mfcc.T, axis=0)
        features.append(mfcc)

        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            label_vector[0 if label == 'fake' else 1] = 1
            labels.append(label_vector)

    if train_mode:
        return features, labels
    return features

In [25]:
# Call the function with the updated paths
train_mfcc, train_labels = get_mfcc_feature(train, True)
val_mfcc, val_labels = get_mfcc_feature(val, True)

4527it [00:59, 75.70it/s]


KeyboardInterrupt: 

In [None]:
class CustomDataset(Dataset):
    def __init__(self, mfcc, label):
        self.mfcc = mfcc
        self.label = label

    def __len__(self):
        return len(self.mfcc)

    def __getitem__(self, index):
        if self.label is not None:
            return self.mfcc[index], self.label[index]
        return self.mfcc[index]

train_dataset = CustomDataset(train_mfcc, train_labels)
val_dataset = CustomDataset(val_mfcc, val_labels)

train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

In [None]:
from torch import optim

In [None]:
# Define the RNN model

class RNNModel(nn.Module):
    def __init__(self, input_dim=CONFIG.N_MFCC, hidden_dim=128, output_dim=CONFIG.N_CLASSES, dropout_rate=CONFIG.DROPOUT_RATE):
        super(RNNModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.dropout(x)
        x = self.fc(x[:, -1, :])
        return torch.sigmoid(x)

def train(model, optimizer, train_loader, val_loader, device, patience=5):
    model.to(device)
    criterion = nn.BCELoss().to(device)
    
    best_val_score = 0
    best_model = None
    early_stop_count = 0
    
    for epoch in range(1, CONFIG.N_EPOCHS + 1):
        model.train()
        train_loss = []
        for features, labels in tqdm(train_loader):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            optimizer.zero_grad()
            
            output = model(features)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
        
        val_loss, val_score = validate(model, criterion, val_loader, device)
        train_loss_mean = np.mean(train_loss)
        
        print(f'Epoch [{epoch}], Train Loss: [{train_loss_mean:.5f}], Val Loss: [{val_loss:.5f}], Val AUC: [{val_score:.5f}]')
        
        if val_score > best_val_score:
            best_val_score = val_score
            best_model = model
            early_stop_count = 0
        else:
            early_stop_count += 1
        
        if early_stop_count >= patience:
            print("Early stopping")
            break
    
    return best_model

In [19]:
def validate(model, criterion, val_loader, device):
    model.eval()
    val_loss, all_labels, all_probs = [], [], []
    
    with torch.no_grad():
        for features, labels in val_loader:
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            probs = model(features)
            loss = criterion(probs, labels)
            
            val_loss.append(loss.item())
            all_labels.append(labels.cpu().numpy())
            all_probs.append(probs.cpu().numpy())
    
    val_loss_mean = np.mean(val_loss)
    all_labels = np.concatenate(all_labels, axis=0)
    all_probs = np.concatenate(all_probs, axis=0)
    
    auc_score = roc_auc_score(all_labels, all_probs, average='macro')
    
    return val_loss_mean, auc_score

def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    
    with torch.no_grad():
        for features in test_loader:
            features = features.float().to(device)
            probs = model(features)
            predictions.extend(probs.cpu().numpy())
    
    return predictions

In [20]:
# Initialize and train the model
model = RNNModel()
optimizer = torch.optim.Adam(model.parameters(), lr=CONFIG.LR)

best_model = train(model, optimizer, train_loader, val_loader, device)

  0%|                                                                                          | 0/347 [00:00<?, ?it/s]


IndexError: too many indices for tensor of dimension 2

In [None]:
test_mfcc = get_mfcc_feature(test, False)
test_dataset = CustomDataset(test_mfcc, None)
test_loader = DataLoader(
    test_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

In [None]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for features in tqdm(iter(test_loader)):
            features = features.float().to(device)
            
            probs = model(features)

            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
    return predictions

In [None]:
preds = inference(infer_model, test_loader, device)

In [None]:
pip install librosa tensorflow

In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import tensorflow as tf
from tqdm import tqdm
from sklearn.model_selection import train_test_split

import tensorflow_hub as hub
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy

In [None]:
# Define the Config class
class Config:
    SR = 16000  # YAMNet expects 16kHz sample rate
    N_MELS = 64  # YAMNet expects 64 Mel bands
    ROOT_FOLDER = 'C:/Users/KimDongyoung/Downloads/SW중심대학'
    N_CLASSES = 2
    BATCH_SIZE = 128
    N_EPOCHS = 10
    LR = 1e-4
    DROPOUT_RATE = 0.3
    SEED = 42

CONFIG = Config()

# Function to set the seed for reproducibility
def seed_everything(seed):
    np.random.seed(seed)
    tf.random.set_seed(seed)

seed_everything(CONFIG.SEED) # Fix the seed

# Load your DataFrame
train_df = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'train.csv'))
test_df = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'test.csv'))
submission_df = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'sample_submission.csv'))

# Split the training data into train and validation sets
train, val = train_test_split(train_df, test_size=0.2, random_state=CONFIG.SEED)


In [None]:
# Ensure paths are updated
def update_path(df, root_folder):
    df['path'] = df['path'].apply(lambda x: os.path.join(root_folder, x))
    return df

train = update_path(train, CONFIG.ROOT_FOLDER)
val = update_path(val, CONFIG.ROOT_FOLDER)
test_df = update_path(test_df, CONFIG.ROOT_FOLDER)

In [None]:
def load_audio(file_path, sr=CONFIG.SR):
    y, sr = librosa.load(file_path, sr=sr)
    return y, sr

def extract_mel_spectrogram(y, sr, n_mels=CONFIG.N_MELS):
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)
    return mel_spec_db

def get_mel_spectrogram_feature(df, train_mode=True):
    features = []
    labels = []
    
    for _, row in tqdm(df.iterrows()):
        file_path = row['path']
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue
        y, sr = load_audio(file_path)
        mel_spec = extract_mel_spectrogram(y, sr)
        features.append(mel_spec)

        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            label_vector[0 if label == 'fake' else 1] = 1
            labels.append(label_vector)

    if train_mode:
        return features, labels
    return features


In [None]:
# Extract Mel spectrogram features and labels
train_mel, train_labels = get_mel_spectrogram_feature(train, True)
val_mel, val_labels = get_mel_spectrogram_feature(val, True)

In [None]:
# Load YAMNet model
yamnet_model_handle = 'https://tfhub.dev/google/yamnet/1'
yamnet_model = hub.load(yamnet_model_handle)

def extract_embedding(mel_spectrogram):
    # Yamnet expects shape (1, 64, 96)
    # Resize the mel spectrogram to fit YAMNet's expected input
    mel_spectrogram_resized = tf.image.resize(mel_spectrogram, [64, 96])
    scores, embeddings, spectrogram = yamnet_model(mel_spectrogram_resized)
    return embeddings

# Extract embeddings for training and validation data
train_embeddings = np.array([extract_embedding(tf.convert_to_tensor(mel[np.newaxis, ...])).numpy() for mel in train_mel])
val_embeddings = np.array([extract_embedding(tf.convert_to_tensor(mel[np.newaxis, ...])).numpy() for mel in val_mel])

In [None]:
# Build a simple classifier model
def build_model(input_shape):
    model = Sequential([
        Dense(128, activation='relu', input_shape=input_shape),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer=Adam(lr=CONFIG.LR), loss=BinaryCrossentropy(), metrics=['accuracy'])
    return model



In [None]:
input_shape = train_embeddings.shape[1:]
model = build_model(input_shape)

# Convert labels to numpy arrays
train_labels = np.array(train_labels)
val_labels = np.array(val_labels)

In [None]:
# Train the model
model.fit(train_embeddings, train_labels, epochs=CONFIG.N_EPOCHS, batch_size=CONFIG.BATCH_SIZE,
          validation_data=(val_embeddings, val_labels))