# Imports

In [1]:
import librosa

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import random
import warnings

from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import torch
import torchmetrics
import os

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Config

In [4]:
class Config:
    SR = 32000
    N_MFCC = 125
    # Dataset
    ROOT_FOLDER = './'
    # Training
    N_CLASSES = 1
    BATCH_SIZE = 96
    N_EPOCHS = 5
    LR = 3e-4
    # Others
    SEED = 42
    
CONFIG = Config()

In [5]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(CONFIG.SEED) # Seed 고정

In [None]:
df = pd.read_csv('open/train.csv')
train, val, _, _ = train_test_split(df, df['label'], test_size=0.2, random_state=CONFIG.SEED)

## Data Pre-processing : MFCC

In [None]:
def get_mfcc_feature(df, train_mode=True):
    features = []
    labels = []
    for _, row in tqdm(df.iterrows()):
        # librosa패키지를 사용하여 wav 파일 load
        path = 'open/train/' + row['path'][8:]
        y, sr = librosa.load(path, sr=CONFIG.SR)
        
        # librosa패키지를 사용하여 mfcc 추출
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC, n_fft=400, hop_length=200)
        mfcc = np.mean(mfcc.T, axis=0)
        features.append(mfcc)

        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            label_vector[0] = (0 if label == 'fake' else 1)
            labels.append(label_vector)

    if train_mode:
        return features, labels
    return features

In [8]:
train_mfcc, train_labels = get_mfcc_feature(train, True)
val_mfcc, val_labels = get_mfcc_feature(val, True)

44350it [09:45, 75.79it/s] 
11088it [02:55, 63.36it/s]


# Dataset

In [9]:
class CustomDataset(Dataset):
    def __init__(self, mfcc, label):
        self.mfcc = mfcc
        self.label = label

    def __len__(self):
        return len(self.mfcc)

    def __getitem__(self, index):
        if self.label is not None:
            return self.mfcc[index], self.label[index]
        return self.mfcc[index]

In [10]:
train_dataset = CustomDataset(train_mfcc, train_labels)
val_dataset = CustomDataset(val_mfcc, val_labels)

In [11]:
train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

# Define Model 

In [29]:
class MLP(nn.Module):
    def __init__(self, input_dim=CONFIG.N_MFCC, hidden_dim=128, output_dim=CONFIG.N_CLASSES):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, hidden_dim)
        self.fc5 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.LeakyReLU(0.01)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.relu(self.fc4(x))
        x = self.fc5(x)
        x = torch.sigmoid(x)
        return x

# Train & Validation

In [30]:
from sklearn.metrics import roc_auc_score

def train(model, optimizer, train_loader, val_loader, device):
    model.to(device)
    criterion = nn.BCELoss().to(device)
    
    best_val_score = 0
    best_model = None
    
    for epoch in range(1, CONFIG.N_EPOCHS+1):
        model.train()
        train_loss = []
        for features, labels in tqdm(iter(train_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            optimizer.zero_grad()
            
            output = model(features)
            loss = criterion(output, labels)
            
            loss.backward()
            optimizer.step()
            
            train_loss.append(loss.item())
                    
        _val_loss, _val_score = validation(model, criterion, val_loader, device)
        _train_loss = np.mean(train_loss)
        print(f'Epoch [{epoch}], Train Loss : [{_train_loss:.5f}] Val Loss : [{_val_loss:.5f}] Val AUC : [{_val_score:.5f}]')
            
        if best_val_score < _val_score:
            best_val_score = _val_score
            best_model = model
    
    return best_model

def multiLabel_AUC(y_true, y_scores):
    auc_scores = []
    for i in range(y_true.shape[1]):
        auc = roc_auc_score(y_true[:, i], y_scores[:, i])
        auc_scores.append(auc)
    mean_auc_score = np.mean(auc_scores)
    return mean_auc_score
    
def validation(model, criterion, val_loader, device):
    model.eval()
    val_loss, all_labels, all_probs = [], [], []
    
    with torch.no_grad():
        for features, labels in tqdm(iter(val_loader)):
            features = features.float().to(device)
            labels = labels.float().to(device)
            
            probs = model(features)
            
            loss = criterion(probs, labels)

            val_loss.append(loss.item())

            all_labels.append(labels.cpu().numpy())
            all_probs.append(probs.cpu().numpy())
        
        _val_loss = np.mean(val_loss)

        all_labels = np.concatenate(all_labels, axis=0)
        all_probs = np.concatenate(all_probs, axis=0)
        
        # Calculate AUC score
        auc_score = multiLabel_AUC(all_labels, all_probs)
    
    return _val_loss, auc_score

## Run

In [31]:
model = MLP()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CONFIG.LR)

infer_model = train(model, optimizer, train_loader, val_loader, device)

100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:02<00:00, 229.49it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 705.04it/s]


Epoch [1], Train Loss : [0.33069] Val Loss : [0.17542] Val AUC : [0.98141]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 240.65it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 713.20it/s]


Epoch [2], Train Loss : [0.13800] Val Loss : [0.08342] Val AUC : [0.99600]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 231.78it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 690.12it/s]


Epoch [3], Train Loss : [0.06690] Val Loss : [0.05194] Val AUC : [0.99832]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 233.50it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 635.59it/s]


Epoch [4], Train Loss : [0.04655] Val Loss : [0.05347] Val AUC : [0.99876]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 232.88it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 613.47it/s]

Epoch [5], Train Loss : [0.04094] Val Loss : [0.04756] Val AUC : [0.99853]





## Inference

In [None]:
def get_mfcc_feature(df, train_mode=True):
    features = []
    labels = []
    warnings.filterwarnings("ignore")
    for _, row in tqdm(df.iterrows()):
        try:
            # librosa패키지를 사용하여 wav 파일 load
            path = 'denoise_si_wav' + row['path'][6:-3] + 'wav'
            y, sr = librosa.load(path, sr=CONFIG.SR)
            
            # librosa패키지를 사용하여 mfcc 추출
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CONFIG.N_MFCC, n_fft=400, hop_length=200)
            mfcc = np.mean(mfcc.T, axis=0)
            features.append(mfcc)
        except Exception as e:
            zero = np.zeros(shape=(80,), dtype=np.float32)
            features.append(zero)

        if train_mode:
            label = row['label']
            label_vector = np.zeros(CONFIG.N_CLASSES, dtype=float)
            label_vector[0] = (0 if label == 'fake' else 1)
            labels.append(label_vector)

    if train_mode:
        return features, labels
    return features

In [None]:
test = pd.read_csv('open/test.csv')
test_mfcc = get_mfcc_feature(test, False)
test_dataset = CustomDataset(test_mfcc, None)
test_loader = DataLoader(
    test_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

50000it [07:36, 109.59it/s]


In [34]:
def inference(model, test_loader, device):
    model.to(device)
    model.eval()
    predictions = []
    with torch.no_grad():
        for features in tqdm(iter(test_loader)):
            features = features.float().to(device)
            
            probs = model(features)

            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
    return predictions

In [35]:
preds_real = inference(infer_model, test_loader, device)

100%|███████████████████████████████████████████████████████████████████████████████| 521/521 [00:00<00:00, 916.64it/s]


In [36]:
print(preds_real[:10])

[[0.002740570344030857], [0.017574405297636986], [0.004778772592544556], [0.15478770434856415], [0.9996433258056641], [0.9996558427810669], [2.2236259708541262e-11], [0.9391964673995972], [0.35646241903305054], [0.006666060537099838]]


In [None]:
df = pd.DataFrame(data = preds_real)
df.to_csv('open/true_submit.csv', index=False)

In [38]:
for v in train_labels:
    if v[0] == 0:
        v[0] = 1
    else:
        v[0] = 0

for v in val_labels:
    if v[0] == 0:
        v[0] = 1
    else:
        v[0] = 0

In [39]:
print(train_labels[2][0])

0.0


# FAKE

In [40]:
train_dataset = CustomDataset(train_mfcc, train_labels)
val_dataset = CustomDataset(val_mfcc, val_labels)

In [41]:
train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=True
)
val_loader = DataLoader(
    val_dataset,
    batch_size=CONFIG.BATCH_SIZE,
    shuffle=False
)

In [42]:
model = MLP()
optimizer = torch.optim.Adam(params = model.parameters(), lr = CONFIG.LR)

infer_model = train(model, optimizer, train_loader, val_loader, device)

100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 233.50it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 705.20it/s]


Epoch [1], Train Loss : [0.33484] Val Loss : [0.16815] Val AUC : [0.98264]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 235.07it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 634.96it/s]


Epoch [2], Train Loss : [0.12979] Val Loss : [0.06862] Val AUC : [0.99665]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:02<00:00, 225.35it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 713.19it/s]


Epoch [3], Train Loss : [0.07310] Val Loss : [0.06500] Val AUC : [0.99835]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:01<00:00, 235.96it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 582.22it/s]


Epoch [4], Train Loss : [0.05997] Val Loss : [0.08729] Val AUC : [0.99850]


100%|███████████████████████████████████████████████████████████████████████████████| 462/462 [00:03<00:00, 135.83it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 116/116 [00:00<00:00, 253.18it/s]

Epoch [5], Train Loss : [0.04993] Val Loss : [0.03962] Val AUC : [0.99894]





In [43]:
preds_fake = inference(infer_model, test_loader, device)

100%|███████████████████████████████████████████████████████████████████████████████| 521/521 [00:00<00:00, 562.11it/s]


In [44]:
print(preds_fake[:10])

[[0.8930511474609375], [0.031446538865566254], [0.024085313081741333], [0.73601895570755], [8.117932338791434e-06], [1.056486780726118e-05], [0.9999986886978149], [6.43106468487531e-05], [0.015887204557657242], [0.15542559325695038]]


In [None]:
df = pd.DataFrame(data = preds_fake)
df.to_csv('open/fake_submit.csv', index=False)

# Merge

In [None]:
real = pd.read_csv('open/true_submit.csv')
fake = pd.read_csv('open/fake_submit.csv')
index = pd.read_csv('open/submit_base_1.csv')

In [47]:
col = ['id', 'fake', 'real']
index = np.array(index)
index = index[0:,0:1]
real = np.array(real)
fake = np.array(fake)

In [48]:
print(fake[:10])

[[8.93051147e-01]
 [3.14465389e-02]
 [2.40853131e-02]
 [7.36018956e-01]
 [8.11793234e-06]
 [1.05648678e-05]
 [9.99998689e-01]
 [6.43106468e-05]
 [1.58872046e-02]
 [1.55425593e-01]]


In [49]:
sub_data = np.hstack([index, fake])
sub_data = np.hstack([sub_data, real])

print(sub_data[:3])

[['TEST_00000' 0.8930511474609375 0.0027405703440308]
 ['TEST_00001' 0.0314465388655662 0.0175744052976369]
 ['TEST_00002' 0.0240853130817413 0.0047787725925445]]


In [None]:
# counting voice of datas
num = pd.read_csv('open/test_num_50000.csv')

In [52]:
num = np.array(num)

In [53]:
for i in range(50000):
    if num[i][1] == 0:
        sub_data[i][1] = 0
        sub_data[i][2] = 0

print(sub_data[:7])

[['TEST_00000' 0.8930511474609375 0.0027405703440308]
 ['TEST_00001' 0.0314465388655662 0.0175744052976369]
 ['TEST_00002' 0.0240853130817413 0.0047787725925445]
 ['TEST_00003' 0.73601895570755 0.1547877043485641]
 ['TEST_00004' 8.117932338791434e-06 0.999643325805664]
 ['TEST_00005' 1.056486780726118e-05 0.9996558427810668]
 ['TEST_00006' 0 0]]


In [None]:
submit = pd.DataFrame(data = sub_data, columns = col)
submit.to_csv('open/noise_submit.csv', index = False)