# Imports

In [None]:
import librosa

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import random

from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

import torch
import torchmetrics
import os

from utils.measure import auc_brier_ece
from utils.train import train
from feature.mfcc import get_mfcc_feature
from inference.inference import inference

In [None]:
import warnings
# Suppress every Python warning for the rest of the session
# (presumably to keep the notebook output uncluttered).
warnings.filterwarnings('ignore')

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Config

In [None]:
# Central collection of the hyper-parameters and paths used by this notebook.
class Config:
    # Passed to get_mfcc_feature through CONFIG; presumably the audio
    # sampling rate in Hz -- TODO confirm against feature/mfcc.py.
    SR = 32000
    # Used as the default input dimension of the MLP defined in this notebook.
    N_MFCC = 13
    # Dataset
    # Folder the CSV files are read from and the submissions are written to.
    ROOT_FOLDER = '../data'
    # Training
    # Used as the default output dimension of the MLP.
    N_CLASSES = 2
    # Batch size given to every DataLoader in this notebook.
    BATCH_SIZE = 512
    # Passed to train() through CONFIG; presumably the number of epochs.
    N_EPOCHS = 100
    # Learning rate given to the Adam optimizer.
    LR = 3e-4
    # Others
    # Seed used by seed_everything() and by the train/validation split.
    SEED = 42

# Shared instance used throughout the notebook.
CONFIG = Config()

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and all CUDA
    devices), exports ``PYTHONHASHSEED`` and configures cuDNN so that repeated
    runs pick the same algorithms.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Hash randomisation is fixed at interpreter start-up, so this only
    # affects child processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one; this is silently
    # ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and choose convolution algorithms
    # at run time, which can differ between runs and defeats the purpose of
    # fixing the seed, so it must be disabled.
    torch.backends.cudnn.benchmark = False

seed_everything(CONFIG.SEED) # Fix the seed

In [None]:
# Load the training metadata and hold out 20% of the rows for validation.
df = pd.read_csv(os.path.join(Config.ROOT_FOLDER, 'train.csv'))
# NOTE: this assignment rebinds `train`, which until now referred to the
# function imported from utils.train.
train, val, _, _ = train_test_split(
    df,
    df['label'],
    test_size=0.2,
    random_state=CONFIG.SEED,
)

## Data Pre-processing: MFCC

In [None]:
# Extract features for both splits with the project-local get_mfcc_feature.
# With train_mode=True the call returns a (features, labels) pair, unpacked here.
train_mfcc, train_labels = get_mfcc_feature(train, Config=CONFIG, train_mode=True)
val_mfcc, val_labels = get_mfcc_feature(val, Config=CONFIG, train_mode=True)

# Dataset

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset over pre-computed features with optional labels.

    Args:
        mfcc: Indexable collection of feature items.
        label: Indexable collection of labels aligned with ``mfcc``, or
            ``None`` when the data is unlabeled.
    """

    def __init__(self, mfcc, label):
        self.mfcc, self.label = mfcc, label

    def __len__(self):
        """Return the number of feature items."""
        return len(self.mfcc)

    def __getitem__(self, index):
        """Return ``(feature, label)`` if labels exist, else only the feature."""
        feature = self.mfcc[index]
        if self.label is None:
            return feature
        return feature, self.label[index]

In [None]:
# Wrap the extracted features and labels of each split in a dataset.
train_dataset = CustomDataset(mfcc=train_mfcc, label=train_labels)
val_dataset = CustomDataset(mfcc=val_mfcc, label=val_labels)

In [None]:
# Training batches are reshuffled every epoch; validation keeps dataset order.
train_loader = DataLoader(train_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

# Define Model

In [None]:
class MLP(nn.Module):
    """Three-layer fully connected network with sigmoid outputs.

    Args:
        input_dim: Size of each input vector (defaults to ``CONFIG.N_MFCC``).
        hidden_dim: Width of the two hidden layers.
        output_dim: Number of output units (defaults to ``CONFIG.N_CLASSES``).
    """

    def __init__(self, input_dim=CONFIG.N_MFCC, hidden_dim=128, output_dim=CONFIG.N_CLASSES):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return one sigmoid activation per output unit for input ``x``."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        # Each output unit is squashed independently into (0, 1).
        return torch.sigmoid(self.fc3(hidden))

## Run

In [None]:
# Build the model and its optimizer, then run the project training routine.
model = MLP()
optimizer = torch.optim.Adam(params=model.parameters(), lr=CONFIG.LR)

# The name `train` was rebound to the training DataFrame by the
# train/validation split earlier in this notebook, so calling `train(...)`
# here raises "TypeError: 'DataFrame' object is not callable". Re-import the
# training routine under a distinct alias so the call reaches the function.
from utils.train import train as run_training

infer_model = run_training(model, optimizer, train_loader, val_loader, device, CONFIG=CONFIG)

## Submission - validation

In [None]:
# Run the project-local inference helper over the validation loader.
preds = inference(infer_model, val_loader, device, val=True)

In [None]:
# Load the validation submission template and overwrite every column after
# the first one with the predictions.
submit = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'val_sample_submission.csv'))
submit.iloc[:, 1:] = preds
# Last expression of the cell: previews the first rows in the notebook.
submit.head()

In [None]:
# Persist the validation predictions without the DataFrame index column.
submit.to_csv(os.path.join(CONFIG.ROOT_FOLDER, 'val_baseline_submit.csv'), index=False)

# Measurement - validation set

In [None]:
# Ground-truth labels as a two-column frame named '0' and '1'.
answer_df = pd.DataFrame(val_labels, columns=['0', '1'])
# Reload the saved validation predictions, drop the id column and rename the
# 'fake' / 'real' columns so they line up with the answer frame.
submission_df = (
    pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'val_baseline_submit.csv'))
    .drop('id', axis=1)
    .rename(columns={'fake': '0', 'real': '1'})
)

auc_brier_ece(answer_df=answer_df, submission_df=submission_df)

# Submission - TEST

In [None]:
# Prepare the unlabeled test split: with train_mode=False the feature helper's
# result is used as the features alone, and the dataset gets no labels.
test = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'test.csv'))
test_mfcc = get_mfcc_feature(test, Config=CONFIG, train_mode=False)
test_dataset = CustomDataset(mfcc=test_mfcc, label=None)
# Keep dataset order so predictions can be written back row by row.
test_loader = DataLoader(test_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

In [None]:
# Run the project-local inference helper over the test loader.
preds = inference(infer_model, test_loader, device, val=False)

In [None]:
# Load the submission template and overwrite every column after the first
# one with the test predictions.
submit = pd.read_csv(os.path.join(CONFIG.ROOT_FOLDER, 'sample_submission.csv'))
submit.iloc[:, 1:] = preds
# NOTE(review): this is not the last expression of the cell, so the preview
# is presumably not displayed by the notebook.
submit.head()

# Persist the test predictions without the DataFrame index column.
submit.to_csv(os.path.join(CONFIG.ROOT_FOLDER, 'baseline_submit.csv'), index=False)