# SW중심대학 디지털 경진대회_SW와 생성AI의 만남 : AI부문
 - 이 AI 경진대회에서는 5초 분량의 오디오 샘플에서 진짜 사람 목소리와 AI가 생성한 가짜 목소리를 정확하게 구분할 수 있는 모델을 개발하는 것이 목표입니다.
 - 이 작업은 보안, 사기 감지 및 오디오 처리 기술 향상 등 다양한 분야에서 매우 중요합니다.

In [1]:
try:
    import torch
except ImportError:
    try:
        %conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
    except:
        %pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

try:
    import librosa
except:
    try:
        %conda install -c conda-forge librosa
    except:
        %pip install librosa

try:
    import wespeaker
except ImportError:
    %pip install git+https://github.com/wenet-e2e/wespeaker.git

try:
    import huggingface_hub
except ImportError:
    %pip install huggingface_hub

## Imports
모델 학습 및 추론에 사용할 라이브러리들을 불러옵니다.

In [2]:
import os
import math
import random

import torch
import torchaudio

import numpy as np
import pandas as pd

from torch import nn
import torch.nn.functional as F
import torchaudio.transforms as T
import torchaudio.pipelines as pipelines
from torch.utils.data import Dataset, DataLoader

from huggingface_hub import hf_hub_download
import wespeaker

from tqdm.notebook import tqdm

### Check GPU Availability

In [3]:
!nvidia-smi

Wed Jul 17 10:45:53 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.29.05    Driver Version: 495.29.05    CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  On   | 00000000:04:00.0 Off |                    0 |
| N/A   44C    P0    41W / 250W |   4067MiB / 16280MiB |     20%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE...  On   | 00000000:06:00.0 Off |                    0 |
| N/A   38C    P0    25W / 250W |      2MiB / 16280MiB |      0%      Default |
|       

In [4]:
# Set CUDA Device Number 0~7
DEVICE_NUM = 1

if torch.cuda.is_available():
    torch.cuda.set_device(DEVICE_NUM)
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
    DEVICE_NUM = -1  # cpu
print(f"INFO: Using device - {device}:{DEVICE_NUM}")

INFO: Using device - cuda:1


## Config
- 딥러닝 모델을 학습하기 전에 설정해야하는 다양한 매개변수를 정의하는 설정 클래스입니다.
- 클래스를 사용하여 학습에 필요한 설정 값을 미리 지정합니다.

##### 오디오 신호
- 우리가 듣는 소리는 공기의 압력 변화로, 이것을 디지털 신호로 변환한 것이 오디오 신호입니다.
- 이 신호는 시간에 따라 변하는 진폭 값을 가지고 있습니다.

In [5]:
class Config:
    """ Configuration Class """
    SEED = 20240719  # 재현성을 위해 랜덤 시드 고정
    NB_NAME = "transfer_learning"  # ipython 노트북 이름 지정
    ROOT_FOLDER = os.path.join(".", "data")

    BATCH_SIZE = 128
    LR = 1e-5

In [6]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(Config.SEED)  # Seed 고정

## Dataset

In [7]:
from torchvision.datasets import utils
from sklearn.model_selection import train_test_split as split

utils.tqdm = tqdm


class VoiceDataset(Dataset):
    download_url = "https://drive.usercontent.google.com/download?id=1hi1dibkHyFbaxAteLlZJw6r3g9ddd4Lf&export=download&authuser=0&confirm=t&uuid=c40c278b-d74b-4b75-bc79-09e8a3ccffa4&at=APZUnTUvIVFVM9gjGNUCmDb4YZCy%3A1719807236671"

    @classmethod
    def download(cls, root='./data', filename="download.zip", md5=None):
        cls.download_root = root
        filepath = os.path.join(root, filename)
        if not os.path.exists(filepath):
            utils.download_and_extract_archive(cls.download_url, root, root, filename, md5)
            print("Extraction completed.")
        else:
            print(f"File already exists in {filepath}")

    @property
    def get_dataset_path(self):
        filename = "train.csv" if self.is_train else "test.csv"
        if self.custom_csv:
            filename = self.custom_csv + ".csv"
        return os.path.join(self.download_root, filename)

    @property
    def submission_form_path(cls):
        return os.path.join(cls.download_root, "sample_submission.csv")

    def __init__(self, root="./data", train=True, split_ratio=1, transform=None, custom_csv=None):
        """
        Voice Dataset for Contrastive Learning
        
        :param root: The path to the data directory
        :param train: is train or test
        :param split_ratio: split ratio for train(can be 0.5 or above) and valid(can be lower than 0.5) set
        :param transform: data transformer
        :param target_transform: label transformer
        """
        super().__init__()
        self.download(root)
        self.download_root = root
        self.is_train = train
        self.custom_csv = custom_csv
        self.name = ("train" if train else "test") if not custom_csv else custom_csv

        raw_data = self._load_data(self.get_dataset_path, split_ratio if split_ratio >= 0.5 else 1-split_ratio)
        if not self.is_train or split_ratio >= 0.5:
            self.raw_data, _ = raw_data
        else:
            _, self.raw_data = raw_data
            if "train" not in self.name:
                print(f"Warning: The name of dataset should start with 'train' for training set. (current - {self.name})")
            self.name = self.name.replace("train", "valid")

        self.data0 = self.raw_data['path'].tolist()
        self.data1 = self.raw_data['path'].tolist()

        if 'label' in self.raw_data.columns:
            self.label = [(0, 1) if lb == 'real' else (1, 0) for lb in self.raw_data['label'].tolist()]
        else:
            if 'real' in self.raw_data.columns and 'fake' in self.raw_data.columns:
                f_label = self.raw_data['fake'].tolist()
                r_label = self.raw_data['real'].tolist()
                self.label = list(zip(f_label, r_label))
            else:
                self.label = None

        self.transforms(transform)

    @staticmethod
    def _load_data(dataset_path, split_ratio=1):
        random_state = 1  # fixed random_state

        df = pd.read_csv(dataset_path)

        if split_ratio == 1 or split_ratio == 0:
            return (df, None) if split_ratio == 1 else (None, df)

        if 'label' in df.columns:
            df1, df2, _, _ = split(df, df['label'], test_size=1-split_ratio, random_state=random_state)
        else:
            df1, df2 = split(df, test_size=1-split_ratio, random_state=random_state)
        return df1, df2

    def transforms(self, transform=None):
        if transform is not None:
            if not isinstance(transform, list) and not isinstance(transform, tuple):
                transform = [transform]
            for t in transform:
                self.data0, self.data1, self.label = t(self.data0, self.data1, self.label)

    def __len__(self):
        return len(self.data0)

    def __getitem__(self, index):
        if self.label is not None:
            return self.data0[index], self.data1[index], self.label[index]
        return self.data0[index], self.data1[index]

In [8]:
split_ratio = 0.8

train_dataset = VoiceDataset(root=Config.ROOT_FOLDER, train=True, split_ratio=split_ratio)
train_augmented = VoiceDataset(root=Config.ROOT_FOLDER, train=True, split_ratio=split_ratio, custom_csv="train_augmented")
valid_dataset = VoiceDataset(root=Config.ROOT_FOLDER, train=True, split_ratio=1-split_ratio)
valid_augmented = VoiceDataset(root=Config.ROOT_FOLDER, train=True, split_ratio=1-split_ratio, custom_csv="train_augmented")
unlabeled_dataset = VoiceDataset(root=Config.ROOT_FOLDER, train=False, custom_csv="unlabeled_data")
test_dataset = VoiceDataset(root=Config.ROOT_FOLDER, train=False)

print(f"Loaded Dataset - train({len(train_dataset)}), valid({len(valid_dataset)}), unlabeled({len(unlabeled_dataset)}) test({len(test_dataset)})")
print("Query Dataset for checking:", train_dataset[0])
train_dataset.raw_data

File already exists in ./data/download.zip
File already exists in ./data/download.zip
File already exists in ./data/download.zip
File already exists in ./data/download.zip
File already exists in ./data/download.zip
File already exists in ./data/download.zip
Loaded Dataset - train(44350), valid(11088), unlabeled(1264) test(50000)
Query Dataset for checking: ('./train/NQJUDUMG.ogg', './train/NQJUDUMG.ogg', (1, 0))


Unnamed: 0,id,path,label
19535,NQJUDUMG,./train/NQJUDUMG.ogg,fake
37414,SGACBBDI,./train/SGACBBDI.ogg,fake
40645,SIBSFMAP,./train/SIBSFMAP.ogg,fake
16487,LLBQPFAD,./train/LLBQPFAD.ogg,real
954,ZWYRTAOF,./train/ZWYRTAOF.ogg,real
...,...,...,...
50057,BDFFJCBX,./train/BDFFJCBX.ogg,fake
32511,NEFSVUCS,./train/NEFSVUCS.ogg,real
5192,MJFGSHIR,./train/MJFGSHIR.ogg,fake
12172,USIDOXOR,./train/USIDOXOR.ogg,real


#### Data Transformation
By using 
[TorchAudio Models](https://pytorch.org/audio/stable/models.html) |
[TorchAudio Pretrained Models](https://pytorch.org/audio/stable/pipelines.html#module-torchaudio.pipelines)

In [9]:
class AudioPipelines:
    """ Audio Pipelines - Pretrained Embeddings """
    
    wav2vec_bundle = pipelines.WAV2VEC2_ASR_BASE_960H
    resnet_bundle = "Wespeaker/wespeaker-voxceleb-resnet152-LM"
    device_setting = (device, DEVICE_NUM)
    
    def __init__(self, audio_cache_dir="audio_cache", nb_name=Config.NB_NAME):
        self.audio_cache_dir = audio_cache_dir
        if not os.path.isdir(audio_cache_dir):
            os.mkdir(audio_cache_dir)
        if not os.path.isdir(os.path.join(audio_cache_dir, nb_name)):
            os.mkdir(os.path.join(audio_cache_dir, nb_name))
        self.wav2vec = self.get_wav2vec(audio_cache_dir=audio_cache_dir)
        self.resnet = self.get_resnet(audio_cache_dir=audio_cache_dir)
        if not os.path.isdir(os.path.join(audio_cache_dir, nb_name, self.wav2vec.name)):
            os.mkdir(os.path.join(audio_cache_dir, nb_name, self.wav2vec.name))
        if not os.path.isdir(os.path.join(audio_cache_dir, nb_name, self.resnet.name)):
            os.mkdir(os.path.join(audio_cache_dir, nb_name, self.resnet.name))

    @classmethod
    def get_wav2vec(cls, audio_cache_dir="."):
        sr = cls.wav2vec_bundle.sample_rate  # Wav2Vec2 Model uses sample rate 16kHz
        wav2vec_model = cls.wav2vec_bundle.get_model()
        wav2vec_model.to(cls.device_setting[0])
        print(f"INFO: Wav2Vec Model Loaded on {cls.device_setting[0]}:{cls.device_setting[1]}")
        wav2vec_model.eval()
        
        def wav2vec(path):
            waveform, sample_rate = torchaudio.load(path, normalize=True)
            if sample_rate != sr:
                resampler = T.Resample(sample_rate, sr)
                waveform = resampler(waveform)
            with torch.no_grad():
                embedding, _ = wav2vec(waveform.to(cls.device_setting[0]))
            return embedding
        
        wav2vec.__dict__['name'] = str(cls.wav2vec_bundle._path).split(".")[0]
        wav2vec.__dict__['cache'] = audio_cache_dir
        return wav2vec

    @classmethod
    def get_resnet(cls, audio_cache_dir="."):
        model_id = cls.resnet_bundle
        model_name = model_id.replace("Wespeaker/wespeaker-", "").replace("-", "_")
    
        root_dir = hf_hub_download(model_id, filename=model_name+".onnx").replace(model_name+".onnx", "")
        if not os.path.isfile(root_dir+"avg_model.pt"):
            os.rename(hf_hub_download(model_id, filename=model_name+".pt"), root_dir+"avg_model.pt")
        if not os.path.isfile(root_dir+"config.yaml"):
            os.rename(hf_hub_download(model_id, filename=model_name+".yaml"), root_dir+"config.yaml")
    
        resnet_model = wespeaker.load_model_local(root_dir)
        resnet_model.set_gpu(-1 if cls.device_setting[0] == torch.device('cpu') else cls.device_setting[1])
        print(f"INFO: ResNet Model Loaded on {resnet_model.device}")

        def resnet(path):
            return resnet_model.extract_embedding(path)

        resnet.__dict__['name'] = model_name
        resnet.__dict__['cache'] = audio_cache_dir
        return resnet

In [10]:
def to_embedding(dataset_name, pretrained, d_idx):
    convert_path = lambda path: os.path.join(Config.ROOT_FOLDER, *path.replace("./", "").split("/"))
    embedding_path = os.path.join(pretrained.cache, Config.NB_NAME, pretrained.name, f"{dataset_name}.embedding")

    def convert(*args):
        *datas_list, labels = args
        if not os.path.isfile(embedding_path):
            new_datas = [pretrained(convert_path(path)) for path in tqdm(datas_list[d_idx], desc=f"Convert {dataset_name} dataset with {pretrained.name}")]
            torch.save(new_datas, embedding_path)
            print("INFO: Voice Embedding saved.")
        else:
            new_datas = torch.load(embedding_path)
            print(f"INFO: Pretrained {pretrained.name} embedding for {dataset_name} dataset is loaded.")
        datas_list[d_idx] = new_datas
        return *datas_list, labels
    return convert

In [11]:
WV_DIM_SIZE = 256

def flatten_tensor(adaptive_pool=nn.AdaptiveAvgPool1d(WV_DIM_SIZE), d_idx=1):
    def flatten(*args):
        *datas_list, labels = args
        datas_list[d_idx] = [adaptive_pool(torch.flatten(t).unsqueeze(0)).squeeze(0) for t in datas_list[d_idx]]
        return *datas_list, labels
    return flatten

In [12]:
to_tensor = lambda *args: (*args[:-1], list(map(torch.tensor, args[-1])))  # label to tensor

apl = AudioPipelines()  # Create Audio Pipeline for converting audio to embeddings

for dataset in [train_dataset, train_augmented, valid_dataset, valid_augmented]:
    dataset.transforms(transform=[
        to_embedding(dataset.name, apl.resnet, d_idx=0),
        to_embedding(dataset.name, apl.wav2vec, d_idx=1),
        flatten_tensor(d_idx=1),
        to_tensor
    ])

for dataset in [unlabeled_dataset, test_dataset]:
    dataset.transforms(transform=[
        to_embedding(dataset.name, apl.resnet, d_idx=0),
        to_embedding(dataset.name, apl.wav2vec, d_idx=1),
        flatten_tensor(d_idx=1)
    ])

del apl  # release memory

INFO: Wav2Vec Model Loaded on cuda:1




INFO: ResNet Model Loaded on cuda:1
INFO: Pretrained voxceleb_resnet152_LM embedding for train dataset is loaded.
INFO: Pretrained wav2vec2_fairseq_base_ls960_asr_ls960 embedding for train dataset is loaded.
INFO: Pretrained voxceleb_resnet152_LM embedding for train_augmented dataset is loaded.
INFO: Pretrained wav2vec2_fairseq_base_ls960_asr_ls960 embedding for train_augmented dataset is loaded.
INFO: Pretrained voxceleb_resnet152_LM embedding for valid dataset is loaded.
INFO: Pretrained wav2vec2_fairseq_base_ls960_asr_ls960 embedding for valid dataset is loaded.
INFO: Pretrained voxceleb_resnet152_LM embedding for valid_augmented dataset is loaded.
INFO: Pretrained wav2vec2_fairseq_base_ls960_asr_ls960 embedding for valid_augmented dataset is loaded.
INFO: Pretrained voxceleb_resnet152_LM embedding for unlabeled_data dataset is loaded.
INFO: Pretrained wav2vec2_fairseq_base_ls960_asr_ls960 embedding for unlabeled_data dataset is loaded.
INFO: Pretrained voxceleb_resnet152_LM embeddi

In [13]:
for (*data, label), i in zip(train_dataset, range(5)):
    print(f"Train Dataset {i}: {label}", data)

Train Dataset 0: tensor([1, 0]) [tensor([-0.2622, -0.1319, -0.0539,  0.0124,  0.1426,  0.1331,  0.0709,  0.0193,
         0.0863, -0.2342,  0.0471,  0.2622, -0.0538,  0.0060,  0.0194, -0.0204,
        -0.1776,  0.0919,  0.1826,  0.0205, -0.0145, -0.0908,  0.0730,  0.1513,
         0.0507,  0.1620,  0.0702, -0.1969, -0.0679, -0.1909, -0.0514, -0.0315,
        -0.1292, -0.1566,  0.0784,  0.2276,  0.0624,  0.0883, -0.0481,  0.2119,
         0.1538, -0.0134,  0.1566,  0.0766, -0.0213, -0.1333, -0.0209, -0.0596,
         0.0964,  0.1612, -0.1159, -0.2910, -0.0109,  0.1471, -0.2822, -0.0772,
         0.1346, -0.0208,  0.0997,  0.2245,  0.0230,  0.0578, -0.0675,  0.0491,
        -0.1563,  0.0842, -0.1240, -0.0403, -0.0722,  0.0857, -0.1086, -0.0910,
        -0.1778,  0.1218, -0.0394,  0.0036,  0.1889,  0.4586, -0.2083,  0.1329,
        -0.2099,  0.0219,  0.1260,  0.0328,  0.1316,  0.0938, -0.0038,  0.0322,
        -0.1676,  0.0230, -0.0543, -0.0173,  0.0925,  0.0832,  0.0672, -0.0914,
       

In [14]:
for (*data, label), i in zip(valid_dataset, range(5)):
    print(f"Valid Dataset {i}: {label}", data)

Valid Dataset 0: tensor([1, 0]) [tensor([-0.0336,  0.0182,  0.1605, -0.0633, -0.2657, -0.0786,  0.2894, -0.0773,
        -0.0861,  0.1535,  0.2038, -0.1084, -0.0763,  0.0494,  0.2047, -0.0693,
         0.2637,  0.0098,  0.0902,  0.0144,  0.0416,  0.0160,  0.1329,  0.1867,
        -0.0150,  0.0670,  0.0060, -0.2174,  0.0260, -0.1373,  0.1242,  0.1294,
        -0.0187,  0.1399,  0.0461,  0.1157,  0.1255,  0.0183, -0.0848,  0.0068,
        -0.0793,  0.0474, -0.0455, -0.1096, -0.0644,  0.0303, -0.0370, -0.1810,
        -0.1520,  0.0405, -0.0606, -0.0048,  0.0287, -0.0114, -0.0170, -0.1477,
         0.0111, -0.0047,  0.1893, -0.2278, -0.1438,  0.0524,  0.1691, -0.2530,
         0.0281, -0.0036, -0.0240, -0.1658, -0.1500,  0.1094,  0.2081, -0.0912,
        -0.1184,  0.0304,  0.0684,  0.0259,  0.1747,  0.2028,  0.0706, -0.1569,
        -0.0617,  0.0943,  0.1673,  0.1042,  0.0533, -0.1656, -0.0861,  0.0449,
        -0.1007,  0.0041, -0.2378, -0.0718,  0.2067, -0.0689,  0.2285,  0.1158,
       

In [15]:
for *data, i in zip(unlabeled_dataset, range(5)):
    print(f"UnLabeled Dataset {i}:", data)

UnLabeled Dataset 0: [(tensor([-9.8545e-02,  7.8067e-02,  9.8155e-02, -9.5028e-02, -1.5494e-03,
        -1.0997e-01,  4.6318e-03, -3.1125e-02,  3.5486e-02,  1.2958e-01,
        -1.1017e-01, -2.9643e-02,  1.3663e-01,  4.7201e-02,  2.7647e-02,
        -5.2182e-03, -1.6107e-01, -2.5661e-03,  4.1623e-02,  5.6934e-02,
        -3.3284e-02, -1.0894e-01,  7.9592e-02,  9.2778e-02, -4.4489e-02,
         3.8495e-02,  1.1325e-01,  1.0612e-01,  5.6692e-02,  1.5595e-02,
         5.0284e-02, -1.3181e-02, -2.6522e-03, -8.2216e-02, -1.3975e-01,
         1.2072e-01,  4.0033e-03, -8.3844e-03, -2.4211e-01, -1.3957e-01,
        -1.1047e-03,  7.7923e-03, -5.3082e-02,  2.5970e-01,  1.4133e-01,
        -1.1541e-01,  1.2204e-01, -3.9140e-02, -8.0197e-02,  4.5311e-02,
         7.7239e-02,  6.0349e-02, -5.3912e-02,  2.1894e-02, -3.7784e-02,
        -9.2405e-02, -1.3264e-02,  5.6325e-04,  1.4982e-01, -5.2745e-02,
        -1.5333e-02,  4.1237e-02,  1.1999e-03, -4.5370e-02,  6.2777e-02,
         3.4919e-02,  3.9585

In [16]:
for *data, i in zip(test_dataset, range(5)):
    print(f"Test Dataset {i}:", data)

Test Dataset 0: [(tensor([-0.0440,  0.0194, -0.0009, -0.0082, -0.0174, -0.1296, -0.0795, -0.0763,
         0.1436,  0.0484, -0.0266, -0.0678,  0.0314,  0.0616, -0.0423, -0.0676,
        -0.1374,  0.0819,  0.0537,  0.0678, -0.0810, -0.1357,  0.0359,  0.0167,
         0.0724,  0.0702,  0.1178,  0.1597,  0.0781,  0.0037,  0.1121,  0.0362,
        -0.0505, -0.0568,  0.0253,  0.0175, -0.0293,  0.0337, -0.1793, -0.0305,
        -0.0153, -0.0454, -0.0522,  0.2037,  0.1051, -0.0733, -0.0042,  0.0441,
        -0.0011,  0.1169,  0.0618,  0.0202, -0.0422,  0.0272, -0.0136, -0.1012,
        -0.0886,  0.0227,  0.0316, -0.0563, -0.0129,  0.0821, -0.0032, -0.0357,
         0.0695,  0.0099,  0.0551, -0.0480, -0.0132, -0.0537,  0.0250,  0.2138,
        -0.0260,  0.0234, -0.1294, -0.1327, -0.1087,  0.0559, -0.1272, -0.0230,
        -0.0462,  0.0715,  0.0991,  0.1146, -0.1166, -0.2101, -0.0384, -0.0236,
         0.0557,  0.0326, -0.1009, -0.0410,  0.0916, -0.1731,  0.1195, -0.1009,
        -0.1256,  0.00

## DataLoader
    - DataLoader는 구축된 데이터셋에서 배치크기(batch_size)에 맞게 데이터를 추출하고, 필요에 따라 섞거나(shuffle=True) 순서대로 반환(shuffle=False)하는 역할을 합니다.
    - 훈련 데이터(train_loader)는 일반적으로 섞어서 모델이 데이터에 덜 편향되게 학습하도록하며,
      검증 데이터(val_loader)는 모델 성능 평가를 위해 순서대로 사용하고,
      테스트 데이터(test_loader)는 최종적인 추론을 위해 사용합니다.

    이렇게 DataLoader를 사용함으로써, 효율적인 데이터 처리와 모델 학습 및 평가가 가능해집니다.

In [17]:
BATCH_SIZE = Config.BATCH_SIZE

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
train_aug_loader = DataLoader(train_augmented, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
valid_aug_loader = DataLoader(valid_augmented, batch_size=BATCH_SIZE, shuffle=False)
unlabeled_loader = DataLoader(unlabeled_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

## Define Model

In [18]:
class Preprocessor(nn.Module):
    def __init__(self):
        super().__init__()
        self.denoise = T.Spectrogram()
        self.mfcc = T.MFCC()
    
    def forward(self, x):
        x = self.denoise(x)
        x = self.mfcc(x)
        return x

### 1. Feature Extraction Layer (ResNet-like structure)

In [19]:
class AttentionLayer(nn.Module):
    def __init__(self, feature_dim):
        super(AttentionLayer, self).__init__()
        self.feature_dim = feature_dim
        self.attention_weights = nn.Parameter(torch.randn(feature_dim))

    def forward(self, x):
        attention_scores = F.softmax(self.attention_weights, dim=0)
        weighted_features = x * attention_scores
        return weighted_features

In [20]:
class ResidualBlock(nn.Module):
    def __init__(self, in_features, out_features, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Linear(in_features, out_features)
        self.bn1 = nn.BatchNorm1d(out_features)
        self.relu = nn.LeakyReLU(0.01)
        self.conv2 = nn.Linear(out_features, out_features)
        self.bn2 = nn.BatchNorm1d(out_features)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

In [21]:
class FeatureExtractor(nn.Module):
    def __init__(self, embedding_size, hidden_size, latent_size):
        super().__init__()

        # Encoder with skip connections
        self.encoder_block1 = nn.Sequential(
            nn.Linear(embedding_size, hidden_size),
            nn.LeakyReLU(0.01),
            nn.Dropout(0.5),
            nn.BatchNorm1d(hidden_size)
        )
        self.skip1 = nn.Linear(embedding_size, hidden_size)

        self.encoder_block2 = nn.Sequential(
            nn.Linear(hidden_size, hidden_size//2),
            nn.LeakyReLU(0.01),
            nn.Dropout(0.5),
            nn.BatchNorm1d(hidden_size//2)
        )
        self.skip2 = nn.Linear(hidden_size, hidden_size//2)

        self.encoder_block3 = nn.Sequential(
            nn.Linear(hidden_size//2, hidden_size//4),
            nn.LeakyReLU(0.01),
            nn.Dropout(0.5),
            nn.BatchNorm1d(hidden_size//4)
        )
        self.skip3 = nn.Linear(hidden_size//2, hidden_size//4)

        self.encoder_block4 = nn.Sequential(
            nn.Linear(hidden_size//4, hidden_size//8),
            nn.LeakyReLU(0.01),
            nn.Dropout(0.5),
            nn.BatchNorm1d(hidden_size//8)
        )
        self.skip4 = nn.Linear(hidden_size//4, hidden_size//8)

        self.final_encoder = nn.Sequential(
            nn.Linear(hidden_size//8, latent_size),
            nn.LeakyReLU(0.01)
        )

        self.attention = AttentionLayer(latent_size)

    def forward(self, x):
        x1 = self.encoder_block1(x) + self.skip1(x)
        x2 = self.encoder_block2(x1) + self.skip2(x1)
        x3 = self.encoder_block3(x2) + self.skip3(x2)
        x4 = self.encoder_block4(x3) + self.skip4(x3)
        encoded = self.final_encoder(x4)
        attention = self.attention(encoded)
        return attention

In [22]:
class FeatureExtractor(nn.Module):
    def __init__(self, embedding_size, hidden_size, latent_size):
        super(FeatureExtractor, self).__init__()

        self.initial_layer = nn.Sequential(
            nn.Linear(embedding_size, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.LeakyReLU(0.01)
        )

        self.layer1 = self._make_layer(hidden_size, hidden_size, 3)
        self.layer2 = self._make_layer(hidden_size, hidden_size//2, 4, stride=2)
        self.layer3 = self._make_layer(hidden_size//2, hidden_size//4, 6, stride=2)
        self.layer4 = self._make_layer(hidden_size//4, hidden_size//8, 3, stride=2)

        self.final_encoder = nn.Sequential(
            nn.Linear(hidden_size//8, latent_size),
            nn.LeakyReLU(0.01)
        )

        self.attention = AttentionLayer(latent_size)

    def _make_layer(self, in_features, out_features, blocks, stride=1):
        downsample = None
        if stride != 1 or in_features != out_features:
            downsample = nn.Sequential(
                nn.Linear(in_features, out_features),
                nn.BatchNorm1d(out_features)
            )

        layers = []
        layers.append(ResidualBlock(in_features, out_features, downsample))
        for _ in range(1, blocks):
            layers.append(ResidualBlock(out_features, out_features))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.initial_layer(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        encoded = self.final_encoder(x)
        attention = self.attention(encoded)
        return attention

### 2. Domain Adaptation Layer

In [23]:
class DomainAdaptationLayer(nn.Module):
    def __init__(self, feature_dim):
        super().__init__()
        self.feature_map = nn.Sequential(
            nn.Linear(feature_dim, feature_dim),
            nn.ReLU(),
            nn.Linear(feature_dim, feature_dim)
        )

    def forward(self, x):
        return self.feature_map(x)

### 3. Adversarial Domain Classification Layer

In [24]:
class AdversarialDomainClassifier(nn.Module):
    def __init__(self, feature_dim, hidden_dim=100):
        super().__init__()
        self.classifier = nn.Sequential(
            nn.Linear(feature_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)  # output => domain 0(train), 1(test)
        )
    
    def forward(self, x):
        return self.classifier(x)

### 4. Uncertainty-Aware Classification Layer

In [25]:
class UncertaintyAwareClassifier(nn.Module):
    def __init__(self, feature_dim):
        super().__init__()
        self.classifier = nn.Linear(feature_dim, 2)
        self.uncertainty_threshold = nn.Parameter(torch.tensor([0.5]))

    def forward(self, features):
        logits = self.classifier(features)
        probabilities = F.softmax(logits, dim=1)

        uncertainty = -(probabilities * torch.log(probabilities)).sum(dim=1)
        noise_prob = (uncertainty > self.uncertainty_threshold).float()
        
        return probabilities, noise_prob.unsqueeze(1)

### 5. Fake & Real Voice Detection Model

In [26]:
class VoiceTypeIdentifier(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        '''self.conv = nn.Sequential(
            nn.Conv2d(1, input_dim//2, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(input_dim//2, input_dim, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim*2,
            num_layers=2,
            batch_size=True
        )'''
        self.classifier = UncertaintyAwareClassifier(
            feature_dim=input_dim
        )
    
    def forward(self, x):
        #x = self.conv(x)
        #x = x.permute(0, 2, 1, 3).contiguous()
        #x = x.view(x.size(0), x.size(1), -1)
        #x, _ = self.lstm(x)
        #x = self.fc(x[:, -1, :])
        return self.classifier(x)

### 6. Total Model

1. [전이학습] 모델 임베딩을 전부 한번에 컨캣 해서 집어 넣도록 한다.
2. [불확실성 기반 소음 분류] 분류 레이어는 총 2개로 하나, 불확실성 정도를 측정해 반영한다
3. 로스를 BCEWithLogitsLoss와 BCELoss 혼합 사용

4. [도메인 적응] 특징 추출기 이후 도메인 적응 레이어 추가 (확률 분포 차이 적응)
    - 트레인셋과 언라벨드 데이터셋의 차이는 소음의 유무
    - 소음의 유무만을 생각하면 언라벨드 데이터셋의 라벨 없는 문제는 해결됨
    - 도메인 적응 레이어를 통과하면 어느 도메인에서 온 것인지 헷갈리도록 만들어야 함
    - 결론적으로 메인 모델은 어느 도메인에서 온 것인지 몰라도 동일한 결과를 내도록
    - 도메인 적응 레이어의 학습은 테스트셋과 언라벨드 데이터셋을 구분하기 힘들도록 변환하는 과정을 거치도록
5. [적대적 학습] 적대적 도메인 분류기 추가
    - 어느 도메인으로부터 나온 데이터인지 추측하도록 함
    - 추측에 실패하도록 유도해야 함
    - 언라벨드 데이터셋과 테스트셋이 동일 도메인인 것으로 나오도록 유도

In [27]:
class VoiceDetector(nn.Module):
    def __init__(self, embedding_dim, noise_dim, hidden_size, latent_size):
        super().__init__()
        self.domain_adapter = DomainAdaptationLayer(feature_dim=embedding_dim + noise_dim)
        self.domain_classifier = AdversarialDomainClassifier(feature_dim=embedding_dim + noise_dim)
        self.encoder = FeatureExtractor(
            embedding_size=embedding_dim + noise_dim,
            hidden_size=hidden_size,
            latent_size=latent_size
        )
        self.classifier = UncertaintyAwareClassifier(
            feature_dim=latent_size
        )

    def forward(self, pretrained_embedding, noise_embedding):
        combined_embedding = torch.cat((pretrained_embedding, noise_embedding), dim=1)
        adapted = self.domain_adapter(combined_embedding)
        feature = self.encoder(adapted)
        output, uncertainty = self.classifier(feature)
        #vad = (torch.max(torch.softmax(noise_embedding, dim=1), dim=1)[0] >= 0.5).float
        vad = torch.max(torch.softmax(noise_embedding, dim=1), dim=1)[0].view(-1, 1)
        return (output*vad, uncertainty), self.domain_classifier(adapted)

In [28]:
# Set model parameters
model_params = dict(
    embedding_dim=len(train_dataset[0][0]),
    noise_dim=len(train_dataset[0][1]),
    hidden_size=1024,
    latent_size=128
)
model_params

{'embedding_dim': 256,
 'noise_dim': 256,
 'hidden_size': 1024,
 'latent_size': 128}

In [29]:
# Create model instance
model = VoiceDetector(**model_params)
model.to(device)

VoiceDetector(
  (domain_adapter): DomainAdaptationLayer(
    (feature_map): Sequential(
      (0): Linear(in_features=512, out_features=512, bias=True)
      (1): ReLU()
      (2): Linear(in_features=512, out_features=512, bias=True)
    )
  )
  (domain_classifier): AdversarialDomainClassifier(
    (classifier): Sequential(
      (0): Linear(in_features=512, out_features=100, bias=True)
      (1): ReLU()
      (2): Linear(in_features=100, out_features=1, bias=True)
    )
  )
  (encoder): FeatureExtractor(
    (initial_layer): Sequential(
      (0): Linear(in_features=512, out_features=1024, bias=True)
      (1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01)
    )
    (layer1): Sequential(
      (0): ResidualBlock(
        (conv1): Linear(in_features=1024, out_features=1024, bias=True)
        (bn1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): LeakyReLU(ne

In [30]:
# BinaryCrossEntropy
criterion = nn.BCEWithLogitsLoss()
domain_criterion = nn.BCEWithLogitsLoss()
uncertainty_criterion = nn.BCELoss()

# Adam optimizer
optimizer = torch.optim.Adam(params=model.parameters(), lr=Config.LR)

## Training & Validation

In [31]:
from sklearn.metrics import roc_auc_score

def multi_label_auc(y_true, y_scores):
    auc_scores = []
    for idx in range(y_true.shape[1]):
        auc = roc_auc_score(y_true[:, idx], y_scores[:, idx])
        auc_scores.append(auc)
    mean_auc_score = np.mean(auc_scores)
    return mean_auc_score

In [32]:
def state_manager(num_epochs, log_interval=5):
    cache = [0.0 for _ in range(10)]
    
    def update_state(
            progress, epoch=None, step=None, steps=None, train_loss=None,
            domain_acc=None, domain_loss=None, valid_acc=None, valid_loss=None, cross_acc=None, cross_loss=None
    ):
        new_state = [epoch, step, steps, train_loss, domain_acc, domain_loss, valid_acc, valid_loss, cross_acc, cross_loss]
        for i, state in enumerate(new_state):
            if state:
                cache[i] = state
        epoch, step, steps, train_loss, domain_acc, domain_loss, valid_acc, valid_loss, cross_acc, cross_loss = cache
        progress.update(1)
        print(f"\rEpoch [{epoch}/{num_epochs}], Step [{step}/{steps}], Loss: {train_loss:.6f}, Domain: {domain_acc:.6%} | {domain_loss:.6f}, "
            + f"Valid: {valid_acc:.6%} | {valid_loss:.6f}, Cross Valid: {cross_acc:.6%} | {cross_loss:.6f}", end="   ")
        
        def result():
            print(end="\n" if epoch % log_interval == 0 or epoch == num_epochs else "")
            return cross_acc
        return result
    
    return tqdm(range(1, num_epochs+1), desc="Running Epochs"), update_state

In [33]:
def infinite_loader(loader):
    while True:
        for data in loader:
            yield data

In [34]:
num_epochs = 200
epochs, update_state = state_manager(num_epochs, log_interval=5)  # to change epochs, you should re-instantiate the entire model

performance = 0
loaders = (train_loader, unlabeled_loader, valid_loader, valid_aug_loader)
lens = list(map(len, loaders))
total_steps = num_epochs * lens[0]

with (tqdm(total=lens[0], desc="Training") as tr_pgrs, tqdm(total=lens[2], desc="Validation") as val_pgrs, tqdm(total=lens[3], desc="Cross Validation") as crv_pgrs):
    for epoch in epochs:
        [p.reset(total=l) for p, l in zip((tr_pgrs, val_pgrs, crv_pgrs), lens)]  # progressbar reset

        # Train & Domain Adapt
        model.train()
        for step, (train_inputs, adpt_inputs) in enumerate(zip(loaders[0], infinite_loader(loaders[1]))):
            optimizer.zero_grad()

            *train_features, labels = (data.float().to(device) for data in train_inputs)
            (outputs, uncertainty), train_domain = model(*train_features)
            _, test_domain = model(*(data.float().to(device) for data in adpt_inputs))
            domain_outputs = torch.cat([train_domain, test_domain])
            domain_labels = torch.cat([torch.zeros(train_domain.shape), torch.ones(test_domain.shape)]).to(device)

            main_loss = criterion(outputs, labels)
            domain_loss = domain_criterion(domain_outputs, domain_labels)  # induce domain classifier produce wrong results
            uncertainty_loss = uncertainty_criterion(torch.transpose(uncertainty, 1, 0).squeeze(), (labels.sum(dim=1) == 0).float())  # TODO

            current_step = epoch * lens[0] + step
            lambda_param = 2 / (1 + math.exp(-10 * current_step / total_steps)) - 1  # gradual improvement during entire epochs
            
            total_loss = main_loss + lambda_param * domain_loss + uncertainty_loss
            domain_acc = ((F.softmax(domain_outputs, dim=1) >= 0.5) == domain_labels).sum() / len(domain_labels)
            
            total_loss.backward()
            optimizer.step()

            update_state(tr_pgrs, epoch, step, lens[0], (main_loss+uncertainty_loss).item(), domain_acc, domain_loss.item())

        # Cross Domain Validation
        model.eval()
        with torch.no_grad():
            for loader, loader_len, progress, cross in zip(loaders[-2:], lens[-2:], (val_pgrs, crv_pgrs), (False, True)):
                val_loss, val_labels, val_outputs = 0, [], []
                
                for inputs in loader:
                    *features, labels = (data.float().to(device) for data in inputs)
                    (predicted, _), _ = model(*features)

                    val_loss += criterion(predicted, labels).item() / loader_len
                    val_labels.append(labels.cpu().numpy())
                    val_outputs.append(predicted.cpu().numpy())
                    val_acc = multi_label_auc(np.concatenate(val_labels, axis=0), np.concatenate(val_outputs, axis=0))

                    if cross:
                        result = update_state(progress, cross_acc=val_acc, cross_loss=val_loss)
                    else:
                        update_state(progress, valid_acc=val_acc, valid_loss=val_loss)

        performance = result()

Running Epochs:   0%|          | 0/200 [00:00<?, ?it/s]

Training:   0%|          | 0/347 [00:00<?, ?it/s]

Validation:   0%|          | 0/87 [00:00<?, ?it/s]

Cross Validation:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [5/200], Step [346/347], Loss: 97.463898, Domain: 67.368424% | 0.055957, Valid: 79.697422% | 0.687184, Cross Valid: 67.803183% | 0.687791    
Epoch [10/200], Step [346/347], Loss: 65.201866, Domain: 67.368424% | 0.044169, Valid: 84.452587% | 0.685763, Cross Valid: 74.378988% | 0.687337   
Epoch [15/200], Step [346/347], Loss: 53.909710, Domain: 67.368424% | 0.022751, Valid: 86.258965% | 0.684531, Cross Valid: 75.172151% | 0.687039   
Epoch [20/200], Step [346/347], Loss: 24.874237, Domain: 67.368424% | 0.010590, Valid: 87.768419% | 0.683626, Cross Valid: 76.921510% | 0.686677   
Epoch [25/200], Step [346/347], Loss: 7.128346, Domain: 67.368424% | 0.004954, Valid: 88.355678% | 0.682253, Cross Valid: 76.638907% | 0.686282    
Epoch [30/200], Step [346/347], Loss: 2.291114, Domain: 67.368424% | 0.004518, Valid: 89.874711% | 0.681694, Cross Valid: 77.500095% | 0.686157   
Epoch [35/200], Step [346/347], Loss: 0.677061, Domain: 67.368424% | 0.006370, Valid: 90.300090% | 0.681424, Cros

In [35]:
num_epochs = 30
epochs, update_state = state_manager(num_epochs, log_interval=5)  # to change epochs, you should re-instantiate the entire model

performance = 0
loaders = (train_loader, unlabeled_loader, valid_loader, valid_aug_loader)
lens = list(map(len, loaders))
total_steps = num_epochs * lens[0]

with (tqdm(total=lens[0], desc="Training") as tr_pgrs, tqdm(total=lens[2], desc="Validation") as val_pgrs, tqdm(total=lens[3], desc="Cross Validation") as crv_pgrs):
    for epoch in epochs:
        [p.reset(total=l) for p, l in zip((tr_pgrs, val_pgrs, crv_pgrs), lens)]  # progressbar reset

        # Train & Domain Adapt
        model.train()
        for step, (train_inputs, adpt_inputs) in enumerate(zip(loaders[0], infinite_loader(loaders[1]))):
            optimizer.zero_grad()

            *train_features, labels = (data.float().to(device) for data in train_inputs)
            (outputs, uncertainty), train_domain = model(*train_features)
            _, test_domain = model(*(data.float().to(device) for data in adpt_inputs))
            domain_outputs = torch.cat([train_domain, test_domain])
            domain_labels = torch.cat([torch.zeros(train_domain.shape), torch.ones(test_domain.shape)]).to(device)

            main_loss = criterion(outputs, labels)
            domain_loss = domain_criterion(domain_outputs, domain_labels)  # induce domain classifier produce wrong results
            uncertainty_loss = uncertainty_criterion(torch.transpose(uncertainty, 1, 0).squeeze(), (labels.sum(dim=1) == 0).float())  # TODO

            current_step = epoch * lens[0] + step
            lambda_param = 2 / (1 + math.exp(-10 * current_step / total_steps)) - 1  # gradual improvement during entire epochs
            
            total_loss = main_loss + lambda_param * domain_loss + uncertainty_loss
            domain_acc = ((F.softmax(domain_outputs, dim=1) >= 0.5) == domain_labels).sum() / len(domain_labels)
            
            main_loss.backward() #total_loss.backward()
            optimizer.step()

            update_state(tr_pgrs, epoch, step, lens[0], main_loss.item(), domain_acc, domain_loss.item())

        # Cross Domain Validation
        model.eval()
        with torch.no_grad():
            for loader, loader_len, progress, cross in zip(loaders[-2:], lens[-2:], (val_pgrs, crv_pgrs), (False, True)):
                val_loss, val_labels, val_outputs = 0, [], []
                
                for inputs in loader:
                    *features, labels = (data.float().to(device) for data in inputs)
                    (predicted, _), _ = model(*features)

                    val_loss += criterion(predicted, labels).item() / loader_len
                    val_labels.append(labels.cpu().numpy())
                    val_outputs.append(predicted.cpu().numpy())
                    val_acc = multi_label_auc(np.concatenate(val_labels, axis=0), np.concatenate(val_outputs, axis=0))

                    if cross:
                        result = update_state(progress, cross_acc=val_acc, cross_loss=val_loss)
                    else:
                        update_state(progress, valid_acc=val_acc, valid_loss=val_loss)

        performance = result()

Running Epochs:   0%|          | 0/30 [00:00<?, ?it/s]

Training:   0%|          | 0/347 [00:00<?, ?it/s]

Validation:   0%|          | 0/87 [00:00<?, ?it/s]

Cross Validation:   0%|          | 0/175 [00:00<?, ?it/s]

Epoch [5/30], Step [346/347], Loss: 0.678297, Domain: 67.368424% | 0.000690, Valid: 93.152872% | 0.679676, Cross Valid: 82.538396% | 0.685318   
Epoch [10/30], Step [346/347], Loss: 0.679720, Domain: 67.368424% | 0.028326, Valid: 94.870552% | 0.678908, Cross Valid: 80.118158% | 0.685775   
Epoch [15/30], Step [346/347], Loss: 0.677298, Domain: 67.368424% | 0.446897, Valid: 95.761601% | 0.678723, Cross Valid: 81.592588% | 0.685684   
Epoch [20/30], Step [346/347], Loss: 0.678185, Domain: 67.368424% | 3.888340, Valid: 95.476381% | 0.678774, Cross Valid: 82.163019% | 0.685459   
Epoch [25/30], Step [346/347], Loss: 0.676878, Domain: 67.368424% | 7.659103, Valid: 96.523526% | 0.678263, Cross Valid: 80.617650% | 0.685522   
Epoch [30/30], Step [346/347], Loss: 0.675642, Domain: 67.368424% | 14.241254, Valid: 96.185608% | 0.678566, Cross Valid: 82.127660% | 0.685432   


### Model Save

In [40]:
if not os.path.isdir(os.path.join(".", "models")):
    os.mkdir(os.path.join(".", "models"))

# Model Save
save_path = os.path.join(".", "models", f"{Config.NB_NAME}_acc_{performance*100:.6f}.pt")
torch.save(model.state_dict(), save_path)
print(f"Model saved to {save_path}")

Model saved to ./models/transfer_learning_acc_82.127660.pt


## Inference
테스트 데이터셋에 대한 추론은 다음 순서로 진행됩니다.

1. 모델 및 디바이스 설정
    - 모델을 주어진 device(GPU 또는 CPU)로 이동시키고, 평가모드로 전환합니다.
2. 예측 수행
    - 예측 결과를 저장한 빈 리스트를 초기화하고 test_loader에서 배치별로 데이터를 불러와 예측을 수행합니다.
    - 각 배치에 대해 스펙트로그램 데이터를 device로 이동시킵니다.
    - 모델 예측 확률(probs)을 계산합니다.
    - 예측 확률을 predictions리스트에 추가합니다.

In [41]:
predicted_labels = []

model.to(device)
model.eval()
with torch.no_grad():
    for features in tqdm(test_loader):
        (probs, _), _ = model(*map(lambda x: x.to(device), features))
        probs = probs.cpu().detach().numpy()
        predicted_labels += probs.tolist()

  0%|          | 0/391 [00:00<?, ?it/s]

### Submission
추론 결과를 제출 양식에 덮어 씌워 CSV 파일로 생성하는 과정은 다음과 같습니다.

1. 제출 양식 로드
    - pd.read_csv('./sample_submission.csv')를 사용하여 제출을 위한 샘플 형식 파일을 로드합니다.
    - 이 파일은 일반적으로 각 테스트 샘플에 대한 ID와 예측해야 하는 필드가 포함된 템플릿 형태를 가지고 있습니다.
2. 예측 결과 할당
    - submit.iloc[:,1:] = preds 추론함수(inference)에서 반환된 예측결과(preds)를 샘플 제출 파일에 2번째 열부터 할당합니다.
3. 제출 파일 저장
    - 수정된 제출 파일을 baseline_submit 이란 이름의 CSV 파일로 저장합니다.
    - index=False는 파일 저장시 추가적인 index가 발생하지 않도록 설정하여, 제작한 제출 파일과 동일한 형태의 파일을 저장합니다.

In [42]:
submit = pd.read_csv(test_dataset.submission_form_path)
submit.iloc[:, 1:] = predicted_labels
submit.head()

  submit.iloc[:, 1:] = predicted_labels
  submit.iloc[:, 1:] = predicted_labels


Unnamed: 0,id,fake,real
0,TEST_00000,1.806102e-08,0.023936
1,TEST_00001,9.052732e-09,0.027369
2,TEST_00002,6.931344e-08,0.042886
3,TEST_00003,3.201205e-06,0.045851
4,TEST_00004,3.75794e-08,0.019616


In [43]:
submission_dir = "submissions"
if not os.path.isdir(submission_dir):
    os.mkdir(submission_dir)

submit_file_path = os.path.join(".", submission_dir, f"{Config.NB_NAME}_acc_{performance*100:.6f}_submit.csv")
submit.to_csv(submit_file_path, index=False)
print("File saved to", submit_file_path)

File saved to ./submissions/transfer_learning_acc_82.127660_submit.csv
