In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification 
import pandas as pd
import librosa
import numpy as np
from tqdm import tqdm

import sys
sys.path.append("..")
from src.models import EModel, AASIST, Wav2Vec2Facebook
from src.datamodules import AASIST2DataModule, AASISTCenterLossDataset
from src.AASIST import AASISTModule

In [2]:
AASIST_weight_path = "/home/work/joono/joono/joono/src/AASIST_weight/AASIST.pth"
classifier = AASISTModule()
classifier.load_state_dict(torch.load(AASIST_weight_path))

<All keys matched successfully>

In [3]:
# Collate 함수 정의
def collate_fn(batch):
    # signals = zip(*batch)
    signals = batch
    max_length = max([signal.size(0) for signal in signals])
    padded_signals = torch.zeros(len(signals), max_length)
    for i, signal in enumerate(signals):
        padded_signals[i, :signal.size(0)] = signal
    # labels = torch.tensor(labels)
    return padded_signals

In [6]:
test_df = pd.read_csv('../dataset/train.csv', index_col=None)
test_df['path'] = '../dataset/' + test_df['path'].str[1:]
# test_dataset = TestDataset(test_df)
test_dataset = AASISTCenterLossDataset(test_df, train_mode=False)
# test_loader = DataLoader(test_dataset, shuffle=False, num_workers=24, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, num_workers=24, batch_size=128, collate_fn=collate_fn)

In [7]:
def inference(model, test_loader, device):
    model.to(device)
    model = model.eval()
    predictions = []
    with torch.no_grad():
        # for inputs, labels in tqdm(test_loader):
        for inputs in tqdm(test_loader):
            
            inputs = inputs.to(device)
            # labels = labels.to(device)
            
            logit, _ = model(inputs)
            probs = torch.sigmoid(logit)

            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
            
    return predictions

In [8]:
preds = inference(model=classifier, test_loader=test_loader, device='cuda:0')
# preds = model.inference(test_loader=test_loader)

100%|██████████| 434/434 [01:20<00:00,  5.36it/s]


In [9]:
submit = pd.read_csv('/home/work/joono/joono/dataset/train_sample_submission.csv')

max_thres = 0.999999
min_thres = 0.000001

fake, real = 1, 0

for i in tqdm(range(len(preds))):
    submit.iloc[i, 1] = preds[i][fake]
    submit.iloc[i, 2] = preds[i][real]
    
    # if      preds[i][0] > max_thres : submit.iloc[i, 1] = 1
    # elif    preds[i][0] < min_thres : submit.iloc[i, 1] = 0 
    # else                            : submit.iloc[i, 1] = preds[i][0]
    # if      preds[i][1] > max_thres : submit.iloc[i, 2] = 1
    # elif    preds[i][1] < min_thres : submit.iloc[i, 2] = 0 
    # else                            : submit.iloc[i, 2] = preds[i][1]
    # if      preds[i][fake] > max_thres  : submit.iloc[i, 1] = 1
    # elif    preds[i][fake] < min_thres  : submit.iloc[i, 1] = 0 
    # else                                : submit.iloc[i, 1] = preds[i][0]
    # if      preds[i][real] > max_thres  : submit.iloc[i, 2] = 1
    # elif    preds[i][real] < min_thres  : submit.iloc[i, 2] = 0 
    # else                                : submit.iloc[i, 2] = preds[i][1]

  submit.iloc[i, 1] = preds[i][fake]
  submit.iloc[i, 2] = preds[i][real]
100%|██████████| 55438/55438 [00:08<00:00, 6170.92it/s]


In [10]:
submit.to_csv('AASIST_train_base.csv', index=False)

In [None]:
preds

In [None]:
import os
import pathlib
from glob import glob

In [None]:

ids, paths = [], []
for ul_path in glob("/home/work/joono/joono/dataset/unlabeled_data/*"):
    basename = os.path.basename(ul_path)
    id = basename.split(".")[0]
    path = os.path.join(".", "unlabeled_data", basename)
    
    ids.append(id)
    paths.append(path)

In [None]:
df = pd.DataFrame({
    "id": ids,
    "path": paths
})

In [None]:
df.to_csv("unlabled_data.csv", index=False)

In [None]:
paths[763]