In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import StratifiedKFold
from transformers import HubertForSequenceClassification, AutoConfig, Wav2Vec2FeatureExtractor
import pandas as pd
import librosa
import numpy as np
from tqdm import tqdm

from src.models import EModel

In [2]:
model_name_or_path = 'facebook/hubert-large-ll60k'
config = AutoConfig.from_pretrained(model_name_or_path, num_labels=2)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name_or_path)
sampling_rate = feature_extractor.sampling_rate



In [3]:
# model = EModel(config=config)
model = EModel.load_from_checkpoint("/home/work/joono/joono/joono/DV_DV.Deep/czvypyil/checkpoints/best-checkpoint.ckpt", config=config)

Some weights of HubertForSequenceClassification were not initialized from the model checkpoint at facebook/hubert-large-ll60k and are newly initialized: ['classifier.bias', 'classifier.weight', 'encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'encoder.pos_conv_embed.conv.parametrizations.weight.original1', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
def speech_file_to_array_fn(path):
    audio, _ = librosa.load(path, sr=sampling_rate)
    inputs = feature_extractor(audio, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
    return inputs.input_values.squeeze()

In [5]:
# Collate 함수 정의
def collate_fn(batch):
    signals, labels = zip(*batch)
    max_length = max([signal.size(0) for signal in signals])
    padded_signals = torch.zeros(len(signals), max_length)
    for i, signal in enumerate(signals):
        padded_signals[i, :signal.size(0)] = signal
    labels = torch.tensor(labels)
    return padded_signals, labels

In [6]:
class TestDataset(Dataset):
    def __init__(self, df):
        self.df = df

    def __len__(self):
        return self.df.shape[0]
    
    def __getitem__(self, idx):
        path = os.path.join("..", "dataset", self.df.loc[idx, 'path'])
        if not os.path.exists(path):
            raise FileNotFoundError(f"파일을 찾을 수 없습니다: {path}")     
        signal = speech_file_to_array_fn(path)           
        return signal, -1

In [7]:
test_df = pd.read_csv('../dataset/test.csv', index_col=None)
test_df['path'] = '../dataset/' + test_df['path'].str[1:]
test_dataset = TestDataset(test_df)
test_loader = DataLoader(test_dataset, shuffle=False, num_workers=24, batch_size=8, collate_fn=collate_fn)

In [8]:
def inference(model, test_loader, device):
    model.to(device)
    model = model.eval()
    predictions = []
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader):
            
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            probs = model(inputs)

            probs  = probs.cpu().detach().numpy()
            predictions += probs.tolist()
            
    return predictions

In [9]:
preds = inference(model=model, test_loader=test_loader,device='cuda:0')

  return F.conv1d(input, weight, bias, self.stride,
100%|██████████| 6250/6250 [03:38<00:00, 28.54it/s]


In [10]:
submit = pd.read_csv('/home/work/joono/joono/dataset/sample_submission.csv')

max_thres = 0.7
min_thres = 0.3

for i in tqdm(range(len(preds))):
    if      preds[i][0] > max_thres : submit.iloc[i, 1] = 1
    elif    preds[i][0] < min_thres : submit.iloc[i, 1] = 0 
    else                            : submit.iloc[i, 1] = preds[i][0]
    if      preds[i][1] > max_thres : submit.iloc[i, 2] = 1
    elif    preds[i][1] < min_thres : submit.iloc[i, 2] = 0 
    else                            : submit.iloc[i, 2] = preds[i][1]
submit.head()

  else                            : submit.iloc[i, 1] = preds[i][0]
  else                            : submit.iloc[i, 2] = preds[i][1]
100%|██████████| 50000/50000 [00:08<00:00, 6173.34it/s]


Unnamed: 0,id,fake,real
0,TEST_00000,0.480067,0.523808
1,TEST_00001,0.473331,0.531042
2,TEST_00002,0.477658,0.521832
3,TEST_00003,0.507953,0.489889
4,TEST_00004,0.486331,0.517263


In [11]:
submit.to_csv('joono_hubert_bce_test_submit_zero_one5.csv', index=False)

In [None]:
preds

In [17]:
sum(submit['fake'] < 0.3)

0