### Fine-tuning


In [27]:
import os
import pandas as pd

data = pd.read_csv('../split/dev_split_Depression.csv')

# Keep only the participant ID and the binary PHQ-8 label, indexed by participant ID
# so a label can be looked up with participant_labels.loc[participant_id, 'PHQ8_Binary'].
participant_labels = data[['Participant_ID', 'PHQ8_Binary']].set_index('Participant_ID')

# Maps each participant ID (int) to the list of wav file names that belong to it.
wav_files = {}
wav_directory = '../data/reduce/dev'  # directory that holds the wav files

# os.listdir() returns entries in arbitrary, platform-dependent order, so the
# listing is sorted to make the mapping (and any dataset built from it) reproducible.
for filename in sorted(os.listdir(wav_directory)):
    if not filename.endswith('.wav'):
        continue
    # The participant ID is the prefix before the first underscore
    # (e.g. '302' in '302_processed_1.wav').
    id_prefix = filename.split('_')[0]
    if not id_prefix.isdigit():
        # A stray wav file without a numeric prefix would make int() raise;
        # report it and move on instead of aborting the whole scan.
        print(f"Skipping file with unexpected name: {filename}")
        continue
    wav_files.setdefault(int(id_prefix), []).append(filename)

# Fail early, with the offending IDs spelled out, when a recording has no label
# row in the CSV (otherwise the .loc lookups below raise a bare KeyError).
unlabeled_ids = sorted(pid for pid in wav_files if pid not in participant_labels.index)
if unlabeled_ids:
    raise KeyError(f"No PHQ8_Binary label found for participant(s): {unlabeled_ids}")

# Print the wav files and the label of every participant.
for participant_id, files in wav_files.items():
    print(f"Participant {participant_id}: Files: {files}")
    print(f"Label: {participant_labels.loc[participant_id, 'PHQ8_Binary']}")


Participant 302: Files: ['302_processed_1.wav', '302_processed_2.wav', '302_processed_3.wav', '302_processed_final.wav']
Label: 0
Participant 307: Files: ['307_processed_1.wav', '307_processed_10.wav', '307_processed_11.wav', '307_processed_12.wav', '307_processed_13.wav', '307_processed_14.wav', '307_processed_15.wav', '307_processed_2.wav', '307_processed_3.wav', '307_processed_4.wav', '307_processed_5.wav', '307_processed_6.wav', '307_processed_7.wav', '307_processed_8.wav', '307_processed_9.wav', '307_processed_final.wav']
Label: 0
Participant 331: Files: ['331_processed_1.wav', '331_processed_2.wav', '331_processed_3.wav', '331_processed_4.wav', '331_processed_5.wav', '331_processed_6.wav', '331_processed_final.wav']
Label: 0
Participant 335: Files: ['335_processed_1.wav', '335_processed_2.wav', '335_processed_3.wav', '335_processed_4.wav', '335_processed_5.wav', '335_processed_6.wav', '335_processed_7.wav', '335_processed_final.wav']
Label: 1
Participant 346: Files: ['346_process

In [28]:
import numpy as np
import torch
import torchaudio
from datasets import Dataset
from transformers import (
    Trainer,
    TrainingArguments,
    Wav2Vec2ForCTC,
    Wav2Vec2ForSequenceClassification,
    Wav2Vec2Processor,
)


#### 1. 오디오 파일 처리 및 데이터셋 준비
오디오 파일을 로드하고, 모델이 입력으로 사용할 수 있도록 특징을 추출하는 함수를 정의합니다.

In [32]:
MODEL_NAME = "kresnik/wav2vec2-large-xlsr-korean"

processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
# PHQ8_Binary is a single 0/1 label per recording, i.e. a sequence-classification
# target. A CTC head would interpret the labels as token-ID transcripts, so the
# pretrained encoder is loaded with a freshly initialised 2-class head instead.
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)


def prepare_dataset(batch):
    """Turn one dataset row into model inputs.

    Args:
        batch: a single example with the keys 'audio_filename' (file name inside
            ``wav_directory``) and 'label' (0/1 PHQ8_Binary value).

    Returns:
        The same mapping with two added keys: 'input_values', the 1-D feature
        array produced by the processor for this recording, and 'labels', the
        class index as a plain int.
    """
    audio_path = f"{wav_directory}/{batch['audio_filename']}"
    speech_array, sampling_rate = torchaudio.load(audio_path)

    # torchaudio returns a (channels, frames) tensor. Averaging over the channel
    # axis yields a 1-D mono signal for both mono and multi-channel files,
    # whereas squeeze() would leave a stereo file two-dimensional.
    waveform = speech_array.mean(dim=0)

    # The feature extractor rejects audio whose rate differs from the one it was
    # configured with, so resample when the file does not already match.
    target_rate = processor.feature_extractor.sampling_rate
    if sampling_rate != target_rate:
        waveform = torchaudio.transforms.Resample(sampling_rate, target_rate)(waveform)

    features = processor(waveform.numpy(), sampling_rate=target_rate)
    # The processor output carries a leading batch axis of size 1; keep only the
    # single example so every dataset row holds a flat sequence that can be padded.
    batch["input_values"] = features.input_values[0]
    batch["labels"] = int(batch["label"])
    return batch


# Build one dataset row per wav file, each carrying its participant's label.
data_items = []
for participant_id, files in wav_files.items():
    participant_label = int(participant_labels.loc[participant_id, 'PHQ8_Binary'])
    for file_name in files:
        data_items.append({'audio_filename': file_name, 'label': participant_label})

dataset = Dataset.from_pandas(pd.DataFrame(data_items))
# Drop the raw columns so only 'input_values' and 'labels' reach the Trainer.
processed_dataset = dataset.map(prepare_dataset, remove_columns=dataset.column_names)


loading feature extractor configuration file https://huggingface.co/kresnik/wav2vec2-large-xlsr-korean/resolve/main/preprocessor_config.json from cache at C:\Users\Jws/.cache\huggingface\transformers\27306b13697cb64bc43e0f1d31f53f092d15df6edd9a31f1c02e47b1602e99d8.bbc1eb890a39c82e710a893223b8452ac5b78e8b57083b2f893aa7dc59d4ed69
Feature extractor Wav2Vec2FeatureExtractor {
  "do_normalize": true,
  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
  "feature_size": 1,
  "padding_side": "right",
  "padding_value": 0.0,
  "return_attention_mask": true,
  "sampling_rate": 16000
}

loading file https://huggingface.co/kresnik/wav2vec2-large-xlsr-korean/resolve/main/vocab.json from cache at C:\Users\Jws/.cache\huggingface\transformers\3124f16a7fe8ebc39e54f235f7839aa5a221e8c1d48887c5ddef37a61d735d41.aaf4866cca866da8b42867b565d7bb952a198499d25daa80e0695b7937811d9d
loading file https://huggingface.co/kresnik/wav2vec2-large-xlsr-korean/resolve/main/tokenizer_config.json from cache at C:\Users

#### 2. 학습 파라미터 설정 및 Fine-tuning
학습 파라미터를 설정하고, Trainer 객체를 사용하여 모델을 학습시킵니다.

In [None]:
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=8,
    # Effective batch size is 8 * 2 = 16 per device.
    gradient_accumulation_steps=2,
    # No eval_dataset is passed to the Trainer below, so a per-epoch evaluation
    # would raise "evaluation requires an eval_dataset". Evaluation is therefore
    # disabled; set this back to "epoch" once a held-out split (ideally split by
    # participant, not by file) is supplied via eval_dataset.
    evaluation_strategy="no",
    num_train_epochs=3,
    save_strategy="epoch",
    logging_dir='./logs',
    learning_rate=1e-4
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=processed_dataset,
    # Passing the feature extractor makes the Trainer pad the variable-length
    # 'input_values' of each batch with it.
    tokenizer=processor.feature_extractor
)

trainer.train()
