In [8]:
import os
import shutil
import wave

In [21]:
# Keep only the .wav entries: the folder also receives files.txt (written by the
# duration-listing cell), which would otherwise be handed to wave.open on a re-run.
# Sorting makes the listing order deterministic; os.listdir's order is arbitrary.
files = sorted(
    name for name in os.listdir('data_drumstems_bdt')
    if name.lower().endswith('.wav')
)

In [22]:
# Write one "<file name> <whole seconds>" line per entry of `files`.
with open("data_drumstems_bdt/files.txt", "w", encoding="utf-8") as listing:
    for wav_name in files:
        with wave.open(f"data_drumstems_bdt/{wav_name}", 'rb') as reader:
            # frame count / frame rate gives the length; int() drops the fraction.
            seconds = int(reader.getnframes() / float(reader.getframerate()))
        listing.write(f"{wav_name} {seconds}\n")

In [None]:
from note_seq.midi_io import midi_file_to_note_sequence

def preprocess_midi_onsets(num, midi_dir='../dataset/full/midi'):
    """Read one MIDI file and return its note onsets as (time, label) pairs.

    Args:
        num: File stem; the file read is ``{midi_dir}/{num}.mid``.
        midi_dir: Directory holding the MIDI files. Defaults to the location
            that used to be hard-coded, so existing calls are unaffected.

    Returns:
        A list of ``(onset_time, label)`` tuples. ``onset_time`` is the note's
        ``start_time`` rounded to 6 decimals. The list is sorted by label
        first and by onset time second.

    Raises:
        KeyError: If a note's pitch has no entry in the pitch-to-label map.
    """
    # Map MIDI pitch numbers to instrument labels.
    pitch_to_label = {
        36: "KD",
        39: "CL",
        42: "HH",
        49: "CY"
    }

    # Read the MIDI file.
    midi_path = f'{midi_dir}/{num}.mid'
    midi = midi_file_to_note_sequence(midi_path)

    onset_events = []
    for note in midi.notes:
        if note.pitch not in pitch_to_label:
            # Same exception type as the plain dict lookup, but it names the file.
            raise KeyError(f'unmapped MIDI pitch {note.pitch} in {midi_path}')
        onset_time = round(note.start_time, 6)
        onset_events.append((onset_time, pitch_to_label[note.pitch]))

    # Group by label, then order chronologically within each label.
    onset_events.sort(key=lambda event: (event[1], event[0]))
    return onset_events

In [None]:
import os

# Move tracks 4376..6250 from the stem folder into the evaluation audio folder.
# shutil.move replaces the former `cp` + `rm` shell pair, where `rm` still ran
# after a failed `cp` and deleted the only copy of the file.
eval_audio_dir = 'data_evals/BDT_DRUMS/audio'
os.makedirs(eval_audio_dir, exist_ok=True)
for i in range(4376, 6251):
    src = f'data_drumstems_bdt/{i}.wav'
    if not os.path.isfile(src):
        # Already moved by an earlier run, or never present: nothing to do.
        continue
    shutil.move(src, f'{eval_audio_dir}/{i}.wav')

In [25]:
# For tracks 4376..6250, write one "<onset>\t<label>" line per onset event.
for i in range(4376, 6251):
    annotation_path = f"data_evals/BDT_DRUMS/annotations/{i}.txt"
    onsets = preprocess_midi_onsets(i)
    with open(annotation_path, "w", encoding="utf-8") as out:
        out.writelines(f"{onset:.6f}\t{label}\n" for onset, label in onsets)

In [9]:
# Copy the first n-1 samples of each instrument folder into one flat folder,
# naming each copy "<index>)<label>.wav".
src_prefix = '../dataset/instruments-new'
tgt_prefix = 'data_drum_sources_bdt'
# NOTE(review): 'snare' is given the label 'CY' here - confirm this is intended.
folder2label = {'kick':'KD', 'clap':'CL', 'hihat':'HH', 'snare':'CY'}
n = 22
os.makedirs(tgt_prefix, exist_ok=True)
for folder, label in folder2label.items():
    for i in range(1, n):
        # shutil.copy avoids shell-escaping the ')' in the target name and
        # raises on failure instead of ignoring the command's exit status.
        shutil.copy(
            f'{src_prefix}/{folder}/{i:03d}.wav',
            f'{tgt_prefix}/{i}){label}.wav',
        )

In [2]:
from pydub import AudioSegment

def extend_audio_to_2sec(input_file, output_file):
    """Pad a WAV file with trailing silence up to 2 seconds and save it.

    Audio that is already 2000 ms or longer is exported unchanged.

    Args:
        input_file: WAV file to read.
        output_file: Destination the result is exported to, in WAV format.
    """
    min_length_ms = 2000  # 2 seconds, in milliseconds

    clip = AudioSegment.from_wav(input_file)

    # A positive shortfall means the clip is shorter than the target.
    shortfall_ms = min_length_ms - len(clip)
    if shortfall_ms > 0:
        clip = clip + AudioSegment.silent(duration=shortfall_ms)

    clip.export(output_file, format="wav")

In [14]:
import librosa
import numpy as np
import soundfile as sf

def extend_audio_to_2sec_sr(input_file, output_file, sr=22050, target_duration=2.0):
    """Load audio at a given sampling rate, zero-pad it to a minimum length, save it.

    Args:
        input_file: Audio file to load.
        output_file: Path the result is written to.
        sr: Sampling rate passed to ``librosa.load``; the rate that call
            returns is the one used for padding and writing.
        target_duration: Minimum length of the output in seconds. Defaults to
            2.0, the value that used to be hard-coded.

    Audio that already has at least ``int(target_duration * sr)`` samples is
    written back without padding.
    """
    # Load the audio file at the requested sampling rate.
    y, sr = librosa.load(input_file, sr=sr)

    # Number of samples the output must have at least.
    target_length = int(target_duration * sr)

    if len(y) < target_length:
        # Fill the missing samples with zeros, i.e. silence at the end.
        y = np.pad(y, (0, target_length - len(y)), mode='constant')

    # Save the (possibly padded) audio.
    sf.write(output_file, y, sr)


In [4]:
import glob
# Collect every path directly under data_drum_sources_bdt/ (any extension;
# glob returns them in no guaranteed order).
tgt_path = glob.glob('data_drum_sources_bdt/*')
# Last expression of the cell: shows the number of matches and the first one.
# Raises IndexError when nothing matched.
len(tgt_path), tgt_path[0]

(84, 'data_drum_sources_bdt/4)CY.wav')

In [15]:
# Process every collected file in place: input and output are the same path,
# so each file is overwritten with its processed version.
for wav_path in tgt_path:
    extend_audio_to_2sec_sr(input_file=wav_path, output_file=wav_path)