In [3]:
import librosa
import numpy as np
from scipy.signal import butter, filtfilt
import soundfile as sf
import os

In [6]:
# Split_input의 모든 음원 파일
src = '/content/drive/MyDrive/Hmm2Song/data/song/Split_input/'
files = os.listdir(src)
files

['013_03.mp3',
 '013_04.mp3',
 '013_05.mp3',
 '013_06.mp3',
 '013_07.mp3',
 '013_08.mp3',
 '013_09.mp3',
 '014_00.mp3',
 '014_01.mp3',
 '014_02.mp3',
 '014_03.mp3',
 '014_04.mp3',
 '014_05.mp3',
 '014_06.mp3',
 '014_07.mp3',
 '014_08.mp3',
 '014_09.mp3',
 '015_00.mp3',
 '015_01.mp3',
 '015_02.mp3',
 '015_03.mp3',
 '015_04.mp3',
 '015_05.mp3',
 '015_06.mp3',
 '015_07.mp3',
 '015_08.mp3',
 '015_09.mp3',
 '016_00.mp3',
 '016_01.mp3',
 '016_02.mp3',
 '016_03.mp3',
 '016_04.mp3',
 '016_05.mp3',
 '016_06.mp3',
 '016_07.mp3',
 '016_08.mp3',
 '016_09.mp3',
 '017_00.mp3',
 '017_01.mp3',
 '017_02.mp3',
 '017_03.mp3',
 '017_04.mp3',
 '017_05.mp3',
 '017_06.mp3',
 '017_07.mp3',
 '017_08.mp3',
 '017_09.mp3',
 '020_00.mp3',
 '020_01.mp3',
 '020_02.mp3',
 '020_03.mp3',
 '020_04.mp3',
 '020_05.mp3',
 '020_06.mp3',
 '020_07.mp3',
 '020_08.mp3',
 '020_09.mp3',
 '018_00.mp3',
 '018_01.mp3',
 '018_02.mp3',
 '018_03.mp3',
 '018_04.mp3',
 '018_05.mp3',
 '018_06.mp3',
 '018_07.mp3',
 '018_08.mp3',
 '018_09.m

In [10]:
file_list = []
for f in files:
  if f.endswith('.mp3'):
    file_list.append(f)
file_list

['013_03.mp3',
 '013_04.mp3',
 '013_05.mp3',
 '013_06.mp3',
 '013_07.mp3',
 '013_08.mp3',
 '013_09.mp3',
 '014_00.mp3',
 '014_01.mp3',
 '014_02.mp3',
 '014_03.mp3',
 '014_04.mp3',
 '014_05.mp3',
 '014_06.mp3',
 '014_07.mp3',
 '014_08.mp3',
 '014_09.mp3',
 '015_00.mp3',
 '015_01.mp3',
 '015_02.mp3',
 '015_03.mp3',
 '015_04.mp3',
 '015_05.mp3',
 '015_06.mp3',
 '015_07.mp3',
 '015_08.mp3',
 '015_09.mp3',
 '016_00.mp3',
 '016_01.mp3',
 '016_02.mp3',
 '016_03.mp3',
 '016_04.mp3',
 '016_05.mp3',
 '016_06.mp3',
 '016_07.mp3',
 '016_08.mp3',
 '016_09.mp3',
 '017_00.mp3',
 '017_01.mp3',
 '017_02.mp3',
 '017_03.mp3',
 '017_04.mp3',
 '017_05.mp3',
 '017_06.mp3',
 '017_07.mp3',
 '017_08.mp3',
 '017_09.mp3',
 '020_00.mp3',
 '020_01.mp3',
 '020_02.mp3',
 '020_03.mp3',
 '020_04.mp3',
 '020_05.mp3',
 '020_06.mp3',
 '020_07.mp3',
 '020_08.mp3',
 '020_09.mp3',
 '018_00.mp3',
 '018_01.mp3',
 '018_02.mp3',
 '018_03.mp3',
 '018_04.mp3',
 '018_05.mp3',
 '018_06.mp3',
 '018_07.mp3',
 '018_08.mp3',
 '018_09.m

** 증강 type **
* 필터(하이/로우) -> fh / fl
* noise -> n
* monotone 변환 (하이 / 로우) -> mh / fl


In [7]:
# 로우패스 필터 함수
def apply_lowpass_filter(y, sr, cutoff_freq=5000):
  nyquist = 0.5 * sr
  normal_cutoff = cutoff_freq / nyquist
  b, a = butter(1, normal_cutoff, btype='low', analog=False)
  y_lowpass = filtfilt(b, a, y)
  return y_lowpass

# 하이패스 필터 함수
def apply_highpass_filter(y, sr, cutoff_freq=1000):
  nyquist = 0.5 * sr
  normal_cutoff = cutoff_freq / nyquist
  b, a = butter(1, normal_cutoff, btype='high', analog=False)
  y_highpass = filtfilt(b, a, y)
  return y_highpass

# 모노톤 변환 함수
def apply_monotone_transformation(y, sr, semitone_shift):
  y_pitch_shifted = librosa.effects.pitch_shift(y, sr = sr, n_steps = semitone_shift)
  return y_pitch_shifted

## 허밍 데이터 증강

In [8]:
# 허밍 데이터 저장 경로
output_humming_file = '/content/drive/MyDrive/Hmm2Song/data/song/Split_hum/'

In [11]:
for f in file_list:
  input_tmp = src + f
  y, sr = librosa.load(input_tmp, sr=None)

  f_name = f[:-4]

  # 로우패스 필터 적용
  y_lowpass = apply_lowpass_filter(y, sr)
  sf.write(output_humming_file + f_name + "_fl.mp3", y_lowpass, sr)

  # 하이패스 필터 적용
  y_highpass = apply_highpass_filter(y, sr)
  sf.write(output_humming_file + f_name + "_fh.mp3", y_highpass, sr)

  # 모노 로우
  y_monotone = apply_monotone_transformation(y, sr, -3)  ## 피치 -3
  sf.write(output_humming_file + f_name + "_ml.mp3", y_monotone, sr)

  # 모노 하이
  y_monotone = apply_monotone_transformation(y, sr, 3)  ## 피치 +3
  sf.write(output_humming_file + f_name + "_mh.mp3", y_monotone, sr)

  # 노이즈
  noise = np.random.normal(0.005, 0.01, len(y))
  y_noisy = y + noise
  sf.write(output_humming_file + f_name + "_n.mp3", y_noisy, sr)

  print(f, "done")

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
622_03.mp3 done
622_04.mp3 done
622_05.mp3 done
622_06.mp3 done
622_07.mp3 done
622_08.mp3 done
622_09.mp3 done
623_00.mp3 done
623_01.mp3 done
623_02.mp3 done
623_03.mp3 done
623_04.mp3 done
623_05.mp3 done
623_06.mp3 done
623_07.mp3 done
623_08.mp3 done
623_09.mp3 done
624_00.mp3 done
624_01.mp3 done
624_02.mp3 done
624_03.mp3 done
624_04.mp3 done
624_05.mp3 done
624_06.mp3 done
624_07.mp3 done
624_08.mp3 done
624_09.mp3 done
625_00.mp3 done
625_01.mp3 done
625_02.mp3 done
625_03.mp3 done
625_04.mp3 done
625_05.mp3 done
625_06.mp3 done
625_07.mp3 done
625_08.mp3 done
625_09.mp3 done
547_09.mp3 done
548_00.mp3 done
548_01.mp3 done
548_02.mp3 done
548_03.mp3 done
548_04.mp3 done
548_05.mp3 done
548_06.mp3 done
548_07.mp3 done
548_08.mp3 done
548_09.mp3 done
549_00.mp3 done
549_01.mp3 done
549_02.mp3 done
549_03.mp3 done
549_04.mp3 done
549_05.mp3 done
549_06.mp3 done
549_07.mp3 done
549_08.mp3 done
549_09.mp3 done
550_00.mp3 done
550_01



801_28.mp3 done
749_10.mp3 done
749_11.mp3 done
749_12.mp3 done
749_13.mp3 done
749_14.mp3 done
749_15.mp3 done
749_16.mp3 done
749_17.mp3 done
749_18.mp3 done
749_19.mp3 done
749_20.mp3 done
749_21.mp3 done
749_22.mp3 done
749_23.mp3 done
749_24.mp3 done
749_25.mp3 done
749_26.mp3 done
749_27.mp3 done
749_28.mp3 done
749_29.mp3 done
765_10.mp3 done
765_11.mp3 done
765_12.mp3 done
765_13.mp3 done
765_14.mp3 done
765_15.mp3 done
765_16.mp3 done
765_17.mp3 done
765_18.mp3 done
765_19.mp3 done
765_20.mp3 done
765_21.mp3 done
765_22.mp3 done
765_23.mp3 done
765_24.mp3 done
765_25.mp3 done
753_10.mp3 done
753_11.mp3 done
753_12.mp3 done
753_13.mp3 done
753_14.mp3 done
753_15.mp3 done
753_16.mp3 done
753_17.mp3 done
753_18.mp3 done
753_19.mp3 done
753_20.mp3 done
753_21.mp3 done
753_22.mp3 done
753_23.mp3 done
753_24.mp3 done
753_25.mp3 done
753_26.mp3 done
695_10.mp3 done
695_11.mp3 done
695_12.mp3 done
695_13.mp3 done
695_14.mp3 done
695_15.mp3 done
695_16.mp3 done
695_17.mp3 done
695_18.m



[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
649_15.mp3 done
649_16.mp3 done
649_17.mp3 done
649_18.mp3 done
649_19.mp3 done
649_20.mp3 done
649_21.mp3 done
649_22.mp3 done
649_23.mp3 done
649_24.mp3 done
650_10.mp3 done
650_11.mp3 done
650_12.mp3 done
650_13.mp3 done
650_14.mp3 done
650_15.mp3 done
650_16.mp3 done
650_17.mp3 done
650_18.mp3 done
650_19.mp3 done
650_20.mp3 done
650_21.mp3 done
650_22.mp3 done
650_23.mp3 done
650_24.mp3 done
650_25.mp3 done
650_26.mp3 done
650_27.mp3 done
650_28.mp3 done
650_29.mp3 done
650_30.mp3 done
651_10.mp3 done
651_11.mp3 done
651_12.mp3 done
651_13.mp3 done
651_14.mp3 done
651_15.mp3 done
651_16.mp3 done
651_17.mp3 done
651_18.mp3 done
651_19.mp3 done
651_20.mp3 done
651_21.mp3 done
651_22.mp3 done
651_23.mp3 done
651_24.mp3 done
651_25.mp3 done
651_26.mp3 done
652_10.mp3 done
652_11.mp3 done
652_12.mp3 done
652_13.mp3 done
652_14.mp3 done
652_15.mp3 done
652_16.mp3 done
652_17.mp3 done
652_18.mp3 done
652_19.mp3 done
652_20.mp3 done
652_21



304_29.mp3 done
291_10.mp3 done
291_11.mp3 done
291_12.mp3 done
291_13.mp3 done
291_14.mp3 done
291_15.mp3 done
291_16.mp3 done
291_17.mp3 done
291_18.mp3 done
291_19.mp3 done
291_20.mp3 done
291_21.mp3 done
291_22.mp3 done
291_23.mp3 done
291_24.mp3 done
291_25.mp3 done
291_26.mp3 done
291_27.mp3 done
291_28.mp3 done
291_29.mp3 done
291_30.mp3 done
291_31.mp3 done
291_32.mp3 done
291_33.mp3 done
291_34.mp3 done
297_10.mp3 done
297_11.mp3 done
297_12.mp3 done
297_13.mp3 done
297_14.mp3 done
297_15.mp3 done
297_16.mp3 done
297_17.mp3 done
297_18.mp3 done
297_19.mp3 done
297_20.mp3 done
297_21.mp3 done
297_22.mp3 done
297_23.mp3 done
297_24.mp3 done
297_25.mp3 done
297_26.mp3 done
297_27.mp3 done
306_10.mp3 done
306_11.mp3 done
306_12.mp3 done
306_13.mp3 done
306_14.mp3 done
306_15.mp3 done
306_16.mp3 done
306_17.mp3 done
306_18.mp3 done
306_19.mp3 done
306_20.mp3 done
306_21.mp3 done
306_22.mp3 done
306_23.mp3 done
306_24.mp3 done
306_25.mp3 done
306_26.mp3 done
306_27.mp3 done
306_28.m



163_27.mp3 done
164_10.mp3 done
164_11.mp3 done
164_12.mp3 done
164_13.mp3 done
164_14.mp3 done
164_15.mp3 done
164_16.mp3 done
164_17.mp3 done
164_18.mp3 done
164_19.mp3 done
164_20.mp3 done
164_21.mp3 done
164_22.mp3 done
164_23.mp3 done
164_24.mp3 done
164_25.mp3 done
164_26.mp3 done
164_27.mp3 done
164_28.mp3 done
164_29.mp3 done
164_30.mp3 done
164_31.mp3 done
164_32.mp3 done
167_10.mp3 done
167_11.mp3 done
167_12.mp3 done
167_13.mp3 done
167_14.mp3 done
167_15.mp3 done
167_16.mp3 done
167_17.mp3 done
167_18.mp3 done
167_19.mp3 done
167_20.mp3 done
167_21.mp3 done
167_22.mp3 done
167_23.mp3 done
167_24.mp3 done
167_25.mp3 done
167_26.mp3 done
167_27.mp3 done
167_28.mp3 done
167_29.mp3 done
167_30.mp3 done
167_31.mp3 done
170_10.mp3 done
170_11.mp3 done
170_12.mp3 done
170_13.mp3 done
170_14.mp3 done
170_15.mp3 done
170_16.mp3 done
170_17.mp3 done
170_18.mp3 done
170_19.mp3 done
170_20.mp3 done
170_21.mp3 done
170_22.mp3 done
176_10.mp3 done
176_11.mp3 done
176_12.mp3 done
176_13.m