# 오디오 신호 이해하기
1. 오디오 파일 읽기
2. 1차 노이즈 제거
- 아기 울음소리의 평균 주파수(Hz) 대역이 남아 있도록 하기

In [None]:
import librosa
import numpy as np
import librosa.display
import csv
from scipy.signal import savgol_filter
from scipy import signal
from IPython.display import Audio
import matplotlib.pyplot as plt
import soundfile as sf

In [None]:
def trans_mel(file, n_fft, hop_length, n_mels, noise_floor_db=-50.0):
    """Load an audio file and return a denoised power mel-spectrogram.

    Pipeline:
      1. Load the file with ``librosa.load`` (librosa's default sampling rate).
      2. First denoising pass: 10th-order Butterworth high-pass at 350 Hz.
      3. Compute the power mel-spectrogram of the filtered signal.
      4. Second denoising pass: zero every bin whose level, relative to the
         loudest bin of this spectrogram, is below ``noise_floor_db``.

    Args:
        file: Path of the audio file to read.
        n_fft: FFT window size (controls frequency resolution).
        hop_length: Samples between successive frames (controls time
            resolution).
        n_mels: Number of mel filter banks.
        noise_floor_db: Gate threshold in dB relative to the spectrogram peak.
            Bins quieter than this are set to 0. ``power_to_db`` clips at
            -80 dB by default, so thresholds below -80 gate nothing.

    Returns:
        Tuple ``(y, sr, S)``: the waveform as loaded (NOT high-pass filtered),
        its sampling rate, and the gated power mel-spectrogram. ``S`` stays on
        a power scale so it can be passed to
        ``librosa.feature.inverse.mel_to_audio``.
    """
    # 1. Read the audio file.
    #    y  : audio samples as floats in [-1.0, 1.0]
    #    sr : sampling rate (samples per second)
    y, sr = librosa.load(file)

    # 2. First denoising pass: high-pass filter removes content below 350 Hz.
    sos = signal.butter(10, 350, 'hp', fs=sr, output='sos')
    y_nr1 = signal.sosfilt(sos, y)

    # 3. Power mel-spectrogram of the filtered signal.
    S = librosa.feature.melspectrogram(
        y=y_nr1,
        sr=sr,
        n_mels=n_mels,
        fmax=None,
        n_fft=n_fft,
        hop_length=hop_length,
    )

    # 4. Second denoising pass. The threshold is expressed in dB, so the
    #    comparison must be made on a dB-scaled copy: power values are never
    #    negative, and comparing them directly against a negative threshold
    #    would leave the spectrogram untouched.
    S_db = librosa.power_to_db(S, ref=np.max)
    S[S_db < noise_floor_db] = 0

    return y, sr, S

### 전처리 적용 후 들어보기 ...

In [None]:
# Listen to one preprocessed "belly_pain" recording: run the pipeline, dump
# the mel-spectrogram (row count, shape, values), then invert it back to a
# waveform and play it.
file = '/content/drive/MyDrive/Aivle_빅프/donateacry_corpus/belly_pain/ae5f103b-5fee-442f-bb1b-d9d0570c46ab-1431533857965-1.7-m-26-bp.wav'
y, sr, S = trans_mel(file, 2048, 512, 128)  # n_fft, hop_length, n_mels
print(S.shape[0], S.shape, S, sep='\n')
# The inversion must use the same STFT settings as the forward transform.
y_reconstructed = librosa.feature.inverse.mel_to_audio(
    S,
    sr=sr,
    n_fft=2048,
    hop_length=512,
)
Audio(data=y_reconstructed, rate=sr)

128
(128, 300)
[[1.16878286e-26 9.89539933e-27 5.58182597e-27 ... 6.07209831e-09
  1.77030512e-05 2.13817769e-03]
 [8.95620599e-23 7.02764175e-23 1.72506702e-23 ... 7.28898640e-09
  2.05026911e-05 2.92506601e-03]
 [3.22572352e-20 2.35638002e-20 3.21167087e-21 ... 9.34193909e-09
  2.53757004e-05 4.21631208e-03]
 ...
 [1.02549184e-14 2.55027041e-15 2.76504932e-26 ... 5.96734695e-15
  3.57054578e-06 1.04640610e-03]
 [1.01724605e-14 2.52988356e-15 1.76992980e-26 ... 6.27260058e-15
  3.54121760e-06 1.03781562e-03]
 [1.01116824e-14 2.51481015e-15 1.78525539e-26 ... 1.32592366e-14
  3.52130878e-06 1.03198238e-03]]


In [None]:
# Listen to one preprocessed "burping" recording: run the pipeline, dump the
# mel-spectrogram (row count, shape, values), then invert it back to a
# waveform and play it.
file = '/content/drive/MyDrive/Aivle_빅프/donateacry_corpus/burping/5afc6a14-a9d8-45f8-b31d-c79dd87cc8c6-1430757039803-1.7-m-48-bu.wav'
y, sr, S = trans_mel(file, 2048, 512, 128)  # n_fft, hop_length, n_mels
print(S.shape[0], S.shape, S, sep='\n')
# The inversion must use the same STFT settings as the forward transform.
y_reconstructed = librosa.feature.inverse.mel_to_audio(
    S,
    sr=sr,
    n_fft=2048,
    hop_length=512,
)
Audio(data=y_reconstructed, rate=sr)

128
(128, 287)
[[1.15471653e-26 9.88925594e-27 1.35294435e-26 ... 5.36023651e-11
  4.73768543e-09 4.65274826e-06]
 [8.95618305e-23 7.02766828e-23 1.73511366e-23 ... 6.79226052e-11
  9.38706601e-09 9.67948425e-06]
 [3.22572103e-20 2.35637707e-20 3.20835796e-21 ... 9.36002143e-11
  1.72444638e-08 1.78760964e-05]
 ...
 [1.02548372e-14 2.55024953e-15 1.01369025e-25 ... 5.59759606e-15
  1.49774023e-07 1.59966566e-04]
 [1.01724354e-14 2.52987923e-15 3.07665609e-26 ... 7.02627352e-15
  1.48595427e-07 1.58701836e-04]
 [1.01115631e-14 2.51477917e-15 1.70342114e-26 ... 5.19762416e-15
  1.47791354e-07 1.57839441e-04]]


In [None]:
# Listen to one preprocessed "discomfort" recording: run the pipeline, dump
# the mel-spectrogram (row count, shape, values), then invert it back to a
# waveform and play it.
file = '/content/drive/MyDrive/Aivle_빅프/donateacry_corpus/discomfort/10A40438-09AA-4A21-83B4-8119F03F7A11-1430925142-1.0-f-26-dc.wav'
y, sr, S = trans_mel(file, 2048, 512, 128)  # n_fft, hop_length, n_mels
print(S.shape[0], S.shape, S, sep='\n')
# The inversion must use the same STFT settings as the forward transform.
y_reconstructed = librosa.feature.inverse.mel_to_audio(
    S,
    sr=sr,
    n_fft=2048,
    hop_length=512,
)
Audio(data=y_reconstructed, rate=sr)

128
(128, 302)
[[2.08998443e-29 1.42156583e-19 1.40648497e-20 ... 6.37744236e-10
  2.19823247e-07 5.71107190e-06]
 [2.17107921e-29 3.17966306e-19 5.96187089e-20 ... 8.56475315e-10
  1.80116776e-07 4.97722293e-06]
 [2.25606850e-29 1.57786747e-18 2.04926850e-18 ... 1.27104644e-09
  9.79676033e-08 3.60815767e-06]
 ...
 [1.06576992e-24 1.20206344e-22 4.89564229e-21 ... 3.20227751e-14
  8.61676361e-05 2.35161359e-03]
 [8.80949568e-25 1.09766005e-22 3.90988792e-21 ... 3.00291525e-14
  8.53411606e-05 2.32907018e-03]
 [7.74133497e-25 6.68380941e-23 2.45569965e-21 ... 4.86402650e-14
  8.47886956e-05 2.31400020e-03]]


In [None]:
# Listen to one preprocessed "hungry" recording: run the pipeline, dump the
# mel-spectrogram (row count, shape, values), then invert it back to a
# waveform and play it.
file = '/content/drive/MyDrive/Aivle_빅프/donateacry_corpus/hungry/02c3b725-26e4-4a2c-9336-04ddc58836d9-1430726196216-1.7-m-04-hu.wav'
y, sr, S = trans_mel(file, 2048, 512, 128)  # n_fft, hop_length, n_mels
print(S.shape[0], S.shape, S, sep='\n')
# The inversion must use the same STFT settings as the forward transform.
y_reconstructed = librosa.feature.inverse.mel_to_audio(
    S,
    sr=sr,
    n_fft=2048,
    hop_length=512,
)
Audio(data=y_reconstructed, rate=sr)

128
(128, 299)
[[6.03300650e-17 2.32520633e-16 1.58526568e-16 ... 1.69082094e-15
  4.99164298e-13 2.19931676e-09]
 [1.33058408e-16 8.80770021e-16 5.80283824e-16 ... 5.17225993e-15
  5.70371400e-13 2.32520623e-09]
 [2.50822721e-15 4.39004127e-15 3.61089601e-15 ... 1.23258766e-14
  6.72938170e-13 2.43320745e-09]
 ...
 [5.96283429e-15 1.48667364e-15 2.74324298e-20 ... 1.56064442e-20
  5.60631103e-14 2.48555824e-10]
 [5.19625991e-15 1.29546132e-15 7.39395992e-21 ... 9.64016059e-21
  5.54443210e-14 2.45835222e-10]
 [4.76774483e-15 1.18847177e-15 6.52058103e-21 ... 5.05537135e-21
  5.50358803e-14 2.44038292e-10]]


### 신호 추출하기

In [None]:
import pandas as pd
import librosa
import numpy as np
import os
from tqdm import tqdm  # tqdm 임포트

# Dataset root and the sub-folder that holds the recordings of each label.
path = '/content/drive/MyDrive/Aivle_빅프/donateacry_corpus/'
labels = {
    'belly_pain': path + 'belly_pain/',
    'burping': path + 'burping/',
    'discomfort': path + 'discomfort/',
    'hungry': path + 'hungry/',
    'tired': path + 'tired/'
}

# Single pass over the corpus: preprocess every .wav exactly once, keep its
# flattened mel-spectrogram, and track the longest vector seen. Recordings
# differ in length, so the vectors are padded to a common width afterwards.
samples = []  # (label, 1-D float32 feature vector) per recording
max_features = 0

for label, folder_path in labels.items():
    print(f"Processing folder: {label}")
    # os.listdir returns entries in arbitrary order; sort so the row order of
    # the resulting table is reproducible between runs.
    wav_names = sorted(
        name for name in os.listdir(folder_path) if name.endswith(".wav")
    )
    for filename in tqdm(wav_names, desc=f"Processing {label}", unit="file"):
        audiofile = os.path.join(folder_path, filename)
        y, sr, fingerprint = trans_mel(audiofile, n_fft=2048, hop_length=512, n_mels=128)

        # Flatten the 2-D spectrogram into one float32 feature vector.
        feature_vector = fingerprint.astype('float32').ravel()
        samples.append((label, feature_vector))
        max_features = max(max_features, feature_vector.size)

print(f"Maximum feature length: {max_features}")

# Zero-pad every vector on the right up to max_features. Filling one
# pre-allocated matrix avoids growing the DataFrame row by row.
features = np.zeros((len(samples), max_features), dtype='float32')
for row, (_, feature_vector) in enumerate(samples):
    features[row, :feature_vector.size] = feature_vector

# Feature columns are named 0..max_features-1; the label is stored in its own
# string column so the numeric features keep their float32 dtype.
columns = list(range(max_features)) + ['label']
X = pd.DataFrame(features, columns=columns[:-1])
X['label'] = [sample_label for sample_label, _ in samples]

# Number of rows written to X.
current_index = len(samples)

# Display the resulting table.
X

In [None]:
# Write the feature table to Google Drive as CSV (pandas defaults, so the row
# index is included as the first column).
X.to_csv('/content/drive/MyDrive/Aivle_빅프/donateacry_corpus/exp2_testing.csv')