### 라이브러리 추가

In [1]:
import matplotlib.pyplot as plt
import librosa
import librosa.display

import os
import glob
import numpy as np

from tqdm import tqdm
from PIL import Image

ModuleNotFoundError: No module named 'librosa'

### 폴더 생성 

In [2]:
def new_folder(folder_name):
    """Create the per-class output folders for every image type.

    For each image type two directories are created (already existing ones
    are left untouched):

    * ``./image_extraction_data/<type>/<folder_name>`` - images rendered
      from the audio data.
    * ``./final_data/<type>/<folder_name>`` - images after preprocessing.

    Args:
        folder_name: Name of the sub-folder to create under each image type.
    """
    submission_dir = "./image_extraction_data"
    final_dir = "./final_data"

    # The rendering functions build their save path from the mode key, which
    # is spelled "stft" (lower case). Creating only "STFT" leaves that save
    # directory missing on a case-sensitive filesystem. "STFT" is still
    # created so anything relying on the upper-case folder keeps working.
    dir_types = ("MelSepctorgram", "STFT", "stft", "waveshow")

    for base_dir in (submission_dir, final_dir):
        for dir_type in dir_types:
            os.makedirs(f"{base_dir}/{dir_type}/{folder_name}", exist_ok=True)
        

### 메인코드 

In [None]:
def process_org_waveshow(data_section, folder_name, file_name, aug_mode, mode, sr):
    """Draw the un-augmented waveform of ``data_section`` and save it as a PNG.

    The image is written to
    ``./image_extraction_data/<mode>/<folder_name>/<file_name>_<aug_mode>.png``
    with the axes hidden and no padding around the plot.

    Args:
        data_section: Audio samples to draw.
        folder_name: Sub-folder below the mode directory.
        file_name: Output file stem (without extension).
        aug_mode: Augmentation tag appended to the file name.
        mode: Image-type directory name.
        sr: Sampling rate of ``data_section``.
    """
    out_path = f"./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png"

    fig = plt.figure(figsize=(12, 4))
    try:
        # Forward the real sampling rate instead of relying on waveshow's
        # built-in default.
        librosa.display.waveshow(data_section, sr=sr, color="purple")
        plt.axis('off')
        fig.savefig(out_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even when drawing/saving fails, so figures do not
        # pile up while looping over many files.
        plt.close(fig)

    
def process_noise_waveshow(data_section, folder_name, file_name, aug_mode, mode, sr):
    """Draw the waveform of ``data_section`` with added noise and save it as a PNG.

    Gaussian noise (standard normal scaled by 0.05) is added sample-wise
    before plotting. The image is written to
    ``./image_extraction_data/<mode>/<folder_name>/<file_name>_<aug_mode>.png``
    with the axes hidden and no padding around the plot.

    Args:
        data_section: Audio samples (NumPy array) to augment and draw.
        folder_name: Sub-folder below the mode directory.
        file_name: Output file stem (without extension).
        aug_mode: Augmentation tag appended to the file name.
        mode: Image-type directory name.
        sr: Sampling rate of ``data_section``.
    """
    out_path = f"./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png"

    # Noise augmentation on the raw samples.
    noise = 0.05 * np.random.randn(*data_section.shape)
    data_noise = data_section + noise

    fig = plt.figure(figsize=(12, 4))
    try:
        # Forward the real sampling rate instead of relying on waveshow's
        # built-in default.
        librosa.display.waveshow(data_noise, sr=sr, color="purple")
        plt.axis('off')
        fig.savefig(out_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even when drawing/saving fails, so figures do not
        # pile up while looping over many files.
        plt.close(fig)
    
def process_stretch_waveshow(data_section, folder_name, file_name, aug_mode, mode, sr):
    """Draw the time-stretched waveform of ``data_section`` and save it as a PNG.

    The samples are passed through ``librosa.effects.time_stretch`` with a
    fixed ``rate=0.8`` before plotting. The image is written to
    ``./image_extraction_data/<mode>/<folder_name>/<file_name>_<aug_mode>.png``
    with the axes hidden and no padding around the plot.

    Args:
        data_section: Audio samples to augment and draw.
        folder_name: Sub-folder below the mode directory.
        file_name: Output file stem (without extension).
        aug_mode: Augmentation tag appended to the file name.
        mode: Image-type directory name.
        sr: Sampling rate of ``data_section``.
    """
    out_path = f"./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png"

    # Time-stretch augmentation.
    data_stretch = librosa.effects.time_stretch(data_section, rate=0.8)

    fig = plt.figure(figsize=(12, 4))
    try:
        # Forward the real sampling rate instead of relying on waveshow's
        # built-in default.
        librosa.display.waveshow(data_stretch, sr=sr, color="purple")
        plt.axis('off')
        fig.savefig(out_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even when drawing/saving fails, so figures do not
        # pile up while looping over many files.
        plt.close(fig)

    
def process_org_stft(data_section, folder_name, file_name, aug_mode, mode, sr):
    """Save the dB-scaled STFT spectrogram of ``data_section`` as a PNG.

    The image is written to
    ``./image_extraction_data/<mode>/<folder_name>/<file_name>_<aug_mode>.png``
    with the axes hidden and no padding around the plot.

    Args:
        data_section: Audio samples to transform.
        folder_name: Sub-folder below the mode directory.
        file_name: Output file stem (without extension).
        aug_mode: Augmentation tag appended to the file name.
        mode: Image-type directory name.
        sr: Sampling rate handed to ``specshow``.
    """
    # STFT -> magnitude -> dB.
    magnitude = abs(librosa.stft(data_section))
    spectrogram_db = librosa.amplitude_to_db(magnitude)

    # Render without axes and write the picture to disk.
    figure = plt.figure(figsize=(12, 4))
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='hz')
    plt.axis('off')
    target = f"./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png"
    figure.savefig(target, bbox_inches='tight', pad_inches=0)
    plt.close(figure)

    
def process_noise_stft(data_section, folder_name, file_name, aug_mode, mode, sr):
    """Save the dB-scaled STFT spectrogram of a noise-augmented signal as a PNG.

    Gaussian noise (standard normal scaled by 0.005) is added to the samples
    before the STFT is taken. The image is written to
    ``./image_extraction_data/<mode>/<folder_name>/<file_name>_<aug_mode>.png``
    with the axes hidden and no padding around the plot.

    Args:
        data_section: Audio samples (NumPy array) to augment and transform.
        folder_name: Sub-folder below the mode directory.
        file_name: Output file stem (without extension).
        aug_mode: Augmentation tag appended to the file name.
        mode: Image-type directory name.
        sr: Sampling rate handed to ``specshow``.
    """
    # Noise augmentation on the raw samples.
    perturbation = 0.005 * np.random.randn(*data_section.shape)
    noisy_samples = data_section + perturbation

    # STFT -> magnitude -> dB.
    magnitude = abs(librosa.stft(noisy_samples))
    spectrogram_db = librosa.amplitude_to_db(magnitude)

    # Render without axes and write the picture to disk.
    figure = plt.figure(figsize=(12, 4))
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='hz')
    plt.axis('off')
    target = f"./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png"
    figure.savefig(target, bbox_inches='tight', pad_inches=0)
    plt.close(figure)
    


def process_stretch_stft(data_section, folder_name, file_name, aug_mode, mode, sr):
    """Save the dB-scaled STFT spectrogram of a time-stretched signal as a PNG.

    The stretch rate is drawn at random as ``0.8 + U[0, 1) * 0.4`` for every
    call. The image is written to
    ``./image_extraction_data/<mode>/<folder_name>/<file_name>_<aug_mode>.png``
    with the axes hidden and no padding around the plot.

    Args:
        data_section: Audio samples to augment and transform.
        folder_name: Sub-folder below the mode directory.
        file_name: Output file stem (without extension).
        aug_mode: Augmentation tag appended to the file name.
        mode: Image-type directory name.
        sr: Sampling rate handed to ``specshow``.
    """
    # Time-stretch augmentation with a random rate between 0.8 and 1.2.
    stretch_rate = 0.8 + np.random.random() * 0.4
    stretched_samples = librosa.effects.time_stretch(data_section, rate=stretch_rate)

    # STFT -> magnitude -> dB.
    magnitude = abs(librosa.stft(stretched_samples))
    spectrogram_db = librosa.amplitude_to_db(magnitude)

    # Render without axes and write the picture to disk.
    figure = plt.figure(figsize=(12, 4))
    librosa.display.specshow(spectrogram_db, sr=sr, x_axis='time', y_axis='hz')
    plt.axis('off')
    target = f"./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png"
    figure.savefig(target, bbox_inches='tight', pad_inches=0)
    plt.close(figure)
 

def process_org_melspec(data_section, folder_name, file_name, aug_mode, mode, sr):
    """Save the dB-scaled mel spectrogram of ``data_section`` as a PNG.

    The image is written to
    ``./image_extraction_data/<mode>/<folder_name>/<file_name>_<aug_mode>.png``
    with the axes hidden and no padding around the plot.

    Args:
        data_section: Audio samples to transform.
        folder_name: Sub-folder below the mode directory.
        file_name: Output file stem (without extension).
        aug_mode: Augmentation tag appended to the file name.
        mode: Image-type directory name.
        sr: Sampling rate of ``data_section``.
    """
    out_path = f"./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png"

    # STFT magnitude -> mel spectrogram. The sampling rate is forwarded so
    # the mel filter bank is built for this signal rather than for the
    # library default rate.
    stft_mel = librosa.stft(data_section)
    mel_spec = librosa.feature.melspectrogram(S=abs(stft_mel), sr=sr)

    # dB conversion relative to the loudest bin.
    mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)

    fig = plt.figure(figsize=(12, 4))
    try:
        librosa.display.specshow(mel_spec_db, sr=sr, x_axis='time', y_axis='hz')
        plt.axis('off')
        fig.savefig(out_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even when drawing/saving fails, so figures do not
        # pile up while looping over many files.
        plt.close(fig)


def process_noise_melspec(data_section, folder_name, file_name, aug_mode, mode, sr):
    """Save the dB-scaled mel spectrogram of a noise-augmented signal as a PNG.

    Gaussian noise (standard normal scaled by 0.005) is added to the audio
    samples, the same way ``process_noise_stft`` does it, and the result is
    converted to dB exactly once. The previous implementation added the noise
    to an already dB-scaled spectrogram and then ran ``amplitude_to_db`` on
    those dB values a second time, which treats decibels as amplitudes and
    distorts the picture.

    The image is written to
    ``./image_extraction_data/<mode>/<folder_name>/<file_name>_<aug_mode>.png``
    with the axes hidden and no padding around the plot.

    Args:
        data_section: Audio samples (NumPy array) to augment and transform.
        folder_name: Sub-folder below the mode directory.
        file_name: Output file stem (without extension).
        aug_mode: Augmentation tag appended to the file name.
        mode: Image-type directory name.
        sr: Sampling rate of ``data_section``.
    """
    out_path = f"./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png"

    # Noise augmentation on the raw samples.
    noise = 0.005 * np.random.randn(*data_section.shape)
    noisy_section = data_section + noise

    # STFT magnitude -> mel spectrogram (filter bank built for this sr).
    stft_noise = librosa.stft(noisy_section)
    mel_spec_noise = librosa.feature.melspectrogram(S=abs(stft_noise), sr=sr)

    # Single dB conversion relative to the loudest bin.
    mel_spec_noise_db = librosa.amplitude_to_db(mel_spec_noise, ref=np.max)

    fig = plt.figure(figsize=(12, 4))
    try:
        librosa.display.specshow(mel_spec_noise_db, sr=sr, x_axis='time', y_axis='hz')
        plt.axis('off')
        fig.savefig(out_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even when drawing/saving fails, so figures do not
        # pile up while looping over many files.
        plt.close(fig)

def process_stretch_melspec(data_section, folder_name, file_name, aug_mode, mode, sr):
    """Save the dB-scaled mel spectrogram of a time-stretched signal as a PNG.

    The stretch rate is drawn with ``np.random.uniform(low=0.8, high=1.2)``
    for every call. The image is written to
    ``./image_extraction_data/<mode>/<folder_name>/<file_name>_<aug_mode>.png``
    with the axes hidden and no padding around the plot.

    Args:
        data_section: Audio samples to augment and transform.
        folder_name: Sub-folder below the mode directory.
        file_name: Output file stem (without extension).
        aug_mode: Augmentation tag appended to the file name.
        mode: Image-type directory name.
        sr: Sampling rate of ``data_section``.
    """
    out_path = f"./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png"

    # Time-stretch augmentation with a random rate.
    rate_mel = np.random.uniform(low=0.8, high=1.2)
    stretched_mel = librosa.effects.time_stretch(data_section, rate=rate_mel)

    # STFT magnitude -> mel spectrogram. The sampling rate is forwarded so
    # the mel filter bank is built for this signal rather than for the
    # library default rate.
    stft_mel_stretch = librosa.stft(stretched_mel)
    mel_spec_stretch = librosa.feature.melspectrogram(S=abs(stft_mel_stretch), sr=sr)

    # dB conversion relative to the loudest bin.
    mel_spec_stretch_db = librosa.amplitude_to_db(mel_spec_stretch, ref=np.max)

    fig = plt.figure(figsize=(12, 4))
    try:
        librosa.display.specshow(mel_spec_stretch_db, sr=sr, x_axis='time', y_axis='hz')
        plt.axis('off')
        fig.savefig(out_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Close the figure even when drawing/saving fails, so figures do not
        # pile up while looping over many files.
        plt.close(fig)
        
##### Mode and augmentation definitions
# Dispatch table: MODES[mode][aug_mode] -> rendering function.
# All registered functions share the signature
# (data_section, folder_name, file_name, aug_mode, mode, sr).
# The outer key is passed on as `mode` and ends up verbatim in the save path,
# so its spelling (including 'MelSepctorgram') is part of the on-disk layout.
MODES = {
    
    # Waveform plots.
    'waveshow' : {
        'org' : process_org_waveshow,
        'noise' : process_noise_waveshow,
        'stretch' : process_stretch_waveshow
    },
    # STFT spectrograms.
    'stft' : {
        'org' : process_org_stft,
        'noise' : process_noise_stft,
        'stretch' : process_stretch_stft,
    },
    # Mel spectrograms.
    'MelSepctorgram' : {
        'org' : process_org_melspec,
        'noise' : process_noise_melspec,
        'stretch' : process_stretch_melspec
    }
}


if __name__ == "__main__":

    # Collect the raw audio files. Expected layout: ./raw_data/*/*/*.wav
    raw_data_path = "./raw_data/"
    raw_data_path_list = glob.glob(os.path.join(raw_data_path, "*", "*", "*.wav"))

    # Image type to render (a key of MODES: 'waveshow', 'stft',
    # 'MelSepctorgram') and augmentation to apply ('org', 'noise', 'stretch').
    mode = "MelSepctorgram"
    aug_mode = "stretch"

    # Resolve the rendering function once, before any audio is loaded, so a
    # mistyped mode fails immediately instead of silently producing nothing
    # after every file has been read.
    try:
        render = MODES[mode][aug_mode]
    except KeyError:
        raise ValueError(
            f"Unsupported combination mode={mode!r}, aug_mode={aug_mode!r}"
        ) from None

    # File that is deliberately left out of processing.
    # NOTE(review): presumably this file cannot be decoded - confirm.
    skipped_files = {"jazz.00054.wav"}

    # Only the 0 s - 10 s section of every recording is rendered.
    start_time = 0
    end_time = 10

    for raw_data in tqdm(raw_data_path_list):
        base_name = os.path.basename(raw_data)
        # Compare by file name so the skip works regardless of how the path
        # is spelled (separator style, leading "./", ...).
        if base_name in skipped_files:
            continue

        data, sr = librosa.load(raw_data)

        # The class folder is the directory that directly contains the file;
        # os.path is used instead of split("/") to stay separator-agnostic.
        folder_name = os.path.basename(os.path.dirname(raw_data))
        file_name = os.path.splitext(base_name)[0]

        # Make sure the output folders for this class exist.
        new_folder(folder_name)

        # Cut the section to render (sample index = seconds * sampling rate).
        start_sample = sr * start_time
        end_sample = sr * end_time
        data_section = data[start_sample:end_sample]

        render(data_section, folder_name, file_name, aug_mode, mode, sr)
            
            

    

 87%|██████████████████████████████████▊     | 871/1000 [01:43<00:15,  8.28it/s]