In [1]:
import matplotlib.pyplot as plt
import librosa
import librosa.display
import os
import glob
import numpy as np
from tqdm import tqdm
from PIL import Image

# 1. 폴더 생성

In [5]:
def new_folder(folder_name):
    # 1. 제출 데이터
    submission_dir = './image_extraction_data'
    final_dir = './final_data'
    for dir_type in ['Melspectrogram', 'STFT', 'waveshow']:
        os.makedirs(f'{submission_dir}/{dir_type}/{folder_name}', exist_ok=True)
        
    # 2. 이미지 전처리 완료 저장 폴더
        os.makedirs(f'{final_dir}/{dir_type}/{folder_name}', exist_ok=True)

# 메인코드

In [6]:
def process_org_waveshow(data, folder_name, file_name, aug_mode, mode, sr):
    
    plt.figure(figsize=(12,4))
    librosa.display.waveshow(data, color='purple')
    plt.axis('off')
    plt.savefig(f'./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png', bbox_inches='tight', pad_inches=0)
    plt.close()
    
def process_noise_waveshow(data, folder_name, file_name, aug_mode, mode, sr):
    
    noise = 0.05*np.random.randn(*data.shape)
    data_noised = data+noise
    
    plt.figure(figsize=(12,4))
    librosa.display.waveshow(data_noised, color='purple')
    plt.axis('off')
    plt.savefig(f'./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png', bbox_inches='tight', pad_inches=0)
    plt.close()

def process_stretch_waveshow(data, folder_name, file_name, aug_mode, mode, sr):
    
    data_stretched = librosa.effects.time_stretch(data, rate=0.8)
    
    plt.figure(figsize=(12,4))
    librosa.display.waveshow(data_stretched, color='purple')
    plt.axis('off')
    plt.savefig(f'./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png', bbox_inches='tight', pad_inches=0)
    plt.close()
    
def process_org_stft(data, folder_name, file_name, aug_mode, mode, sr):
    
    stft = librosa.stft(data)
    stft_db = librosa.amplitude_to_db(abs(stft))
    
    plt.figure(figsize=(12,4))
    librosa.display.specshow(stft_db, sr=sr)
    plt.axis('off')
    plt.savefig(f'./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png', bbox_inches='tight', pad_inches=0)
    plt.close()
    
def process_noise_stft(data, folder_name, file_name, aug_mode, mode, sr):
    
    noise = 0.005*np.random.randn(*data.shape)
    data_noised = data+noise
    
    stft = librosa.stft(data_noised)
    stft_db = librosa.amplitude_to_db(abs(stft))
    
    plt.figure(figsize=(12,4))
    librosa.display.specshow(stft_db, sr=sr)
    plt.axis('off')
    plt.savefig(f'./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png', bbox_inches='tight', pad_inches=0)
    plt.close()
    
def process_stretch_stft(data, folder_name, file_name, aug_mode, mode, sr):
    
    rate = 0.8 + np.random.random() * 0.4
    data_stretched = librosa.effects.time_stretch(data, rate=rate)
    
    stft = librosa.stft(data_stretched)
    stft_db = librosa.amplitude_to_db(abs(stft))
    
    plt.figure(figsize=(12,4))
    librosa.display.specshow(stft_db, sr=sr)
    plt.axis('off')
    plt.savefig(f'./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png', bbox_inches='tight', pad_inches=0)
    plt.close()

def process_org_melspec(data, folder_name, file_name, aug_mode, mode, sr):
    
    stft = librosa.stft(data)
    
    mel_spec = librosa.feature.melspectrogram(S=abs(stft))

    mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)
    
    plt.figure(figsize=(12,4))
    librosa.display.specshow(mel_spec_db, sr=sr)
    plt.axis('off')
    plt.savefig(f'./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png', bbox_inches='tight', pad_inches=0)
    plt.close()
    
def process_noise_melspec(data, folder_name, file_name, aug_mode, mode, sr):
    
    stft = librosa.stft(data)
    
    mel_spec = librosa.feature.melspectrogram(S=abs(stft))

    mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)
    
    noise = 0.005*np.random.randn(*mel_spec_db.shape)
    mel_spec_db_noised = noise + mel_spec_db
    
    mel_spec_db_noised_db = librosa.amplitude_to_db(mel_spec_db_noised, ref=np.max)
    
    plt.figure(figsize=(12,4))
    librosa.display.specshow(mel_spec_db_noised_db, sr=sr)
    plt.axis('off')
    plt.savefig(f'./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png', bbox_inches='tight', pad_inches=0)
    plt.close()
    
def process_stretch_melspec(data, folder_name, file_name, aug_mode, mode, sr):
    
    rate = 0.8 + np.random.random() * 0.4
    data_stretched = librosa.effects.time_stretch(data, rate=rate)
    
    stft = librosa.stft(data_stretched)
    
    mel_spec = librosa.feature.melspectrogram(S=abs(stft))

    mel_spec_db = librosa.amplitude_to_db(mel_spec, ref=np.max)
    
    plt.figure(figsize=(12,4))
    librosa.display.specshow(mel_spec_db, sr=sr)
    plt.axis('off')
    plt.savefig(f'./image_extraction_data/{mode}/{folder_name}/{file_name}_{aug_mode}.png', bbox_inches='tight', pad_inches=0)
    plt.close()
    
# 모드와 증강 정의
MODES = {
    'waveshow' : {
        'org' : process_org_waveshow,
        'noise' : process_noise_waveshow,
        'stretch' : process_stretch_waveshow,
    },
    'STFT' : {
        'org' : process_org_stft,
        'noise' :process_noise_stft,
        'stretch' :process_stretch_stft,
    },
    'Melspectrogram' : {
        'org' : process_org_melspec,
        'noise' : process_noise_melspec,
        'stretch' :process_stretch_melspec,
    }   
}




if __name__ == '__main__':
    # 데이터 가져오기
    raw_data_path = './raw_data/'
    #폴더 구조 raw_data/*/*/*.wav
    raw_data_path_list = glob.glob(os.path.join(raw_data_path,'*','*','*.wav'))
    
    mode = 'Melspectrogram'
    aug_mode = 'noise'
    
    for raw_data in tqdm(raw_data_path_list):
        if raw_data != './raw_data\\raw_data\\jazz\\jazz.00054.wav':
            data, sr = librosa.load(raw_data)
            #print(data, sr)
            
            # 폴더 생성
            folder_name = raw_data.split('\\')[2]
            file_name = raw_data.split('\\')[-1]
            file_name = file_name.replace('.wav', '')
            #print(folder_name, file_name)
            
            new_folder(folder_name)
            
            start = 0
            end = 10
            start_sample = sr * start
            end_sample = sr * end
            data_selection = data[start_sample:end_sample]
            
            if mode in MODES and aug_mode in MODES[mode]:
                MODES[mode][aug_mode](data_selection, folder_name, file_name, aug_mode, mode, sr)
                

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [01:29<00:00, 11.18it/s]


# 이미지 크기 리사이징

In [2]:
def expend2square(pil_img, background_color):
    width, height = pil_img.size
    
    if width == height:
        return pil_img
    
    elif width > height:
        result = Image.new(pil_img.mode, (width,width), background_color)
        result.paste(pil_img, (0,(width-height)//2))
        return result
    else:
        result = Image.new(pil_img.mode, (height,height), background_color)
        result.paste(pil_img, ((height-width)//2, 0))
        return result
    
def resize_with_padding(pil_img, newsize, background_color):
    img = expend2square(pil_img, background_color)
    img = img.resize((newsize[0], newsize[1]), Image.ANTIALIAS)
    
    return img

In [8]:
genre_folder_map = {
    'blue':'blue',
    'classical':'classical',
    'country':'country',
    'disco':'disco',
    'hiphop':'hiphop',
    'jazz':'jazz',
    'metal':'metal',
    'pop':'pop',
    'reggae':'reggae',
    'rock':'rock'
}

file_path = './image_extraction_data/'
file_path_list = glob.glob(os.path.join(file_path,'*','*','*.png'))

for path in tqdm(file_path_list):
    mode, genre, file_name = path.split('\\')[1:]
    
    if mode in ['waveshow', 'STFT', 'Melspectrogram']:
        img = Image.open(path)
        img_new = resize_with_padding(img, (255,255), (0,0,0))
        
        if genre in genre_folder_map:
            save_file_name = f'./final_data/{mode}/{genre_folder_map[genre]}/{file_name}'
            img_new.save(save_file_name, 'png')
    

  img = img.resize((newsize[0], newsize[1]), Image.ANTIALIAS)
100%|██████████████████████████████████████████████████████████████████████████████| 9006/9006 [03:10<00:00, 47.16it/s]
