# preprocessing

In [None]:
import csv
import os
import random
import shutil
from typing import List, Optional

from IPython.display import Audio
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import scipy.io.wavfile
from tqdm import tqdm


In [None]:
%matplotlib inline

In [None]:
# Paths to two example recordings from the "bed" class folder.
# sample_file is reused by later cells for playback and plotting.
sample_file = 'train/audio/bed/00f0204f_nohash_0.wav'
sample_file2 = 'train/audio/bed/00f0204f_nohash_1.wav'

In [None]:
# Render an audio player for the sample recording, with autoplay requested.
ipd.Audio(sample_file, autoplay=True)

In [None]:
# Load the sample without passing sr, so librosa's own default sampling rate applies here
# (unlike the later calls, which pass sr=SAMPLE_RATE). The result is only displayed, not stored.
librosa.load(sample_file)

In [None]:
# Sampling rate passed as sr/rate to the load and write calls below.
# pad_signal also uses it as the target clip length in samples.
SAMPLE_RATE = 16_000

In [None]:
# TODO
# 1. Specify the classes that are the basic classes
# 2. Specify the classes that make up the "other" set
# 3. Split background_noise into shorter time segments and apply them to the training/validation/test data sets
# 4. Prepare different ways of encoding the signal, e.g. MFCC and others
# 5. Data augmentation (noise_addition, background_noise, pitch_scaling)
# 6. Prepare different models for training (rnn+lstm, rnn+gru, cnn, cnn - x2) + hyperparameter tuning, vision transformer
#    the same one I used before

# Damian: add the transformer

In [None]:
# The ten basic command words.
basic_classes = [
    'yes', 'no', 'up', 'down', 'left', 'right', 'on', 'off', 'stop', 'go'
]

# The remaining words, grouped together as the "other" classes.
# NOTE(review): the original comment also called this group "silence" - confirm the intended label name.
additional_classes = [
    'zero', 'wow', 'two', 'tree', 'three', 'six', 'sheila', 'seven', 'one', 'nine',
    'marvin', 'house', 'happy', 'four', 'five', 'eight', 'dog', 'cat', 'bird', 'bed',
]

# Basic classes come first, so they occupy the first ten positions of the combined list.
all_classes = basic_classes + additional_classes

# Sanity check that no class was dropped from the lists above.
assert len(all_classes) == 30, "Incorrect number of classes"

In [None]:
def create_short_background_noices_from_path(file_name: str, current_folder_path: str, destination_folder_path: str):
    """Split one recording into consecutive chunks of SAMPLE_RATE samples and save each as a WAV file.

    Args:
        file_name: Name of the audio file inside ``current_folder_path``.
        current_folder_path: Folder that contains the source recording.
        destination_folder_path: Existing folder that receives the chunks, named
            ``<file name without extension>_<chunk index>.wav``.
    """
    source_path = os.path.join(current_folder_path, file_name)
    signal, _sr = librosa.load(source_path, sr=SAMPLE_RATE)

    # Cut at every multiple of SAMPLE_RATE samples; the last chunk keeps the
    # remainder and can therefore be shorter than the others.
    split_points = np.arange(SAMPLE_RATE, len(signal), SAMPLE_RATE)

    # splitext instead of slicing off four characters, so any extension length works.
    base_name, _extension = os.path.splitext(file_name)

    for i, small_signal in enumerate(np.array_split(signal, split_points)):
        path_to_save_signal = os.path.join(destination_folder_path, f"{base_name}_{i}.wav")
        print(path_to_save_signal) # for debugging
        scipy.io.wavfile.write(filename=path_to_save_signal, rate=SAMPLE_RATE, data=small_signal)

def create_short_background_noices():
    """Cut every ``.wav`` file in the background-noise folder into short clips.

    Reads from ``train/audio/_background_noise_`` and writes the clips to
    ``train/audio/_short_background_noise_``, creating that folder if needed.
    Files without a ``.wav`` suffix are ignored.
    """
    # Forward slashes work on every platform (including Windows) and match the
    # paths the rest of this notebook uses for the same folders.
    folder_name = "train/audio/_background_noise_"
    new_folder_name = "train/audio/_short_background_noise_"

    os.makedirs(new_folder_name, exist_ok=True)

    for file in os.listdir(folder_name):
        if file.endswith(".wav"):
            create_short_background_noices_from_path(file_name=file,
                                                     current_folder_path=folder_name,
                                                     destination_folder_path=new_folder_name)
    

In [None]:
# Generate the short background-noise clips on disk (writes one WAV file per chunk).
create_short_background_noices()

In [None]:
def get_all_validation_files():
    """Return the first field of every non-empty row in ``train/validation_list.txt``.

    Returns:
        A list of strings, one per non-blank line, in file order.
    """
    # newline="" is what the csv module asks for when it is handed a file object.
    with open('train/validation_list.txt', newline="") as ls:
        # csv.reader yields an empty list for a blank line; skip those rows
        # instead of failing on row[0].
        return [row[0] for row in csv.reader(ls) if row]
    
def get_all_testing_files():
    """Return the first field of every non-empty row in ``train/testing_list.txt``.

    Returns:
        A list of strings, one per non-blank line, in file order.
    """
    # newline="" is what the csv module asks for when it is handed a file object.
    with open('train/testing_list.txt', newline="") as ls:
        # csv.reader yields an empty list for a blank line; skip those rows
        # instead of failing on row[0].
        return [row[0] for row in csv.reader(ls) if row]

# Load both split lists once; entries are compared later against "<class_name>/<file>" strings.
validation_file_paths = get_all_validation_files() 
testing_file_paths = get_all_testing_files()

In [None]:
def pad_signal(signal: np.ndarray, target_length: Optional[int] = None) -> np.ndarray:
    """Force ``signal`` to a fixed length along its first axis.

    Shorter signals are zero-padded at the end, longer ones are truncated, and
    signals that already have the target length are returned unchanged (the
    same object, not a copy).

    Args:
        signal: Input samples; the length is taken from ``signal.shape[0]``.
        target_length: Desired number of samples. Defaults to ``SAMPLE_RATE``
            when omitted, which is the original behaviour.

    Returns:
        An array with ``target_length`` entries along its first axis.
    """
    if target_length is None:
        # Resolved at call time so the module-level constant stays the default.
        target_length = SAMPLE_RATE

    size = signal.shape[0]
    if size < target_length:
        return np.pad(signal, (0, target_length - size), constant_values=(0, 0))
    if size > target_length:
        return signal[:target_length]
    return signal

In [None]:
def create_necessary_folders_for_wav_files():
    """Create one sub-folder per class under every output folder used for WAV files.

    The parents are the three training folders below ``train/audio_augmentations``
    plus ``validation`` and ``test``. Folders that already exist are left alone.
    """
    parent_folders = (
        # training variants
        "train/audio_augmentations/no_augmentations",
        "train/audio_augmentations/pitch_scaling",
        "train/audio_augmentations/random_noice",
        # held-out splits
        "validation",
        "test",
    )
    for class_name in all_classes:
        for parent in parent_folders:
            # makedirs also creates any missing intermediate folders.
            os.makedirs(f"{parent}/{class_name}", exist_ok=True)
        
    
        
def split_sound_to_train_validation_test_sets():
    """Copy every class recording, padded to a fixed length, into its split folder.

    Each file under ``train/audio/<class_name>`` is loaded at ``SAMPLE_RATE``,
    passed through ``pad_signal`` and written to:

    * ``validation/<class_name>/`` if ``<class_name>/<file>`` is in ``validation_file_paths``,
    * ``test/<class_name>/`` if it is in ``testing_file_paths``,
    * ``train/audio_augmentations/no_augmentations/<class_name>/`` otherwise.

    The destination folders must already exist. Prints one line per file and,
    at the end, the number of files per split.
    """
    # Sets give constant-time membership tests; the lists would be scanned
    # once per audio file otherwise.
    validation_files = set(validation_file_paths)
    testing_files = set(testing_file_paths)

    split_counter = {
        "training": 0,
        "validation": 0,
        "testing": 0
    }
    for class_name in all_classes:
        for file in os.listdir(f"train/audio/{class_name}"):
            combined = class_name + '/' + file

            origin_path = f"train/audio/{class_name}/{file}"
            signal, _sr = librosa.load(origin_path, sr=SAMPLE_RATE)
            signal = pad_signal(signal)

            # Validation takes precedence over testing, as in the original branch order.
            if combined in validation_files:
                split_name, destination_folder = "validation", "validation"
            elif combined in testing_files:
                split_name, destination_folder = "testing", "test"
            else:
                split_name, destination_folder = "training", "train/audio_augmentations/no_augmentations"

            destination_path = f"{destination_folder}/{class_name}/{file}"
            scipy.io.wavfile.write(filename=destination_path, rate=SAMPLE_RATE, data=signal)
            split_counter[split_name] += 1
            print(split_name.upper(), combined)

    print(split_counter)

# Folders have to exist before the split writes WAV files into them.
create_necessary_folders_for_wav_files()    
split_sound_to_train_validation_test_sets()


In [None]:
# split_counter = {'training': 51088, 'validation': 6798, 'testing': 6835}

In [None]:
def get_all_noice_files():
    """Return the names of all entries in ``train/audio/_short_background_noise_``.

    Returns:
        A new list of entry names (not full paths), in the order ``os.listdir`` reports them.
    """
    return list(os.listdir("train/audio/_short_background_noise_"))

def get_random_noice_signal(all_sound_files: List[str]) -> np.ndarray:
    """Load one randomly chosen short background-noise clip.

    Args:
        all_sound_files: File names inside ``train/audio/_short_background_noise_``
            to choose from.

    Returns:
        The clip loaded at ``SAMPLE_RATE`` and passed through ``pad_signal``.
    """
    chosen_name = random.choice(all_sound_files)
    noise, _ = librosa.load(f"train/audio/_short_background_noise_/{chosen_name}", sr=SAMPLE_RATE)
    return pad_signal(noise)

# List the noise clips once; create_noice_signal_augmentation reads this module-level list.
all_noice_files = get_all_noice_files()

# Quick check that a random noise clip can be loaded (the result is only displayed).
get_random_noice_signal(all_noice_files)

In [None]:
def combine_signals(s1: np.ndarray, s2: np.ndarray, s2_factor: float):
    """Return ``s1`` plus ``s2`` scaled by ``s2_factor``.

    Args:
        s1: Base signal.
        s2: Signal to mix in.
        s2_factor: Multiplier applied to ``s2`` before the addition.
    """
    scaled_s2 = s2 * s2_factor
    return s1 + scaled_s2

def create_noice_signal_augmentation(original_signal: np.ndarray):
    """Mix a randomly chosen background-noise clip into ``original_signal``.

    The noise clip is drawn from the module-level ``all_noice_files`` list and
    scaled by 0.008 before being added.
    """
    noise_factor = 0.008
    background = get_random_noice_signal(all_noice_files)
    return combine_signals(original_signal, background, noise_factor)

def create_pitch_signal_augmentation(original_signal: np.ndarray, shift_by: int):
    """Return ``original_signal`` pitch-shifted by ``shift_by`` steps.

    Args:
        original_signal: Samples assumed to be at ``SAMPLE_RATE``.
        shift_by: Passed to ``librosa.effects.pitch_shift`` as ``n_steps``.
    """
    shifted_signal = librosa.effects.pitch_shift(
        original_signal,
        sr=SAMPLE_RATE,
        n_steps=shift_by,
    )
    return shifted_signal

sample_signal, _ = librosa.load(sample_file, sr=SAMPLE_RATE)

# Draw one random shift from {-1, 0, 1, 2, 3}. Asking randint for a scalar
# (no size argument) avoids converting a one-element array with int(), which
# newer NumPy releases deprecate.
num_semitoner = int(np.random.randint(-1, 4))

# Listen to the pitch-shifted sample; kept as the last expression so the notebook renders the player.
ipd.Audio(create_pitch_signal_augmentation(sample_signal, num_semitoner), rate=SAMPLE_RATE, autoplay=True)

In [None]:
def create_noice_augmentations():
    """Write five noise-augmented copies of every un-augmented training file.

    For each class, files are read from
    ``train/audio_augmentations/no_augmentations/<class_name>`` and the copies
    are saved to ``train/audio_augmentations/random_noice/<class_name>`` as
    ``<copy index>_<file>``. Each copy gets its own call to
    ``create_noice_signal_augmentation``.
    """
    copies_per_file = 5
    for class_name in all_classes:
        source_folder = f"train/audio_augmentations/no_augmentations/{class_name}"
        target_folder = f"train/audio_augmentations/random_noice/{class_name}"
        for file in os.listdir(source_folder):
            print(file)
            clean_signal, _ = librosa.load(f"{source_folder}/{file}", sr=SAMPLE_RATE)
            clean_signal = pad_signal(clean_signal)
            for copy_index in range(copies_per_file):
                noisy_signal = create_noice_signal_augmentation(clean_signal)
                scipy.io.wavfile.write(
                    filename=f"{target_folder}/{copy_index}_{file}",
                    rate=SAMPLE_RATE,
                    data=noisy_signal,
                )
                
def create_pitch_augmentations():
    """Write five pitch-shifted copies of every un-augmented training file.

    For each class, files are read from
    ``train/audio_augmentations/no_augmentations/<class_name>`` and the copies
    are saved to ``train/audio_augmentations/pitch_scaling/<class_name>`` as
    ``<copy index>_<file>``. Copy index ``i`` is shifted by ``i - 1`` steps, so
    the shifts are -1, 0, 1, 2 and 3.
    """
    copies_per_file = 5
    for class_name in all_classes:
        print(class_name)
        source_folder = f"train/audio_augmentations/no_augmentations/{class_name}"
        target_folder = f"train/audio_augmentations/pitch_scaling/{class_name}"
        for file in os.listdir(source_folder):
            print(class_name, file)
            clean_signal, _ = librosa.load(f"{source_folder}/{file}", sr=SAMPLE_RATE)
            clean_signal = pad_signal(clean_signal)
            for copy_index in range(copies_per_file):
                # Shift amounts run from -1 up to and including 3.
                shifted_signal = create_pitch_signal_augmentation(clean_signal, copy_index - 1)
                scipy.io.wavfile.write(
                    filename=f"{target_folder}/{copy_index}_{file}",
                    rate=SAMPLE_RATE,
                    data=shifted_signal,
                )
# Generate both augmented training sets on disk (five copies per source file each).
create_noice_augmentations()            
create_pitch_augmentations()

# extracting_features

In [None]:
def create_mfcc_feature(signal: np.ndarray):
    """Return the MFCC matrix of ``signal`` with 25 coefficients, computed at ``SAMPLE_RATE``."""
    return librosa.feature.mfcc(y=signal, sr=SAMPLE_RATE, n_mfcc=25)

def create_mel_spectogram(signal: np.ndarray):
    """Return the mel spectrogram of ``signal`` converted with ``librosa.power_to_db``.

    Uses 128 mel bands and ``fmax=8000``; the dB conversion is referenced to the
    spectrogram's own maximum (``ref=np.max``).
    """
    power_spectrogram = librosa.feature.melspectrogram(
        y=signal,
        sr=SAMPLE_RATE,
        n_mels=128,
        fmax=8000,
    )
    return librosa.power_to_db(power_spectrogram, ref=np.max)

In [None]:
# sample plot

# Compute and plot the MFCC matrix of the sample recording.
# (The former single-iteration loop around these lines added nothing and was removed.)
sample_signal, sr = librosa.load(sample_file, sr=SAMPLE_RATE)
mfcc_matrix = create_mfcc_feature(sample_signal)
print(mfcc_matrix.shape)

librosa.display.specshow(mfcc_matrix,
                         x_axis='time', sr=SAMPLE_RATE)
plt.colorbar(format="%+2.f")
plt.show()

In [None]:
# Waveform of the most recently loaded sample_signal.
librosa.display.waveshow(sample_signal, sr=sr)
# plot() is called without data; the trailing semicolon presumably just suppresses the cell's output echo.
plt.plot();

In [None]:
# Waveform, MFCC and mel-spectrogram plots for one recording per class.
for class_name in all_classes:
    print(class_name)
    folder_name = f"train/audio_augmentations/no_augmentations/{class_name}"
    # Whichever file os.listdir reports first stands in for the class.
    sound_file = os.listdir(folder_name)[0]
    signal, _ = librosa.load(folder_name + "/" + sound_file, sr=SAMPLE_RATE)

    # Waveform
    plt.figure(figsize=(3,3))
    librosa.display.waveshow(signal, sr=SAMPLE_RATE)
    plt.title(class_name)
    plt.show()

    # MFCCs: time belongs on the x axis (one column per frame), matching the
    # sample plot; it was previously attached to the coefficient axis.
    mfccs = create_mfcc_feature(signal)
    plt.figure(figsize=(3,3))
    librosa.display.specshow(mfccs,
                             x_axis='time', sr=SAMPLE_RATE)
    plt.colorbar(format="%+2.f")
    plt.title(class_name)
    plt.show()

    # Mel spectrogram
    mel_spectogram = create_mel_spectogram(signal)
    print("mel_spectogram.shape", mel_spectogram.shape)
    plt.figure(figsize=(3,3))
    librosa.display.specshow(mel_spectogram, x_axis='time',
                             y_axis='mel', sr=SAMPLE_RATE,
                             fmax=8000)
    plt.colorbar(format='%+2.0f dB')
    plt.title(f'{class_name} Mel-frequency spectrogram')
    plt.show()
    print('=======================')

### mappings from classes to indices / vice-versa

In [None]:
# Two-way lookup between class names and their positions in all_classes.
idx_to_class_name = dict(enumerate(all_classes))
class_name_to_idx = {name: position for position, name in idx_to_class_name.items()}

In [None]:
def create_file_list_and_labels_for_folder(folder_name: str):
    """Collect the path and class name of every file below ``folder_name``.

    Args:
        folder_name: Folder that contains one sub-folder per entry of ``all_classes``.

    Returns:
        A tuple ``(file_full_paths, class_names)`` of two equally long lists;
        ``class_names[i]`` is the name of the class sub-folder that holds
        ``file_full_paths[i]``.
    """
    file_full_paths, class_names = [], []
    for class_name in all_classes:
        class_folder = f"{folder_name}/{class_name}"
        entries = os.listdir(class_folder)
        file_full_paths.extend(f"{class_folder}/{entry}" for entry in entries)
        # One label per file found in this class folder.
        class_names.extend([class_name] * len(entries))
    return file_full_paths, class_names

# File paths and class-name labels for each data set written to disk above.

# Training data without augmentation
x_train_no_augmentations_file_list, y_train_no_augmentations_class_names = create_file_list_and_labels_for_folder(
    "train/audio_augmentations/no_augmentations"
)
# Training data with pitch shifting
x_train_pitch_augmentation_file_list, y_train_pitch_augmentation_class_names = create_file_list_and_labels_for_folder(
    "train/audio_augmentations/pitch_scaling"
)
# Training data with added background noise
x_train_noice_augmentation_file_list, y_train_noice_augmentation_class_names = create_file_list_and_labels_for_folder(
    "train/audio_augmentations/random_noice"
)
# Validation data
x_validation_file_list, y_validation_class_names = create_file_list_and_labels_for_folder(
    "validation"
)
# Test data
x_test_file_list, y_test_class_names = create_file_list_and_labels_for_folder(
    "test"
)

In [None]:
def get_signal_from_path(path: str):
    """Load the audio file at ``path`` at ``SAMPLE_RATE``.

    Returns:
        Whatever ``librosa.load`` returns; callers unpack it as ``(signal, sampling_rate)``.
    """
    loaded = librosa.load(path, sr=SAMPLE_RATE)
    return loaded

def get_numpy_numeric_vector_from_str_labels(labels: List[str]):
    """Translate class names into their numeric indices.

    Args:
        labels: Class names; each must be a key of ``class_name_to_idx``.

    Returns:
        A NumPy array holding the index of every label, in input order.

    Raises:
        KeyError: If a label is not a known class name.
    """
    return np.array([class_name_to_idx[label] for label in labels])

def prepare_numpy_X_mfcc_and_y_from_files(file_full_paths: List[str], labels: List[str]):
    """Build the MFCC feature array and numeric label vector for a list of audio files.

    Args:
        file_full_paths: Paths of the audio files to load.
        labels: Class name of each file, in the same order.

    Returns:
        A tuple ``(mfccs, y_numeric)``: ``mfccs`` has shape
        ``(len(file_full_paths), 25, 32)`` and ``y_numeric`` holds the class
        index of every label.

    Raises:
        ValueError: If a file's MFCC matrix does not fit the fixed (25, 32) slot.
    """
    # Fixed per-file slot of 25 coefficients x 32 frames.
    # NOTE(review): 32 frames presumably corresponds to clips of exactly
    # SAMPLE_RATE samples with librosa's default framing - TODO confirm.
    mfccs = np.zeros((len(file_full_paths), 25, 32))

    # Only peek at the first path when there is one, so an empty list no longer crashes.
    if file_full_paths:
        print(file_full_paths[0]) # for debugging

    for i, path in enumerate(tqdm(file_full_paths)):
        signal, _sr = get_signal_from_path(path)
        mfccs[i] = create_mfcc_feature(signal)

    y_numeric = get_numpy_numeric_vector_from_str_labels(labels)

    return mfccs, y_numeric

def prepare_numpy_X_mel_spectogram_and_y_from_files(file_full_paths: List[str], labels: List[str]):
    """Build the mel-spectrogram feature array and numeric label vector for a list of audio files.

    Args:
        file_full_paths: Paths of the audio files to load.
        labels: Class name of each file, in the same order.

    Returns:
        A tuple ``(mel_spectograms, y_numeric)``: ``mel_spectograms`` has shape
        ``(len(file_full_paths), 128, 32)`` and ``y_numeric`` holds the class
        index of every label.

    Raises:
        ValueError: If a file's spectrogram does not fit the fixed (128, 32) slot.
    """
    # Fixed per-file slot of 128 mel bands x 32 frames.
    # NOTE(review): 32 frames presumably corresponds to clips of exactly
    # SAMPLE_RATE samples with librosa's default framing - TODO confirm.
    mel_spectograms = np.zeros((len(file_full_paths), 128, 32))

    # Only peek at the first path when there is one, so an empty list no longer crashes.
    if file_full_paths:
        print(file_full_paths[0]) # for debugging

    for i, path in enumerate(tqdm(file_full_paths)):
        signal, _sr = get_signal_from_path(path)
        mel_spectograms[i] = create_mel_spectogram(signal)

    y_numeric = get_numpy_numeric_vector_from_str_labels(labels)

    return mel_spectograms, y_numeric



In [None]:
def get_and_save_all_mfccs_and_labels():
    """Compute MFCC features and numeric labels for every data set and save them as ``.npy`` files.

    The data sets are processed one after another (un-augmented training, noise
    augmentation, pitch augmentation, validation, test) and written below
    ``numpy_arrays/``, which must already exist.
    """
    # (file list, class names, X output path, y output path).
    # Output names are unchanged so existing loaders keep working.
    # NOTE(review): the first y file has no "mfcc" in its name, unlike the others - confirm intended.
    datasets = [
        # train no augmentation
        (x_train_no_augmentations_file_list, y_train_no_augmentations_class_names,
         "numpy_arrays/X_train_mfcc_unaugmented.npy", "numpy_arrays/y_train_unaugmented.npy"),
        # train noice augmentation
        (x_train_noice_augmentation_file_list, y_train_noice_augmentation_class_names,
         "numpy_arrays/X_train_mfcc_noice_augmentation.npy", "numpy_arrays/y_train_mfcc_noice_augmentation.npy"),
        # train pitch augmentation
        (x_train_pitch_augmentation_file_list, y_train_pitch_augmentation_class_names,
         "numpy_arrays/X_train_mfcc_pitch_augmentation.npy", "numpy_arrays/y_train_mfcc_pitch_augmentation.npy"),
        # validation
        (x_validation_file_list, y_validation_class_names,
         "numpy_arrays/X_validation_mfcc.npy", "numpy_arrays/y_validation_mfcc.npy"),
        # test
        (x_test_file_list, y_test_class_names,
         "numpy_arrays/X_test_mfcc.npy", "numpy_arrays/y_test_mfcc.npy"),
    ]

    for file_list, class_names, x_path, y_path in datasets:
        X_mfcc, y = prepare_numpy_X_mfcc_and_y_from_files(file_list, class_names)
        np.save(x_path, X_mfcc)
        np.save(y_path, y)

    
def get_and_save_all_mel_spectograms_and_labels():
    """Compute mel-spectrogram features and numeric labels for every data set and save them as ``.npy`` files.

    The data sets are processed one after another (noise augmentation, pitch
    augmentation, un-augmented training, validation, test) and written below
    ``numpy_arrays/``, which must already exist.
    """
    # (file list, class names, X output path, y output path).
    # Output names are unchanged so existing loaders keep working.
    datasets = [
        # train noice augmentation
        (x_train_noice_augmentation_file_list, y_train_noice_augmentation_class_names,
         "numpy_arrays/X_train_mel_spectogram_noice_augmentation.npy",
         "numpy_arrays/y_train_mel_spectogram_noice_augmentation.npy"),
        # train pitch augmentation
        (x_train_pitch_augmentation_file_list, y_train_pitch_augmentation_class_names,
         "numpy_arrays/X_train_mel_spectogram_pitch_augmentation.npy",
         "numpy_arrays/y_train_mel_spectogram_pitch_augmentation.npy"),
        # train no augmentation
        (x_train_no_augmentations_file_list, y_train_no_augmentations_class_names,
         "numpy_arrays/X_train_mel_spectogram_unaugmented.npy",
         "numpy_arrays/y_train_mel_spectogram_unaugmented.npy"),
        # validation
        (x_validation_file_list, y_validation_class_names,
         "numpy_arrays/X_validation_mel_spectogram.npy",
         "numpy_arrays/y_validation_mel_spectogram.npy"),
        # test
        (x_test_file_list, y_test_class_names,
         "numpy_arrays/X_test_mel_spectogram.npy",
         "numpy_arrays/y_test_mel_spectogram.npy"),
    ]

    for file_list, class_names, x_path, y_path in datasets:
        X, y = prepare_numpy_X_mel_spectogram_and_y_from_files(file_list, class_names)
        np.save(x_path, X)
        np.save(y_path, y)

In [None]:
# Output folder for the saved .npy feature and label arrays.
numpy_arrays_folder = "numpy_arrays"
os.makedirs(numpy_arrays_folder, exist_ok=True)

In [None]:
# Compute and save the MFCC arrays for all data sets (needs the numpy_arrays folder).
get_and_save_all_mfccs_and_labels()

In [None]:
# Compute and save the mel-spectrogram arrays for all data sets (needs the numpy_arrays folder).
get_and_save_all_mel_spectograms_and_labels()