# Transform collected YouTube Files to Trainingsdata
We want to use two kinds of trainingdata. First we want to split every song in junks of circa 1 second. These junks get sorted by similarity and then labeled accordingly. We will use these junks as trainingdata to create new junks of audio. Additionally we want to store the order of junk-lables. This order will be used to create a new orders.

## Wavesets
Every track from the training data gets split in wavesets with a duration of aproximattly 1 second.

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import librosa
import librosa.display
import soundfile
from IPython.display import display, Audio

In [4]:
np.random.seed(42)

### Loading data (demo track):

In [70]:
folder_path = 'demoData'
paths = librosa.util.find_files(folder_path, ext=['wav']) 
songs = []

for path in paths: 
    data, sr = librosa.load(path, sr = None, mono = True)
    songs.append(data)

AttributeError: 'list' object has no attribute 'shape'

### Waveset class

In [55]:
class Wavesets:
    def __init__(self, audio_data, sample_rate):
            self.data = audio_data
            self.sr = sample_rate
    
    def get_splitPoints(self, data):
        split_points = np.argwhere(
        (np.sign(data[:-1])==-1) & (np.sign(data[1:])==1)
        )
        return split_points
    
    def plot_wavesets(self, data, split_points, plot_range):
        data_range = data[plot_range[0]:plot_range[1]]
        plt.figure(figsize=(15, 5))
        plt.plot(data_range)
        plt.hlines(0.0, plot_range[0], plot_range[1], color='r')
        plt.vlines(split_points[(plot_range[0]<split_points) & (split_points<plot_range[1])], data_range.max(), data_range.min(), color='g')
        plt.xlabel("samples")
        plt.ylabel("amplitude")
        plt.title("Wavesets")
        plt.show();
        
    def resize_splits(self, splits, min_size):
        new_splits = []
        last_split = 0
        for split in splits:
            if (split - last_split) > min_size:
                last_split = split
                new_splits.append(split)
        return np.array(new_splits)
    

    def generate_wavesets(self, data, split_points):
        wavesets = []
        # we iterate over split points 2 times: once with no offset
        # and a 2nd time with an offset of 1 which gives us
        # the start and the end - zip allows us to iterate
        # over a collection of arrays in parallel
        for start, end in zip(split_points[:-1, 0], split_points[1:, 0]):
            wavesets.append(data[start:end+1])
            
        return wavesets
    
    def get_Wavesets(self):
        split_points = self.get_splitPoints(self.data)
        split_points = self.resize_splits(split_points, self.sr*2)
        #self.plot_wavesets(data, split_points, plot_range=[0, self.sr * 60 * 7])
        wavesets = self.generate_wavesets(self.data, split_points)
        return wavesets
    
    @classmethod
    def enumerate_Wavesets(self, wavesets):
        enum_wavesets = []
        num = 0
        for ws in wavesets:
            enum_wavesets.append([ws, num])
            num = num+1
        return enum_wavesets

### Generate Wavesets
We can now generate all Wavesets. We also store their position in the song for later use

In [56]:
all_wavesets = []

for song in songs:
    wavesets = Wavesets(song, sr).get_Wavesets()
    enum_wavesets = Wavesets.enumerate_Wavesets(wavesets) # the position in the song gets stored as an integer-value. The data has now this structure: [[amplitude, amplitude, amplitude],position_in_song]]
    all_wavesets.append(enum_wavesets)

## Label wavesets
The wavesets of all songs get ordered by similiarity. After that they get split in 1000 categories, i.e. they get labeled with a value between 0-999.

First we create the spectrogram of each waveset. For this we use the FFT

In [57]:
def fft_wavesets(wavesets, n_fft, hop_length, win_length):
    fft_data = []
    for ws in wavesets:
        fft = librosa.stft(data, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
        fft_data.append(fft)
    return fft_data 

In [65]:
WIN_LENGTH = int(sr/4)
HOP_LENGTH = int(sr/6)
N_FFT = int(sr/2)

for waveset in all_wavesets:
    fft_data = fft_wavesets(waveset[0], N_FFT, HOP_LENGTH, WIN_LENGTH)
    waveset[0] = fft_data # We replace the PCM-data with FFT-data

AttributeError: 'list' object has no attribute 'shape'