In [1]:
import pandas as pd
import numpy as np
import scipy
import soundfile as sf
import random

from itertools import product
from scipy.io import wavfile
import librosa
import os

import time

In [2]:
# Upper bound on both the number of speakers used and the number of feature
# fragments kept per speaker; wavsToData lowers it when the dataset contains
# fewer distinct speakers.
squareSize = 100
# Every fragment is paired for each y in range(1, combiSize), i.e. it yields
# combiSize - 1 same-speaker pairs and combiSize - 1 different-speaker pairs.
combiSize = 7

In [3]:
def loadAudio(path):
    """Read an audio file and return it as 0.5 second fragments at 44.1 kHz.

    Parameters
    ----------
    path : str
        Location of an audio file readable by ``soundfile``.

    Returns
    -------
    tuple
        ``(halfseconds, sr)``: the 2-D array produced by ``halfSecondSplit``
        (one row per half second) and the sample rate, which is always 44100.
    """
    targetRate = 44100
    audiodata, sr = sf.read(path)

    # soundfile returns (frames, channels) for multi-channel files, while the
    # fragment splitting expects a 1-D signal: average the channels to mono.
    if audiodata.ndim > 1:
        audiodata = audiodata.mean(axis=1)

    if sr != targetRate:
        # orig_sr/target_sr are keyword-only in librosa >= 0.10; passing them
        # by name is accepted by older releases as well.
        audiodata = librosa.resample(audiodata, orig_sr=sr, target_sr=targetRate)
        sr = targetRate

    halfseconds = halfSecondSplit(audiodata, sr)
    return halfseconds, sr

In [4]:
def halfSecondSplit(samples, samplerate):
    """Split audio samples into consecutive 0.5 second fragments.

    Parameters
    ----------
    samples : array-like
        Audio samples (1-D signal).
    samplerate : int
        Number of samples per second.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per fragment and ``samplerate // 2`` columns.
        A trailing remainder shorter than half a second is discarded.

    Raises
    ------
    ValueError
        If ``samplerate`` is too small to give a fragment of at least one sample.
    """
    # Integer arithmetic keeps the truncation length and the reshape
    # dimensions consistent, also for odd sample rates.
    fragmentLength = int(samplerate) // 2
    if fragmentLength <= 0:
        raise ValueError('samplerate must be at least 2, got ' + str(samplerate))

    samples = np.asarray(samples)

    # Disregard the trailing part that does not fill a whole half second.
    usableLength = len(samples) - (len(samples) % fragmentLength)
    samples = samples[:usableLength]

    return np.reshape(samples, (samples.size // fragmentLength, fragmentLength))

In [5]:
def wavsToData(audiofiledir):
    """Extract per-speaker MFCC fragments from a directory and build pairs.

    Every entry of ``audiofiledir`` is treated as an audio file whose name
    starts with the speaker id followed by ``'-'``. For at most ``squareSize``
    speakers, at most ``squareSize`` half-second MFCC fragments are collected
    and the result is handed to ``makeBalancedCombos``.

    Side effects: lowers the module-level ``squareSize`` to the number of
    distinct speakers when fewer are available, and prints the elapsed time.

    Parameters
    ----------
    audiofiledir : str
        Directory containing the audio files (with or without trailing slash).
    """
    timestamp = time.perf_counter()

    global squareSize

    # Group the files per speaker in a single pass; dicts keep insertion
    # order, so speakers appear in the order os.listdir first yields them.
    filesBySpeaker = {}
    for filename in os.listdir(audiofiledir):
        speakerId = filename.split('-')[0]
        filesBySpeaker.setdefault(speakerId, []).append(filename)

    iDs = list(filesBySpeaker)
    if squareSize > len(iDs):
        squareSize = len(iDs)

    data = []
    for speaker in iDs[:squareSize]:
        samples = []
        for file in filesBySpeaker[speaker]:
            # os.path.join works whether or not the directory ends in a slash.
            HalfSecondSamples, sr = loadAudio(os.path.join(audiofiledir, file))
            for halfSecond in HalfSecondSamples:
                # convertToSpectro(halfSecond, sr) is the alternative feature.
                samples.append(convertToMFCC(halfSecond, sr))

            # Stop reading files once enough fragments are available.
            if len(samples) >= squareSize:
                break

        data.append([speaker, samples[:squareSize]])

    duration = time.perf_counter() - timestamp
    print('duration: ' + str(duration))

    makeBalancedCombos(data)

In [6]:
def convertToMFCC(data, samplerate):
    """Return the 40-coefficient MFCC matrix librosa computes for one fragment.

    Parameters
    ----------
    data : numpy.ndarray
        Audio samples of the fragment.
    samplerate : int
        Sample rate of ``data``.
    """
    return librosa.feature.mfcc(y=data, sr=samplerate, n_mfcc=40)

In [7]:
def convertToSpectro(data, samplerate):
    """Return the mel spectrogram librosa computes for one audio fragment.

    Uses a Hann window with an FFT size of a quarter second and a hop of an
    eighth of a second (both derived from ``samplerate``).

    Parameters
    ----------
    data : numpy.ndarray
        Audio samples of the fragment.
    samplerate : int
        Sample rate of ``data``.
    """
    # The signal must be passed as y=...: it is keyword-only in librosa >= 0.10.
    Spectro = librosa.feature.melspectrogram(
        y=data,
        sr=samplerate,
        window=scipy.signal.windows.hann,
        n_fft=int(samplerate/4),
        hop_length=int(samplerate/8),
    )
    return Spectro

In [8]:
def saveFragmentsToNP(data):
    """Write ``data`` to 'Output.npy' (relative to the working directory) with np.save."""
    outputFile = 'Output.npy'
    np.save(outputFile, data)

In [9]:
def flattenArray(data):
    """Convert ``data`` to a NumPy array and return a flattened 1-D copy of it."""
    return np.array(data).flatten()

In [10]:
def makeBalancedCombos(data):
    """Build an equal number of same-speaker and different-speaker pairs.

    For every fragment and every ``y`` in ``range(1, combiSize)`` two pairs
    are created: the fragment with another fragment of the same speaker
    (label True) and the fragment with a random fragment of a different
    speaker (label False). The three resulting lists are passed on to
    ``makeArrays``.

    Parameters
    ----------
    data : list
        ``[speakerId, fragments]`` entries; at most the first ``squareSize``
        entries are used.

    Raises
    ------
    ValueError
        If pairs have to be generated but fewer than two speakers are
        available, or one of the speakers has no fragments.
    """
    timestamp = time.perf_counter()

    speaker1 = []
    speaker2 = []
    labels = []

    # squareSize and combiSize are only read here, so no `global` is needed.
    speakerCount = min(squareSize, len(data))
    pairsPerSample = max(combiSize - 1, 0)
    rows = [data[x][1] for x in range(speakerCount)]

    # Computed from the real fragment counts so the percentage stays correct
    # when a speaker has fewer than squareSize fragments.
    total = 2 * pairsPerSample * sum(len(row) for row in rows)
    if total > 0:
        if speakerCount < 2:
            raise ValueError('at least two speakers are required to build different-speaker pairs')
        if any(len(row) == 0 for row in rows):
            raise ValueError('every speaker needs at least one fragment')

    counter = 0
    lastPercent = -1

    for x, row in enumerate(rows):
        for index, sample in enumerate(row):
            for y in range(1, combiSize):
                # Take a sample from the same row (speaker); wrap on the
                # actual row length so short rows cannot raise IndexError.
                samplesOne = row[(index + y) % len(row)]

                speaker1.append(sample)
                speaker2.append(samplesOne)
                labels.append(True)

                # Offset in 1..speakerCount-1 (equal to y while y < speakerCount),
                # so the partner is never speaker x itself and a False label
                # is always correct.
                offset = 1 + (y - 1) % (speakerCount - 1)
                otherRow = rows[(x + offset) % speakerCount]
                # Take a random sample from a different row (speaker);
                # every index, including 0, is eligible.
                samplesTwo = otherRow[random.randrange(len(otherRow))]

                speaker1.append(sample)
                speaker2.append(samplesTwo)
                labels.append(False)

                counter += 2

            if total:
                # Only print when the percentage changes to avoid flooding
                # the cell output.
                percent = int(counter/total*100)
                if percent != lastPercent:
                    print('Progress: ' + str(percent) + '%', end='\r')
                    lastPercent = percent

    duration = time.perf_counter() - timestamp
    print('duration: ' + str(duration))

    makeArrays(speaker1, speaker2, labels)

In [11]:
def makeArrays(speakerlist1, speakerlist2, labelslist):
    """Convert the pair lists to NumPy arrays, save them and print a summary.

    The arrays are written to ``./outputs`` (created when missing) as
    ``44khz<squareSize>_<combiSize>_speaker1.npy``, ``..._speaker2.npy`` and
    ``..._labels.npy``. Afterwards the label counts, the array shapes and the
    elapsed time are printed.

    Parameters
    ----------
    speakerlist1 : list
        First element of every pair.
    speakerlist2 : list
        Second element of every pair.
    labelslist : list
        True for same-speaker pairs, False otherwise.
    """
    timestamp = time.perf_counter()

    speaker1 = np.array(speakerlist1)
    speaker2 = np.array(speakerlist2)
    labels = np.array(labelslist)

    # np.save does not create missing directories.
    outputDir = './outputs'
    os.makedirs(outputDir, exist_ok=True)

    # squareSize and combiSize are module-level settings, only read here.
    prefix = outputDir + '/44khz' + str(squareSize) + '_' + str(combiSize) + '_'
    np.save(prefix + 'speaker1.npy', speaker1)
    np.save(prefix + 'speaker2.npy', speaker2)
    np.save(prefix + 'labels.npy', labels)

    print('verhouding true/false: ')
    print(np.unique(labels, return_counts=True))
    print(speaker1.shape, speaker2.shape, labels.shape)

    duration = time.perf_counter() - timestamp
    print('duration: ' + str(duration))

In [12]:
def main():
    """Run the pipeline on the './Datasets/LibriSpeech/train-clean-100/' directory."""
    datasetDir = "./Datasets/LibriSpeech/train-clean-100/"
    wavsToData(datasetDir)

In [13]:
# Run the full pipeline: feature extraction, pair generation and saving.
main()

duration: 221.78783049806952


Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%Progress: 0%

Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 53%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progress: 54%Progre

Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progress: 80%Progre

verhouding true/false: 
(array([False,  True]), array([60000, 60000]))
(120000, 40, 44) (120000, 40, 44) (120000,)
duration: 2.7915570167824626
