# Mixing different signals for input data creation

## Imports and drive mounting

In [13]:
# Mount Google Drive at /content/gdrive; every path used below lives under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [0]:
import librosa
import numpy as np
import os
from os import listdir
from os.path import isfile, join

In [0]:
import sys

# Folder on the mounted Drive that contains the project's audio_masking module.
moduleDir = '/content/gdrive/My Drive/Nagy házi/'
# Guarded so that re-running this cell does not keep appending duplicate entries.
if moduleDir not in sys.path:
  sys.path.append(moduleDir)

# IRMbeta is the only audio_masking name this notebook calls; importing it
# explicitly (instead of a wildcard) keeps the namespace clean and makes the
# origin of the name obvious to the reader.
from audio_masking import IRMbeta

In [4]:
# Folder holding one .npy file per utterance.
baseFolder = "/content/gdrive/My Drive/Nagy házi/audiobooks/single_stft"

# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make the seeded random sampling further down reproducible between runs.
candidatePaths = (join(baseFolder, name) for name in sorted(listdir(baseFolder)))
# Keep regular files only (sub-directories are skipped).
baseFileNames = [path for path in candidatePaths if isfile(path)]
print(len(baseFileNames))

3387


## Extracting speakers

Identifying the 10 different speakers and storing their names

In [5]:
# Collect every distinct speaker name in order of first appearance.
# The name is parsed from the path: take the text after the last "__",
# then drop the final "_"-separated token.
speakers = []

for filePath in baseFileNames:
  nameTokens = filePath.split("__")[-1].split("_")
  speakerName = "_".join(nameTokens[:-1])
  if speakerName in speakers:
    continue
  speakers.append(speakerName)
  print("Added: " + speakerName)

print(speakers)

Added: Joseph_Ugoretz
Added: Rosalind_Wills
Added: Debra_Lynn
Added: Stewart_Wills
Added: Paul_Hansen
Added: Zachary_Brewster-Geisz
Added: Jemma_Blythe
Added: Kristen_Ferreri
Added: Graham_Thomsen
Added: Stuart_Bell
['Joseph_Ugoretz', 'Rosalind_Wills', 'Debra_Lynn', 'Stewart_Wills', 'Paul_Hansen', 'Zachary_Brewster-Geisz', 'Jemma_Blythe', 'Kristen_Ferreri', 'Graham_Thomsen', 'Stuart_Bell']


## Creating pairs

The following code pairs randomly selected 5-second signals from different speakers. Duplicate pairs are avoided: every combination of two speakers is processed only once.

The workflow of the program:


1.   Choosing a speaker from the ten
2.   Selecting 10 random utterances
3.   Choosing 11 random utterances for each of them from every speaker that has not been processed yet
4.   Creating pairs 
5.   Continue with next speaker (number of options decreases in each iteration)



In [6]:
import random

# Seed both generators so the sampled pairs are repeatable.
random.seed(123)
np.random.seed(123)

# Utterances drawn from the current speaker, and partner utterances drawn from
# each remaining speaker for every one of them.
UTTERANCES_PER_SPEAKER = 10
PARTNERS_PER_UTTERANCE = 11

# Group the files by speaker once, instead of re-scanning the whole file list
# inside the nested loops. The name is parsed exactly as in the speaker
# extraction step, so each file belongs to exactly one speaker (a substring
# test on the full path could match a file to several speakers).
filesBySpeaker = {name: [] for name in speakers}
for bfn in baseFileNames:
  parsedName = "_".join(bfn.split("__")[-1].split("_")[:-1])
  if parsedName in filesBySpeaker:
    filesBySpeaker[parsedName].append(bfn)

# Fail early with a readable message instead of random.sample's generic error.
requiredFiles = max(UTTERANCES_PER_SPEAKER, PARTNERS_PER_UTTERANCE)
for name in speakers:
  if len(filesBySpeaker[name]) < requiredFiles:
    raise ValueError("Speaker " + name + " has fewer than " + str(requiredFiles) + " utterances")

pairs = []
for position, speaker1 in enumerate(speakers):
  selected1 = filesBySpeaker[speaker1]
  # Only speakers that come later are paired with speaker1, so every
  # combination of two speakers is visited exactly once.
  remainingSpeakers = speakers[position + 1:]
  indices1 = random.sample(range(len(selected1)), UTTERANCES_PER_SPEAKER)
  for index1 in indices1:
    utterance1 = selected1[index1]
    for speaker2 in remainingSpeakers:
      selected2 = filesBySpeaker[speaker2]
      # A fresh draw per (utterance1, speaker2) combination.
      indices2 = random.sample(range(len(selected2)), PARTNERS_PER_UTTERANCE)
      pairs.extend([utterance1, selected2[index2]] for index2 in indices2)

len(pairs)

4950

Loading the first utterance of the first pair to determine the shape and dtype of the output array

In [7]:
# The first file of the first pair serves as the template for shape and dtype.
X0 = np.load(pairs[0][0], allow_pickle=False, fix_imports=True)

# One row per pair; each row stores 4 arrays shaped like X0.
outputShape = (len(pairs), 4) + tuple(X0.shape)
# Allocated uninitialised; rows are filled in when the pairs are loaded.
outputArray = np.empty(outputShape, dtype=X0.dtype)
outputArray.shape

(4950, 4, 257, 431)

Loading each pair into the output array and creating masks

In [8]:
# Fill outputArray row by row: both loaded signals followed by their two masks.
totalPairs = len(pairs)
counter = 0
for firstPath, secondPath in pairs:
  firstSignal = np.load(firstPath, allow_pickle=False, fix_imports=True)
  secondSignal = np.load(secondPath, allow_pickle=False, fix_imports=True)
  # IRMbeta comes from audio_masking (not visible here); presumably it returns
  # one mask per input signal, computed with beta = 0.5 - TODO confirm.
  firstMask, secondMask = IRMbeta([firstSignal, secondSignal], 0.5)
  row = np.array([firstSignal, secondSignal, firstMask, secondMask])
  outputArray[counter] = row
  counter = counter + 1
  # Progress in percent of all pairs.
  print(str(counter/totalPairs*100) + "%")

0.0202020202020202%
0.0404040404040404%
0.06060606060606061%
0.0808080808080808%
0.10101010101010101%
0.12121212121212122%
0.1414141414141414%
0.1616161616161616%
0.18181818181818182%
0.20202020202020202%
0.2222222222222222%
0.24242424242424243%
0.26262626262626265%
0.2828282828282828%
0.30303030303030304%
0.3232323232323232%
0.3434343434343434%
0.36363636363636365%
0.3838383838383838%
0.40404040404040403%
0.4242424242424243%
0.4444444444444444%
0.4646464646464647%
0.48484848484848486%
0.5050505050505051%
0.5252525252525253%
0.5454545454545455%
0.5656565656565656%
0.5858585858585859%
0.6060606060606061%
0.6262626262626263%
0.6464646464646464%
0.6666666666666667%
0.6868686868686869%
0.7070707070707071%
0.7272727272727273%
0.7474747474747475%
0.7676767676767676%
0.787878787878788%
0.8080808080808081%
0.8282828282828283%
0.8484848484848486%
0.8686868686868687%
0.8888888888888888%
0.9090909090909091%
0.9292929292929294%
0.9494949494949495%
0.9696969696969697%
0.9898989898989898%
1.01010101

## Saving dataset

Splitting data to train, validation and test sets

In [0]:
# Random permutation of all row indices (sampling without replacement, full length).
sampleCount = len(outputArray)
indices = random.sample(range(sampleCount), sampleCount)

# Split boundaries: first 80% train, next 15% validation, last 5% test.
trainEnd = int(len(indices)*0.8)
validEnd = int(len(indices)*0.95)

trainIndices = indices[:trainEnd]
validIndices = indices[trainEnd:validEnd]
testIndices = indices[validEnd:]

Creating datasets and saving them in hdf5 format

In [14]:
import h5py
h5Path = "/content/gdrive/My Drive/Nagy házi/audiobooks/train_data/data.h5"

# Each stored sample is one outputArray row: 4 arrays shaped like X0.
sampleShape = (4, *X0.shape)
# Number of samples per HDF5 chunk (same value as before).
chunkRows = 5
# Dataset name -> row indices of outputArray that belong to that split.
splits = [("train", trainIndices), ("valid", validIndices), ("test", testIndices)]

# Make sure the target folder exists before the file is created.
os.makedirs(os.path.dirname(h5Path), exist_ok=True)

counter = 0
# Mode "w" creates the file and truncates an existing one, which replaces the
# former remove-then-append sequence.
with h5py.File(h5Path, "w") as f:
  for splitName, splitIndices in splits:
    rowCount = len(splitIndices)
    # h5py rejects a chunk shape larger than the dataset shape, so the chunk
    # is capped for small splits; an empty split is stored without chunking.
    chunks = (min(chunkRows, rowCount), *sampleShape) if rowCount else None
    dataset = f.create_dataset(splitName, shape=(rowCount, *sampleShape), dtype=X0.dtype, chunks=chunks)

    # Write one whole chunk per assignment instead of one row at a time, so
    # each chunk is written once rather than up to chunkRows times.
    for start in range(0, rowCount, chunkRows):
      batch = splitIndices[start:start + chunkRows]
      dataset[start:start + len(batch)] = outputArray[batch]
      counter += len(batch)
      # Overall progress across all three splits, in percent.
      print(str(counter/len(outputArray)*100) + "%")


0.0202020202020202%
0.0404040404040404%
0.06060606060606061%
0.0808080808080808%
0.10101010101010101%
0.12121212121212122%
0.1414141414141414%
0.1616161616161616%
0.18181818181818182%
0.20202020202020202%
0.2222222222222222%
0.24242424242424243%
0.26262626262626265%
0.2828282828282828%
0.30303030303030304%
0.3232323232323232%
0.3434343434343434%
0.36363636363636365%
0.3838383838383838%
0.40404040404040403%
0.4242424242424243%
0.4444444444444444%
0.4646464646464647%
0.48484848484848486%
0.5050505050505051%
0.5252525252525253%
0.5454545454545455%
0.5656565656565656%
0.5858585858585859%
0.6060606060606061%
0.6262626262626263%
0.6464646464646464%
0.6666666666666667%
0.6868686868686869%
0.7070707070707071%
0.7272727272727273%
0.7474747474747475%
0.7676767676767676%
0.787878787878788%
0.8080808080808081%
0.8282828282828283%
0.8484848484848486%
0.8686868686868687%
0.8888888888888888%
0.9090909090909091%
0.9292929292929294%
0.9494949494949495%
0.9696969696969697%
0.9898989898989898%
1.01010101

In [0]:
# Flush pending writes to Google Drive and unmount it, so the saved data.h5 is fully synced.
drive.flush_and_unmount()