In [2]:
"""



To use this script, make the following preparations:
- Create a working directory
- Inside the working directory, have a single ..._clips.npy file. This file determines which
  character's model the generated data will be used to train.
- Inside the working directory, have a single folder named 'overlay_clips'
- Inside the overlay_clips directory, include one or more ..._clips.npy files of other characters. These will
  be used to generate data that helps the known character's model differentiate it from other characters.
- Inside the working directory, create an empty subdirectory named 'output'
"""

import numpy as np
import librosa
import os
from scipy.io.wavfile import write
import pydub
import random



In [None]:
"""
Specify script parameters
"""
# specify the working directory; note that directory notation must use '/' rather than '\'
# and the path must end with a trailing '/', because later cells build file paths
# by plain string concatenation (e.g. wd+'overlay_clips')
wd = 'F:/ZaknafeinII_Backup_02-02-22/daea/training_data_generation/id16/'

# Initially using 70%/15%/15% proportions for training, validation, and testing.
# Only the training and validation fractions are set here; the testing share is
# whatever remains after those two slices are taken from the known character's clips.
train_proportion = 0.7
val_proportion = 0.15

In [11]:
# this will store the known character's clips
known_char_clips = None

# this will store the clips for all other characters
other_char_clips_array = []

# navigate the specified working directory to determine known character and other character clips arrays
overlay_dir = os.path.join(wd, 'overlay_clips')
wd_contents = os.listdir(wd)
wd_overlays_contents = os.listdir(overlay_dir)

# Only regular .npy files are treated as clip arrays. Without this filter any stray file
# in the working directory (a notebook, a .wav, ...) would be handed to np.load, and when
# several files are present the last one listed would silently win.
known_char_files = sorted(
    item for item in wd_contents
    if item.endswith('.npy') and os.path.isfile(os.path.join(wd, item))
)
if len(known_char_files) != 1:
    raise ValueError(
        'Expected exactly one .npy clips file in %s, found %d: %s'
        % (wd, len(known_char_files), known_char_files)
    )

known_char_file = known_char_files[0]
print('For known character data: %s\n' % (known_char_file))

# load the known character's ...clips.npy file
with open(os.path.join(wd, known_char_file), 'rb') as f:
    known_char_clips = np.load(f)

print('For other character data:')
# sorted() makes the character order independent of the OS directory listing order
for item in sorted(wd_overlays_contents):
    item_path = os.path.join(overlay_dir, item)

    # skip sub-directories and anything that is not a saved numpy array
    if not (item.endswith('.npy') and os.path.isfile(item_path)):
        continue

    # load the current other character's ...clips.npy file
    with open(item_path, 'rb') as f:
        other_char_clips_array.append(np.load(f))

    print('\t%s' % (item))

# Stack the per-character arrays along a new leading "character" axis.
# NOTE(review): this assumes every other character's array has the same shape — confirm
# for new data sets, since arrays of differing shapes cannot be stacked this way.
other_char_clips_array = np.array(other_char_clips_array)

For known character data: id10016_merged_cleaned.wav_clips.npy

For other character data:
	id10130_merged_cleaned.wav_clips.npy
	id10168_merged_cleaned.wav_clips.npy
	id10484_merged_cleaned.wav_clips.npy


In [12]:
# Report the dimensions of the loaded clip arrays as a quick sanity check.
for shape_label, clip_array in (
    ('known__char_clips.shape:', known_char_clips),
    ('other_char_clips_array.shape:', other_char_clips_array),
):
    print(shape_label, clip_array.shape)

known__char_clips.shape: (1497, 88200)
other_char_clips_array.shape: (3, 1497, 88200)


In [10]:
"""
Shuffle all clips for all characters and set up the clips to be overlaid.
Because clip overlaying is based on averaging values, overlaying a known char clip
with itself returns itself.

There are n+1 overlay options that are selected randomly, where n is the number of
other characters to choose from. The +1 is the choice of no overlay (by overlaying with
itself).
"""

# we need to shuffle order of the clips
# NOTE: random.shuffle() must not be used on a 2-D numpy array. It swaps elements with
# `x[i], x[j] = x[j], x[i]`, and on a numpy array x[i] / x[j] are *views* of rows, so the
# first assignment overwrites a row before it is copied and rows end up duplicated/lost.
# np.random.shuffle() permutes the rows (first axis) in place without corrupting them.
# shuffle known character
np.random.shuffle(known_char_clips)
# shuffle other characters (each character's clips are shuffled independently)
for index in range(0, len(other_char_clips_array)):
    np.random.shuffle(other_char_clips_array[index])


# overlay_clips stores the clips to overlay the known character
overlay_clips = []

# randomly select clips from the other characters to append to overlay clips
# can also do the same clip as the known character (which represents no overlay, just the normal voice)
# all scenarios have equal chance
num_other_chars = len(other_char_clips_array)
for clip_index in range(len(known_char_clips)):
    # determine random index
    # randint() is inclusive; if the int is out of bounds we interpret this as no overlay
    rand_index = random.randint(0, num_other_chars)

    # no overlay
    if rand_index == num_other_chars:
        # just duplicate the clip from the known char at the same index
        overlay_clips.append(known_char_clips[clip_index])

    # other character's clip overlay
    else:
        # NOTE(review): assumes every other character has at least as many clips as the
        # known character, otherwise this index is out of range — confirm for new data.
        overlay_clips.append(other_char_clips_array[rand_index][clip_index])

[564, 499, 577, 1070, 199, 1310, 489, 939, 542, 864, 741, 7, 558, 906, 1063, 1344, 916, 811, 686, 1, 1381, 44, 79, 269, 813, 1199, 1249, 274, 107, 472, 430, 1055, 1415, 1304, 842, 966, 730, 881, 1354, 897, 1033, 885, 202, 890, 356, 87, 446, 963, 237, 571, 1148, 1368, 555, 1203, 338, 1008, 1013, 337, 1460, 654, 65, 902, 339, 1470, 922, 1389, 942, 1016, 185, 1191, 1256, 1473, 1151, 348, 796, 383, 670, 95, 1236, 1313, 766, 1141, 153, 493, 249, 187, 1491, 378, 979, 119, 1341, 1054, 146, 328, 26, 1006, 388, 1000, 1212, 624, 560, 49, 737, 736, 559, 1227, 364, 1061, 816, 1127, 1235, 93, 1255, 1298, 1136, 618, 1330, 1443, 136, 600, 566, 753, 748, 664, 984, 718, 895, 609, 1225, 1195, 482, 1096, 786, 752, 460, 458, 1331, 1173, 1100, 1410, 1261, 88, 932, 1129, 25, 1186, 1247, 694, 536, 1429, 1132, 1038, 424, 1064, 74, 1056, 667, 732, 764, 1355, 523, 1095, 588, 1167, 590, 552, 1449, 182, 1404, 436, 259, 1207, 995, 110, 1323, 1131, 576, 1284, 1206, 1369, 1333, 602, 4, 1442, 1489, 578, 975, 324, 561

In [None]:
"""
I think we should try to maintain a 70%/15%/15% split for training, validation, and testing data.
Toward this end, I think that no clips that appear in one bracket should appear in another, even if
we are shuffling things.

These proportions can be specified above in the parameter section.
"""

# determine clip array slicing
# int() truncates toward zero, which equals floor() for these non-negative products;
# the bare name `floor` was never imported, so the original lines raised NameError.
num_known_clips = len(known_char_clips)
end_frame_train = int(train_proportion * num_known_clips)
end_frame_val = end_frame_train + int(val_proportion * num_known_clips)

# slice the known character clips into three non-overlapping, contiguous ranges;
# the test slice takes everything left over after training and validation
known_char_clips_for_train = known_char_clips[0:end_frame_train]
known_char_clips_for_val = known_char_clips[end_frame_train:end_frame_val]
known_char_clips_for_test = known_char_clips[end_frame_val:]

# TODO: WAIT WE CAN OVERLAY FIRST (i.e. consider applying the overlay before this split)

In [62]:
# Quick experiment: overlay the first clip of the first two "other" characters.
first_clip_char0 = other_char_clips_array[0][0]
first_clip_char1 = other_char_clips_array[1][0]
to_overlay = [first_clip_char0, first_clip_char1]

# An overlay is the sample-wise average of the selected clips.
num_overlaid = len(to_overlay)
test = sum(to_overlay) / num_overlaid
print(test.shape)

(88200,)


In [63]:
# Write the averaged test clip into the working directory's 'output' folder as a WAV file
# so the overlay can be checked by ear. The path is derived from `wd` (which ends with '/')
# instead of repeating the absolute path, so changing the working directory in the
# parameter cell is enough to redirect this output.
sample_rate = 22050  # rate (samples/sec) handed to scipy's write(); NOTE(review): confirm it matches the clips
loc = wd + 'output/test_overlay.wav'
write(loc, sample_rate, test)

In [64]:
# Reference on normalizing audio volume: https://stackoverflow.com/questions/42492246/how-to-normalize-the-volume-of-an-audio-file-in-python


<_io.BufferedRandom name='F:/ZaknafeinII_Backup_02-02-22/daea/training_data_generation/id16/output/test_overlay_normalized.wav'>