# Generating Custom Imbalanced Data
Here we use the test data produced by 'Data Preprocessing.ipynb' and select a chosen number of MFCC feature vectors for each person to create a custom imbalanced input.

In [2]:
import numpy as np    # Importing essential libraries
import random

In [3]:
# Base names of the available recordings; each one selects a pair of
# feature/label .npy files to draw samples from.
name1 = 'test_IB4011'
name2 = 'test_IS1000c'
name3 = 'test_TS3010c'
name4 = 'train'

# Recording actually used as the source for the custom set.
file = name1

# Speaker labels to keep, and how many feature vectors to take for each:
# samples[i] is the count requested for speakers[i].
speakers = [1, 2, 3, 4]
samples = [60, 100, 150, 190]

# True  -> draw each speaker's vectors at random (without replacement).
# False -> take each speaker's first samples[i] vectors in stored order.
randomize = False

In [4]:
# Build the paths of the feature array and its label array for the selected
# recording, then load both.
features_path = './results/mel_save_' + file + '.npy'
labels_path = './results/mel_labels_' + file + '.npy'

X = np.load(features_path)
Y = np.load(labels_path)

In [5]:
# Bucket the feature vectors by speaker: one (initially empty) list per
# requested speaker, filled in the order the vectors appear in X.
mel_collection = {speaker: [] for speaker in speakers}

for row, features in enumerate(X):
    label = Y[row]
    # Entries whose label is not one of the requested speakers are dropped.
    if label in speakers:
        mel_collection[label].append(features)

In [6]:
# Build the custom imbalanced set: for every speaker take the requested number
# of feature vectors, and give each selected vector a one-unit-long timestamp
# [time, time + 1] on a running counter.
X_custom_test = []
Y_custom_test = []
custom_timestamps = []
time = 0

# Validate the whole request before sampling anything. Raising here (instead
# of printing and breaking out of the loop) guarantees that a bad
# configuration can never leave half-built lists behind for the save cell to
# write to disk as if they were a complete dataset.
if len(samples) != len(speakers):
    raise ValueError(
        "Expected one sample count per speaker, got "
        + str(len(samples)) + " counts for "
        + str(len(speakers)) + " speakers"
    )
for speaker, requested in zip(speakers, samples):
    available = len(mel_collection[speaker])
    if requested > available:
        raise ValueError(
            "Larger number of samples specified for speaker " + str(speaker)
            + " (specified: " + str(requested)
            + ", available: " + str(available) + ")"
        )

for speaker, requested in zip(speakers, samples):
    pool = mel_collection[speaker]
    if randomize:
        # Random draw without replacement from this speaker's vectors.
        indices = random.sample(range(len(pool)), requested)
    else:
        # Deterministic: the first `requested` vectors in stored order.
        indices = range(requested)
    for index in indices:
        X_custom_test.append(pool[index])
        Y_custom_test.append(speaker)
        custom_timestamps.append([time, time + 1])
        time += 1

In [7]:
# Write the sampled features, their speaker labels and the synthetic
# timestamps to disk, in that order.
for path, data in (
    ('./custom_tests/mel_save_test_custom_2.npy', X_custom_test),
    ('./custom_tests/mel_labels_test_custom_2.npy', Y_custom_test),
    ('./custom_tests/timestamps_save_custom_2.npy', custom_timestamps),
):
    np.save(path, data)