In [None]:
import numpy as np
import tensorflow as tf
import random
import librosa
import glob
import os
from multiprocessing import Process
from multiprocessing.pool import Pool
from multiprocessing import Manager
from multiprocessing import Process
import time
class Model:
    """Builder for the Conv2D + stacked-LSTM binary classifier.

    Attributes:
        data_path: Dataset location kept for reference; not read by any
            method in this class.
        out_size: Number of consecutive clips per sample; stored only.
    """

    def __init__(self, out_size = 10, data_path = None):
        """Store the configuration.

        Args:
            out_size: Number of clips stacked per sample.
            data_path: Optional dataset path. When omitted, a module-level
                ``data_path`` is used if one exists (the original code read
                that global unconditionally and raised NameError otherwise);
                if there is none, the attribute is ``None``.
        """
        if data_path is None:
            data_path = globals().get("data_path")
        self.data_path = data_path
        self.out_size = out_size

    def single_lstm(self, input_shape, output_bias = None):
        """Build and compile the network.

        Layer stack: Conv2D(32, kernel (5, input_shape[1]), relu) ->
        Reshape((input_shape[0] - 4, 32)) -> BatchNorm -> LSTM(64, sequences)
        -> BatchNorm -> LSTM(64) -> BatchNorm -> Dropout(0.2) ->
        Dense(1, sigmoid).

        Args:
            input_shape: Shape of one sample without the batch axis. It is
                indexed at [0] and [1]; the convolution kernel spans all of
                axis 1, which is what makes the following Reshape valid.
            output_bias: Optional initial value for the output layer's bias
                (e.g. the log-odds of the positive class).

        Returns:
            The compiled ``tf.keras`` Sequential model.
        """
        # Imported here so the class does not depend on a later notebook cell
        # having imported tensorflow_addons first.
        import tensorflow_addons as tfa

        if output_bias is not None:
            bias_initializer = tf.keras.initializers.Constant(output_bias)
        else:
            # Use Dense's documented default instead of passing None through.
            bias_initializer = 'zeros'

        layers = tf.keras.layers
        model = tf.keras.models.Sequential()
        model.add(layers.Input(shape = input_shape))
        model.add(layers.Conv2D(filters = 32,
                                kernel_size = (5, input_shape[1]),
                                activation = 'relu'))
        # A height-5 kernel without padding removes 4 rows; width collapses to 1.
        model.add(layers.Reshape((input_shape[0] - 4, 32)))
        model.add(layers.BatchNormalization())
        model.add(layers.LSTM(64, return_sequences=True))
        model.add(layers.BatchNormalization())
        model.add(layers.LSTM(64))
        model.add(layers.BatchNormalization())
        model.add(layers.Dropout(0.2))  # was `tf.keras.layeres` (AttributeError)
        model.add(layers.Dense(1, activation = 'sigmoid',
                               bias_initializer = bias_initializer))
        model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                      optimizer="adam",
                      metrics=[tf.keras.metrics.Recall(),
                               tf.keras.metrics.Precision(),
                               tfa.metrics.F1Score(num_classes=1, average='macro',threshold=0.5),
                               'accuracy'])
        # summary() prints by itself and returns None, so it is not wrapped in print().
        model.summary()
        return model

class DataGenerator:
    """Endless batch generators of MFCC windows with one binary label each.

    Directory layout as used by the code:
    ``data_path/CC_*/<speaker folder>/<clip files>``. A sample is ``out_size``
    consecutive clips (by sorted file name) of one speaker folder; its label
    is True when any clip's file name ends with ``"1.wav"``.
    """

    # librosa.load resamples to 22050 Hz by default and time_to_samples
    # assumes the same rate; the value is passed explicitly so the two
    # cannot drift apart.
    SAMPLE_RATE = 22050

    def __init__(self, data_path, out_size, speech_len, validation_split = 0.1):
        """Index the ``CC_*`` folders and split them into train/validation.

        Args:
            data_path: Root directory containing the ``CC_*`` folders.
            out_size: Number of consecutive clips per sample.
            speech_len: Clip length in seconds; converted to samples.
            validation_split: Fraction of folders held out for validation
                (truncated towards zero).
        """
        self.data_path = data_path
        self.out_size = out_size
        self.speech_length = int(librosa.time_to_samples(speech_len, sr=self.SAMPLE_RATE))
        self.data_list = glob.glob(os.path.join(self.data_path,"CC_*"))
        random.shuffle(self.data_list)
        self.train, self.valid = self._split(self.data_list, validation_split)
        print(len(self.train), len(self.valid))

    @staticmethod
    def _split(data_list, validation_split):
        """Return ``(train, valid)``; the last ``int(n * split)`` items are valid.

        Slicing with ``[:-k]`` / ``[-k:]`` is wrong for ``k == 0`` (it yields an
        empty training set and puts everything in validation), so an explicit
        cut index is used instead.
        """
        n_valid = int(len(data_list) * validation_split)
        cut = len(data_list) - n_valid
        return data_list[:cut], data_list[cut:]

    def empty_sequence(self, n_part):
        """Return ``n_part`` all-zero raw clips, each ``speech_length`` long."""
        return [[0] * self.speech_length for _ in range(n_part)]

    def train_generator(self, batch_size):
        """Yield ``(data, labels)`` batches drawn from the training folders."""
        return self._batch_generator(self.train, batch_size)

    def valid_generator(self, batch_size):
        """Yield ``(data, labels)`` batches drawn from the validation folders."""
        return self._batch_generator(self.valid, batch_size)

    def _batch_generator(self, source, batch_size):
        """Shared loop: one worker process per sample, results via ``self.out``.

        A batch may hold fewer than ``batch_size`` samples because failed
        samples are dropped; batches that are empty or whose samples do not
        share one 3-D shape are skipped entirely.
        """
        manager = Manager()
        while True:
            self.out = manager.list()
            workers = [Process(target = self.load_files, args = (source,))
                       for _ in range(batch_size)]
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join()
            samples = [(data, label) for data, label in self.out
                       if data is not None and label is not None]
            if not samples:
                continue
            shapes = {np.shape(data) for data, _ in samples}
            # Equivalent to the original "stacked batch must be 4-D" check,
            # without building a throwaway object array.
            if len(shapes) != 1 or len(next(iter(shapes))) != 3:
                continue
            yield (np.array([data for data, _ in samples]),
                   np.array([label for _, label in samples]))

    def _fit_length(self, wave):
        """Return ``wave`` as exactly ``speech_length`` samples.

        Shorter input is left-padded with zeros; longer input is cropped at a
        random offset (every valid offset, including the last, can be drawn).
        """
        wave = np.asarray(wave)
        surplus = len(wave) - self.speech_length
        if surplus <= 0:
            return np.pad(wave, (-surplus, 0), mode="constant")
        start = np.random.randint(surplus + 1)
        return wave[start:start + self.speech_length]

    def _clip_features(self, clip):
        """Return the MFCC matrix of one fixed-length clip."""
        # `y` is passed by keyword: recent librosa releases reject it positionally.
        return librosa.feature.mfcc(y=clip, sr=self.SAMPLE_RATE)

    def load_files(self, source_folder):
        """Build one sample and append ``[features, label]`` to ``self.out``.

        Runs inside a worker process. Appends ``[None, None]`` when the
        per-clip features cannot be stacked.

        Args:
            source_folder: List of ``CC_*`` folder paths to sample from.
        """
        # Re-seed so workers do not all replay the RNG state inherited from the parent.
        np.random.seed()
        data_folder = str(np.random.choice(source_folder))
        speaker = np.random.choice(os.listdir(data_folder))
        # os.listdir on a str path returns str, which has no .decode();
        # only decode if bytes were actually returned.
        if isinstance(speaker, bytes):
            speaker = speaker.decode("utf8")
        folder = os.path.join(data_folder, str(speaker))
        files = sorted(os.listdir(folder))

        n_missing = max(self.out_size - len(files), 0)
        if n_missing:
            start_choice = 0
        else:
            # +1 so the last full window can be selected as well.
            start_choice = np.random.randint(len(files) - self.out_size + 1)

        # Missing slots go first and are filled with the features of a silent
        # clip, so they have the same shape as real clips. (Previously raw
        # zero samples were mixed with MFCC matrices and np.stack always failed.)
        label = [False] * n_missing
        audio = []
        if n_missing:
            silence = self._clip_features(np.zeros(self.speech_length, dtype=np.float32))
            audio = [silence] * n_missing

        for file in files[start_choice:start_choice+self.out_size]:
            label.append(str(file).endswith("1.wav"))
            wave, _ = librosa.load(os.path.join(folder,file), sr=self.SAMPLE_RATE)
            # Every clip, short or long, goes through the same MFCC step.
            audio.append(self._clip_features(self._fit_length(wave)))

        try:
            stacked = np.stack(audio, axis = -1)
        except ValueError:
            # Empty or inconsistently shaped clip list: report a dropped sample.
            self.out.append([None, None])
            return
        self.out.append([stacked, any(label)])
      

In [None]:
import tensorflow_addons as tfa
class LossAndErrorPrintingCallback(tf.keras.callbacks.Callback):
    """Append a per-epoch metric report to a UTF-8 text file."""

    def __init__(self, log_path='CNN_LSTM_bias_1_5.txt'):
        """
        Args:
            log_path: File the report is appended to. Defaults to the path
                that used to be hard-coded.
        """
        super().__init__()
        self.log_path = log_path

    def on_epoch_end(self, epoch, logs=None):
        """Write loss, accuracy, recall, precision and F1 (train and val).

        Metrics missing from ``logs`` are written as ``nan`` instead of
        raising KeyError.
        NOTE(review): the keys assume the metrics are named exactly
        "recall", "precision" and "f1_score" — confirm against the compiled
        model's metric names.
        """
        logs = logs or {}

        def metric(key):
            return logs.get(key, float("nan"))

        report = [
            "For epoch {}".format(epoch),
            "Loss is       {:7.2f}, val_loss is      {:7.2f}.".format(metric("loss"), metric("val_loss")),
            "Accuracy is   {:7.2f}, val_accuracy is  {:7.2f}.".format(metric("accuracy"), metric("val_accuracy")),
            "Recall is     {:7.2f}, Presicion is     {:7.2f}.".format(metric("recall"), metric("precision")),
            "Val_Recall is {:7.2f}, Val_Presicion is {:7.2f}.".format(metric("val_recall"), metric("val_precision")),
            "F1 is         {:7.2f}, Val_F1 is        {:7.2f}.".format(metric("f1_score"), metric("val_f1_score")),
            "="*100,
        ]
        with open(self.log_path,'a',encoding = 'utf8') as fw:
            # Trailing newline: without it the next epoch's header is glued
            # to the end of the separator line.
            fw.write("\n".join(report) + "\n")
            
class F1_Score(tf.keras.metrics.Metric):
    """F1 metric computed from wrapped Precision and Recall metrics (threshold 0.5)."""

    def __init__(self, name='f1_score', **kwargs):
        super().__init__(name=name, **kwargs)
        self.f1 = self.add_weight(name='f1', initializer='zeros')
        # Fully qualified: bare `Precision` / `Recall` are not defined in this file.
        self.precision_fn = tf.keras.metrics.Precision(thresholds=0.5)
        self.recall_fn = tf.keras.metrics.Recall(thresholds=0.5)

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Update precision/recall with the batch and refresh the stored F1."""
        # Calling a metric updates its state and returns its current result.
        p = self.precision_fn(y_true, y_pred, sample_weight=sample_weight)
        r = self.recall_fn(y_true, y_pred, sample_weight=sample_weight)
        # since f1 is a variable, we use assign; 1e-6 avoids 0/0 when p + r == 0
        self.f1.assign(2 * ((p * r) / (p + r + 1e-6)))

    def result(self):
        """Return the F1 value computed by the last ``update_state`` call."""
        return self.f1

    @staticmethod
    def _reset(metric):
        """Reset ``metric`` via ``reset_state`` when present, else ``reset_states``."""
        reset = getattr(metric, "reset_state", None) or metric.reset_states
        reset()

    def reset_state(self):
        """Clear F1 together with the wrapped precision and recall metrics."""
        # we also need to reset the state of the precision and recall objects
        self._reset(self.precision_fn)
        self._reset(self.recall_fn)
        self.f1.assign(0)

    def reset_states(self):
        """Legacy spelling kept for callers that still use the older method name."""
        self.reset_state()
            
if __name__ == "__main__":
    # All names stay at module level on purpose: later notebook cells read
    # X, y, valid_gen, single_model, class_weight, out_size and speech_len.
    data_path = '/home/ubuntu/ProjectVietVu/splited_data'
    out_size = 5      # clips per sample
    speech_len = 1    # clip length in seconds (DataGenerator converts it to samples)
    data_gen = DataGenerator(data_path, out_size, speech_len)
    train_gen = data_gen.train_generator(batch_size = 16)
    valid_gen = data_gen.valid_generator(batch_size = 8)
    print('start')
    # Smoke test: pull one batch and show its shapes.
    X, y = next(train_gen)
    print(X.shape, y.shape)
    base_model = Model(out_size)
    checkpoint = tf.keras.callbacks.ModelCheckpoint("CNN_LSTM_bias_1_5_f1_v2.h5", monitor='val_f1_score', verbose=1, save_best_only=True, mode='max')

    # Estimate the class balance from 100 training batches.
    a = []  # positives per batch
    b = []  # samples per batch
    print("Begin calculate bias weight")
    bt = time.time()
    for i in range(100):
        X, y = next(train_gen)
        a.append(int(np.sum(y)))
        b.append(len(y))
    pos = sum(a)
    total = sum(b)
    neg = total - pos
    print("Possitive: %s"%pos)
    print("Negative: %s"%neg)
    print("In times: %s" %(time.time() - bt))
    if pos == 0 or neg == 0:
        # With one class absent the weights divide by zero and the initial
        # bias log(pos/neg) is infinite, which would poison training silently.
        raise ValueError(
            "Sampled batches contain only one class (pos=%s, neg=%s); "
            "cannot derive class weights or an initial bias." % (pos, neg))

    # Inverse-frequency weights, scaled so that they average to 1 over the samples.
    weight_for_1 = (1 / pos)*(total)/2.0
    weight_for_0 = (1 / neg)*(total)/2.0
    class_weight = {0: weight_for_0, 1: weight_for_1}
    # Start the sigmoid output at the observed log-odds of the positive class.
    initial_bias = np.log([pos/neg])
    single_model = base_model.single_lstm(X.shape[1:], initial_bias)
    history = single_model.fit(train_gen,
                                steps_per_epoch = 100,
                                epochs = 20,
                                verbose = 1,
                                shuffle = False,
                                validation_data = valid_gen,
                                validation_steps = 20,
                                class_weight = class_weight,
                                callbacks = [checkpoint,LossAndErrorPrintingCallback()])

In [None]:
# Scratch: stack two copies of a's transpose along a new trailing axis.
# NOTE(review): `a` must expose `.T` (e.g. an ndarray); the training cell
# binds `a` to a plain list — confirm what `a` was when this was run.
np.stack([a.T,a.T], axis = -1)

In [None]:
# Scratch: display `a` converted to an ndarray.
np.array(a)

In [None]:
# Toy check of the class-weight formula (1 / count) * total / 2 with a total
# of 10: class 0 uses a count of 1, class 1 a count of 9.
weight_for_0 = 1 / 1 * 10 / 2.0
weight_for_1 = 1 / 9 * 10 / 2.0
class_weight = dict(zip((0, 1), (weight_for_0, weight_for_1)))
class_weight

In [None]:
# Scratch: display a small literal array.
np.array([1,2,3])

In [None]:
# Display the shape of the most recently drawn feature batch.
X.shape

In [None]:
# Draw one validation batch (rebinding X and y) and evaluate the trained model on it.
X, y = next(valid_gen)
single_model.evaluate(X,y)

In [None]:
# Display the model's predictions for the current batch X.
single_model.predict(X)

In [None]:
# Display the labels of the current batch, for comparison with the predictions.
y

In [None]:
# Display the class-weight mapping currently bound to `class_weight`.
class_weight

In [None]:
# Inverse-frequency class weights for 485 negatives and 240 positives:
# weight = (1 / class_count) * total / 2.
neg = 485
pos = 240
total = 485+240

weight_for_0 = 1 / neg * total / 2.0
weight_for_1 = 1 / pos * total / 2.0
class_weight = dict([(0, weight_for_0), (1, weight_for_1)])

for message, weight in (('Weight for class 0: {:.2f}', weight_for_0),
                        ('Weight for class 1: {:.2f}', weight_for_1)):
    print(message.format(weight))

In [None]:
import os

In [None]:
# Root directory of the dataset used by the exploration cells that follow.
data_path = '/home/ubuntu/ProjectVietVu/splited_data'

In [None]:
import glob
# Collect every .wav file exactly two directory levels below data_path and
# display how many were found. Note: this rebinds `a`.
a = glob.glob(os.path.join(data_path,"*/*/*.wav"))
len(a)

In [None]:
# All second-level folders under data_path, in sorted order.
list_folder = sorted(glob.glob(os.path.join(data_path,"*/*/")))
# For each folder holding more than 5 entries: its entry count minus 5.
# Each folder is listed once (it used to be listed twice per folder).
# NOTE(review): presumably a per-folder count of 5-file windows; if so,
# folders with exactly 5 entries and the final window are not counted — confirm.
available = [count - 5
             for count in (len(os.listdir(folder)) for folder in list_folder)
             if count > 5]

In [None]:
# Enumerate every run of 5 consecutive files (sorted by name) in each folder.
# A window is positive when any of its file names ends with "1.wav".
pos = []
neg = []
for folder in list_folder:
    files = sorted(os.listdir(folder))
    if len(files)<5:
        continue
    # + 1 so the final window is included; previously a folder with exactly
    # 5 files passed the length check above but contributed no window.
    for i in range(len(files) - 5 + 1):
        fs = files[i:i+5]
        window = [os.path.join(folder, f) for f in fs]
        if any(f.endswith("1.wav") for f in fs):
            pos.append(window)
        else:
            neg.append(window)

In [None]:
# Display the first five positive and the first five negative windows.
pos[:5], neg[:5]

In [None]:
# Display how many positive and negative windows were found.
len(pos), len(neg)

In [None]:
# NOTE(review): presumably the positive/negative window ratio typed in by
# hand from the counts shown above — confirm where 1304 and 3394 come from.
1304/3394

In [None]:
# Display the number of items currently in `a`.
len(a)

In [None]:
import librosa

In [None]:
import time
# Time librosa.load over every path in `a`: at each index divisible by 100
# (index 0 included) print the index and the seconds since the previous
# report, then restart the clock.
begin = time.time()
for index, wav_path in enumerate(a):
    librosa.load(wav_path)
    if index % 100:
        continue
    print(index, time.time() - begin)
    begin = time.time()

In [None]:
# Rebuild the generator (this reshuffles and re-splits the folders) and pull
# one training batch of up to 8 samples to check the output shapes.
# Relies on data_path, out_size and speech_len defined in earlier cells.
data_gen = DataGenerator(data_path, out_size, speech_len)
train_gen = data_gen.train_generator(batch_size = 8)
X, y = next(train_gen)
print(X.shape, y.shape)