In [1]:
import os
from dotenv import load_dotenv

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm

import kaldiio

from scipy.signal import welch
import random
from sklearn.manifold import TSNE
import plotly.graph_objects as go

import plotly.io as pio

pio.renderers.default = "notebook"

from IPython.display import Audio

import tensorflow as tf
import tensorflow_io as tfio

load_dotenv("../.env")
tf.config.list_physical_devices("GPU")

# conda install -c conda-forge tqdm


2023-04-24 23:24:32.225211: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-24 23:24:32.264164: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-24 23:24:32.264713: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-24 23:24:33.729207: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-24 23:24:37.453296: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are 

[]

In [4]:
PROJECT_ROOT = os.getenv("PROJECT_ROOT")
os.chdir(PROJECT_ROOT)

CLIPS_PATH = os.getenv("CLIPS_PATH")

VALIDATED_LIST_PATH = os.path.join(os.getenv("CLIPS_META_PATH"), "validated.tsv")
XVECTOR_RESULT_PATH = os.getenv("XVECTOR_RESULT_PATH")
XVECTOR_SCP_PATH = os.path.join(XVECTOR_RESULT_PATH, "xvector.scp")

In [97]:
speakers_xvectors = kaldiio.load_scp(XVECTOR_SCP_PATH)
valid_speakers = set(speakers_xvectors.keys())

raw_clips_meta = pd.read_table(VALIDATED_LIST_PATH)
raw_clips_meta = raw_clips_meta[raw_clips_meta["client_id"].isin(valid_speakers)]


def get_path(row):
    return f"{CLIPS_PATH}/{row}.mp3"

In [6]:
from tensorflow.keras.layers import (
    Input,
    Conv2D,
    Concatenate,
    LeakyReLU,
    BatchNormalization,
    Flatten,
    AveragePooling2D,
    Dense,
    Dropout,
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.models import Model

In [7]:
HYPER_PARAMS = {
    # Model parameters
    "window-width": 64,
    "mel-bands": 256,
    "x-vector-dim": 512,
    # Training parameters
    "batch-size": 32,
    "epochs": 100,
    "learning-rate": 0.0001,
}

# Model parameters
mel_spectrogram_shape = (
    HYPER_PARAMS["window-width"],
    HYPER_PARAMS["mel-bands"],
    1,
)  # Replace window_size and num_mel_bands with your values

# Leaky ReLU activation function
leaky_relu = LeakyReLU(alpha=0.2)

# Mel-spectrogram input
mel_spectrogram_input = Input(shape=mel_spectrogram_shape, name="mel_spectrogram_input")

# Convolutional layers
x = Conv2D(16, (3, 3), padding="same", activation=leaky_relu)(mel_spectrogram_input)
x = BatchNormalization()(x)
x = AveragePooling2D(pool_size=(2, 2))(x)

# Convolutional layers
x = Conv2D(32, (3, 3), padding="same", activation=leaky_relu)(mel_spectrogram_input)
x = BatchNormalization()(x)
x = AveragePooling2D(pool_size=(2, 2))(x)

x = Conv2D(64, (3, 3), padding="same", activation=leaky_relu)(x)
x = BatchNormalization()(x)
x = AveragePooling2D(pool_size=(2, 2))(x)

x = Conv2D(128, (3, 3), padding="same", activation=leaky_relu)(x)
x = BatchNormalization()(x)
x = AveragePooling2D(pool_size=(2, 2))(x)

x = Flatten()(x)

# X-vector input
x_vector_input = Input(shape=(HYPER_PARAMS["x-vector-dim"],), name="x_vector_input")

# Concatenate flattened CNN output with x-vector input
combined_input = Concatenate()([x, x_vector_input])

# Dense layers
y = Dense(128, activation=leaky_relu)(combined_input)
y = Dropout(0.1)(y)
y = Dense(256, activation=leaky_relu)(y)
y = Dropout(0.1)(y)
y = Dense(256, activation=leaky_relu)(y)
y = Dropout(0.1)(y)
y = Dense(512, activation=leaky_relu)(y)
output = Dense(HYPER_PARAMS["mel-bands"], activation="linear")(y)

# Construct the model
model = Model(inputs=[mel_spectrogram_input, x_vector_input], outputs=output)
model.summary()

# Compile the model
optimizer = Adam(learning_rate=HYPER_PARAMS["learning-rate"])
loss_fn = MeanSquaredError()
model.compile(optimizer=optimizer, loss=loss_fn)

2023-04-24 22:42:37.090091: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-24 22:42:37.095127: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-04-24 22:42:37.096014: E tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:927] could not open file to read NUMA node: /sys/bus/pci/devices/0000:06:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-24 22:42:37.096202: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:06:00.0 name: NVIDIA GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.645GHz coreCount: 28 deviceMemorySize: 11.00GiB deviceMemoryBandwidth

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
mel_spectrogram_input (InputLay [(None, 64, 256, 1)] 0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 64, 256, 32)  320         mel_spectrogram_input[0][0]      
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 64, 256, 32)  128         conv2d_1[0][0]                   
__________________________________________________________________________________________________
average_pooling2d_1 (AveragePoo (None, 32, 128, 32)  0           batch_normalization_1[0][0]      
______________________________________________________________________________________________

In [8]:
from tensorflow.keras.callbacks import TensorBoard

tensorboard_callback = TensorBoard(histogram_freq=1)


2023-04-24 22:42:39.658630: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2023-04-24 22:42:39.658678: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2023-04-24 22:42:39.658755: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1365] Profiler found 1 GPUs
2023-04-24 22:42:39.660776: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcupti.so.10.1
2023-04-24 22:42:39.787614: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2023-04-24 22:42:39.787749: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1487] CUPTI activity buffer flushed


In [105]:
class SamplesLoader:
    def __init__(self, audio_samples, x_vectors, batch_size, samples_coeficients):
        self.audio_samples = audio_samples
        self.x_vectors = x_vectors

        self.batch_size = batch_size
        self.samples_coeficients = samples_coeficients

        self.epoch_iterator = None
        self.current_file = None
        self.batch_leftover = None

    def __iter__(self):
        self.epoch_iterator = raw_clips_meta.sample(frac=1).iterrows()
        return self

    # Returns batch of noisy and clean samples
    def __next__(self):
        sample_ind, sample_data = self.epoch_iterator.__next__()
        sample_client = sample_data["client_id"]
        sample_path = get_path(sample_data["path"])

        print(f"Loading sample {sample_ind} from {sample_path}")

        current_audio = tfio.audio.AudioIOTensor(sample_path)
        return current_audio

In [106]:
SAMPLE_COEFICIENTS = {
    "raw": 0.8,
    "empty": 0.1,
    "noisy": 0.4,
    "combined": 0.1,
    "noisy_combined": 0.1,
}

data_loader = SamplesLoader(raw_clips_meta, speakers_xvectors, HYPER_PARAMS["batch-size"], SAMPLE_COEFICIENTS)


In [107]:
a = raw_clips_meta.sample(frac=1).iterrows()
a.__next__()[1]["path"]

'7a59c0c8e4c76e48c46c1c2f7c84b3c07eeeded993e241eeb547ccfe8c6e2529f15b6f678cd48427f0d98ff2c939384ad694a539fc540ba435f290b6d1c95ffc'

In [108]:
b = iter(data_loader)

In [109]:
b.__next__()

Loading sample 10287 from /home/user/commonvoice/en_Common_Voice_Corpus_1/clips/bd136bc43ddbfeb12dbb6321519b04d7d72841be7ce92dc0756daa20e1a3f8171097d31745b2bbc958378b77d5b4f1e13a303c7b9945664410944be7327ba2f0.mp3


NotImplementedError: unable to open file: libtensorflow_io.so, from paths: ['/home/user/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/tensorflow_io/python/ops/libtensorflow_io.so']
caused by: ['/home/user/anaconda3/envs/tf-gpu/lib/python3.9/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZNK10tensorflow4data11DatasetBase18MakeSplitProvidersEPSt6vectorISt10unique_ptrINS0_13SplitProviderESt14default_deleteIS4_EESaIS7_EE']

In [14]:
# Training parameters
epochs = 50
train_steps_per_epoch = 2000  # Adjust this value based on your training set size
val_steps_per_epoch = 500  # Adjust this value based on your validation set size

for epoch in tqdm(range(HYPER_PARAMS["epochs"]), desc="Training"):
    pass
    # Train on batches
    # for batch_X_mel, batch_X_xvec, batch_y in train_dataset:
    #     train_result = model.train_on_batch(
    #         x=[batch_X_mel, batch_X_xvec], y=batch_y, reset_metrics=False
    #     )

    #     # Write train metrics to TensorBoard
    #     with tensorboard_callback.as_default():
    #         tf.summary.scalar("loss", train_result, step=epoch)

    # Validate on batches
    # val_losses = []
    # for batch_X_mel, batch_X_xvec, batch_y in val_dataset:
    #     val_result = model.test_on_batch(x=[batch_X_mel, batch_X_xvec], y=batch_y)

    #     # Collect validation losses
    #     val_losses.append(val_result)

    # Write validation metrics to TensorBoard
    # mean_val_loss = np.mean(val_losses)
    # with tensorboard_callback.as_default():
    #     tf.summary.scalar("val_loss", mean_val_loss, step=epoch)


ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html