In [1]:
import tensorflow as tf
import tensorflow_addons as tfa
import numpy as np
from pathlib import Path

from tensorflow import Tensor
from tqdm.notebook import tqdm
from typing import Callable, List, Tuple, Union, Optional, Dict, Any, Sequence, Iterable, TypeVar

caused by: ["[Errno 2] The file to load file system plugin from does not exist.: '/Users/jackwang/.local/share/virtualenvs/Code-_CZGGnvj/lib/python3.9/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so'"]
caused by: ["dlopen(/Users/jackwang/.local/share/virtualenvs/Code-_CZGGnvj/lib/python3.9/site-packages/tensorflow_io/python/ops/libtensorflow_io.so, 0x0006): tried: '/Users/jackwang/.local/share/virtualenvs/Code-_CZGGnvj/lib/python3.9/site-packages/tensorflow_io/python/ops/libtensorflow_io.so' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/jackwang/.local/share/virtualenvs/Code-_CZGGnvj/lib/python3.9/site-packages/tensorflow_io/python/ops/libtensorflow_io.so' (no such file), '/Users/jackwang/.local/share/virtualenvs/Code-_CZGGnvj/lib/python3.9/site-packages/tensorflow_io/python/ops/libtensorflow_io.so' (no such file)"]


In [2]:
# Hide every GPU from TensorFlow so the notebook runs on CPU only,
# then verify that no GPU is left among the visible devices.
tf.config.set_visible_devices([], 'GPU')
visible_devices = tf.config.get_visible_devices()
assert all(dev.device_type != 'GPU' for dev in visible_devices)

In [3]:
class DataPipeFactory:
    """Factory for the tf.data pipelines built from the student-answer TFRecord file.

    The factory parses the GZIP-compressed TFRecord file, attaches the reference
    audio / word information stored as serialized tensors on disk, and exposes:

    * ``get_raw_data``   - one element per record (optionally served from a cache),
    * ``get_pair_data``  - (main, counter) record pairs merged into one sample,
    * ``get_batch_data`` - padded batches of paired samples,
    * ``k_fold``         - train/test split of the paired, batched data.
    """

    def __init__(self, tfrecord_path, ref_audio_path, word_information_path, cache=None):
        """Validate the input locations and build the raw (un-cached) pipeline.

        Args:
            tfrecord_path: Path of the GZIP-compressed TFRecord file.
            ref_audio_path: Directory holding ``<passage_id>.tfs`` reference audio tensors.
            word_information_path: Directory holding ``<passage_id>_word.tfs`` and
                ``<passage_id>_ref.tfs`` tensors.
            cache: Optional directory used by ``pre_save`` / ``get_raw_data`` to
                store and reload the processed dataset. ``None`` disables caching.

        Raises:
            FileNotFoundError: If any of the three input paths does not exist.
        """
        self.tfrecord_path: Path = Path(tfrecord_path)
        self.ref_audio_path: Path = Path(ref_audio_path)
        self.word_information_path: Path = Path(word_information_path)
        self.__cache_status = False
        if not self.tfrecord_path.exists():
            raise FileNotFoundError(f"tfrecord_path {tfrecord_path} not found")
        if not self.ref_audio_path.exists():
            raise FileNotFoundError(f"ref_audio_path {ref_audio_path} not found")
        if not self.word_information_path.exists():
            raise FileNotFoundError(f"word_information_path {word_information_path} not found")
        # Keep a missing cache as None: str(None) would silently become a
        # directory that is literally named "None".
        self.__cache: Optional[str] = None if cache is None else str(cache)
        # Number of records combined into one sample; the pairing code below
        # (two-argument filter / mapping) only supports 2.
        self.__pairs: int = 2
        self.__available_voice = 4
        self.__mel_bins = 80
        self.__raw_data: tf.data.Dataset = self.__generate_raw_data()

    # Parser for the serialized tf.Example records stored in the TFRecord file.
    @staticmethod
    def parse_function(serialized_example: tf.Tensor) -> Dict[str, tf.Tensor]:
        """Parse one serialized ``tf.Example`` into a dict of tensors.

        Besides decoding the serialized tensor features, a ``passage_id`` string
        is derived from the 4th ``_``-separated field of ``RecordName`` taken
        modulo 100000.
        """
        # Names and types we expect to find in the serialized example.
        features = {
            'RecordName': tf.io.FixedLenFeature([], tf.string),
            'AudioSegment': tf.io.FixedLenFeature([], tf.string),
            'SampleRate': tf.io.FixedLenFeature([], tf.int64),
            'Sentence': tf.io.FixedLenFeature([], tf.string),
            'WordStart': tf.io.FixedLenFeature([], tf.string),
            'WordDuration': tf.io.FixedLenFeature([], tf.string),
            'MatchSegment': tf.io.FixedLenFeature([], tf.string),
            'MatchReference': tf.io.FixedLenFeature([], tf.string),
        }
        # Parse the input tf.Example proto using the dictionary above.
        e = tf.io.parse_single_example(serialized_example, features)
        # Turn the serialized tensors back into typed tensors.
        e['AudioSegment'] = tf.io.parse_tensor(e['AudioSegment'], out_type=tf.int16)
        e['Sentence'] = tf.io.parse_tensor(e['Sentence'], out_type=tf.int64)
        e['WordStart'] = tf.io.parse_tensor(e['WordStart'], out_type=tf.float32)
        e['WordDuration'] = tf.io.parse_tensor(e['WordDuration'], out_type=tf.float32)
        e['MatchSegment'] = tf.io.parse_tensor(e['MatchSegment'], out_type=tf.int64)
        e['MatchReference'] = tf.io.parse_tensor(e['MatchReference'], out_type=tf.int64)
        passage_id = tf.strings.split(e['RecordName'], sep='_')[3]
        # String -> int, keep the last five decimal digits, then back to string.
        passage_id = tf.strings.to_number(passage_id, out_type=tf.int32) % 100000
        e['passage_id'] = tf.strings.as_string(passage_id)
        return e

    def __first_map_builder(self) -> Callable[[Dict[str, tf.Tensor]], Dict[str, tf.Tensor]]:
        """Return the map function that enriches a parsed record with reference data.

        Everything the inner function needs is captured in local variables so
        the mapped function does not reference ``self``.
        """
        get_mfcc = self.get_mfcc
        ref_audio_path = str(self.ref_audio_path.absolute())
        word_information_path = str(self.word_information_path.absolute())
        available_voice = self.__available_voice

        def created_map(e: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]:
            a = {'stu_mfcc': get_mfcc(e['AudioSegment'], e['SampleRate'])}
            ref_audio = tf.io.parse_tensor(tf.io.read_file(ref_audio_path + '/' + e['passage_id'] + '.tfs'), out_type=tf.int16)
            # NOTE(review): the reference audio is processed with the student's
            # sample rate - assumes both share the same rate; confirm.
            a['ref_mfcc'] = get_mfcc(ref_audio, e['SampleRate'])
            passage_word = tf.io.parse_tensor(tf.io.read_file(word_information_path + '/' + e['passage_id'] + '_word.tfs'), out_type=tf.int64)
            reference_time = tf.io.parse_tensor(tf.io.read_file(word_information_path + '/' + e['passage_id'] + '_ref.tfs'), out_type=tf.float32)
            # Keep only the student words that were matched to the reference.
            a['valid_stu_start'] = tf.gather(e['WordStart'], e['MatchSegment'])
            a['valid_stu_duration'] = tf.gather(e['WordDuration'], e['MatchSegment'])

            # batch_dims=1: the leading axis is gathered row by row.
            a['valid_ref_word'] = tf.gather(passage_word, e['MatchReference'], batch_dims=1)
            a['valid_ref_start'] = tf.gather(reference_time[..., 0], e['MatchReference'], batch_dims=1)
            a['valid_ref_duration'] = tf.gather(reference_time[..., 1], e['MatchReference'], batch_dims=1)

            a['RecordName'] = e['RecordName']
            a['passage_id'] = e['passage_id']
            a['MatchSegment'] = e['MatchSegment']
            a['MatchReference'] = e['MatchReference']

            # parse_tensor yields unknown static shapes; restore what is known
            # so that padded_batch can be applied downstream.
            a['stu_mfcc'].set_shape([None, 80])
            a['ref_mfcc'].set_shape([available_voice, None, 80])
            a['valid_stu_start'].set_shape([available_voice, None])
            a['valid_stu_duration'].set_shape([available_voice, None])
            a['valid_ref_word'].set_shape([available_voice, None])
            a['valid_ref_start'].set_shape([available_voice, None])
            a['valid_ref_duration'].set_shape([available_voice, None])
            a['MatchSegment'].set_shape([available_voice, None])
            a['MatchReference'].set_shape([available_voice, None])
            return a
        return created_map

    def __generate_raw_data(self) -> tf.data.Dataset:
        """Build (and store) the un-cached pipeline: read -> parse -> enrich -> prefetch."""
        # Pass a plain string: the filename argument is converted to a string
        # tensor, which a pathlib.Path is not guaranteed to support.
        self.__raw_data = (
            tf.data.TFRecordDataset(str(self.tfrecord_path), compression_type='GZIP')
            .map(self.parse_function, num_parallel_calls=tf.data.AUTOTUNE)
            .map(self.__first_map_builder(), num_parallel_calls=tf.data.AUTOTUNE)
            .prefetch(tf.data.AUTOTUNE)
        )
        return self.__raw_data

    @staticmethod
    @tf.function
    def get_mfcc(pcm: tf.Tensor,
                 sample_rate: int = 16000,
                 frame_length: int = 1024) -> tf.Tensor:
        """Compute the 80-bin log-mel spectrogram of a PCM signal.

        Despite its name the function stops at the log-mel spectrogram; no DCT
        is applied.

        Args:
            pcm: Audio samples; they are cast to float32 and scaled by
                ``tf.int16.max``. The STFT is taken over the last axis.
            sample_rate: Sample rate handed to the mel weight matrix.
            frame_length: STFT window and FFT length; the hop is ``frame_length // 8``.

        Returns:
            ``log(mel + 1e-6)`` with 80 mel bins on the last axis.
        """
        pcm = tf.cast(pcm, tf.float32) / tf.int16.max
        st_fft = tf.signal.stft(pcm, frame_length=frame_length, frame_step=frame_length // 8, fft_length=frame_length)
        spectrograms = tf.abs(st_fft)
        # Warp the linear scale spectrograms into the mel-scale.
        num_spectrogram_bins = frame_length // 2 + 1
        lower_edge_hertz, upper_edge_hertz, num_mel_bins = 80.0, 7600.0, 80
        linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
            num_mel_bins, num_spectrogram_bins, sample_rate, lower_edge_hertz, upper_edge_hertz)
        mel_spectrograms = tf.einsum('...t,tb->...b', spectrograms, linear_to_mel_weight_matrix)
        # The small offset keeps the logarithm finite for silent frames.
        log_mel_spectrograms = tf.math.log(mel_spectrograms + 1e-6)
        return log_mel_spectrograms

    @staticmethod
    def __pair_mapping(main: Dict[str, tf.Tensor], counter: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]:
        """Merge a (main, counter) record pair into one training sample.

        One reference voice is drawn at random for each record. From the
        counter record as many words are sampled as the main record has; a
        ``counter_word_match`` of 1. / -1. marks whether the sampled counter
        word equals the main word at the same position.
        """
        sample_dict = {}
        # The main voice index is bounded by the main record's own voice count
        # (it was previously bounded by the counter record's).
        random_ref_voice_id = tf.random.uniform(shape=[], minval=0, maxval=tf.shape(main['ref_mfcc'])[0], dtype=tf.int32)
        counter_random_ref_voice_id = tf.random.uniform(shape=[], minval=0, maxval=tf.shape(counter['ref_mfcc'])[0], dtype=tf.int32)
        sample_dict['stu_mfcc'] = main['stu_mfcc']
        sample_dict['ref_mfcc'] = main['ref_mfcc'][random_ref_voice_id]
        sample_dict['valid_stu_start'] = main['valid_stu_start'][random_ref_voice_id]
        sample_dict['valid_stu_duration'] = main['valid_stu_duration'][random_ref_voice_id]
        sample_dict['valid_ref_word'] = main['valid_ref_word'][random_ref_voice_id]
        sample_dict['valid_ref_start'] = main['valid_ref_start'][random_ref_voice_id]
        sample_dict['valid_ref_duration'] = main['valid_ref_duration'][random_ref_voice_id]

        sample_dict['counter_ref_mfcc'] = counter['ref_mfcc'][counter_random_ref_voice_id]

        counter_ref_word = counter['valid_ref_word'][counter_random_ref_voice_id]
        counter_word_count = tf.shape(counter_ref_word)[0]
        # Sample the same amount of words from the counter as the main has.
        main_word_range = tf.shape(sample_dict['valid_ref_word'])
        shuffled_index = tf.random.shuffle(tf.range(counter_word_count))
        if tf.shape(shuffled_index)[0] > main_word_range[0]:
            # Enough counter words: take a prefix of the permutation (no repeats).
            counter_word_index = shuffled_index[:main_word_range[0]]
        else:
            # Too few counter words: start from random indices (with repeats)
            # and overwrite the head with the permutation so that every counter
            # word appears at least once.
            counter_word_index = tf.random.uniform(shape=main_word_range, minval=0, maxval=counter_word_count, dtype=tf.int32)
            counter_word_index = tf.tensor_scatter_nd_update(
                counter_word_index,
                tf.range(tf.shape(shuffled_index)[0])[..., tf.newaxis],
                shuffled_index)
        # Sample the counter data using counter_word_index.
        sample_dict['counter_valid_ref_word'] = tf.gather(counter_ref_word, counter_word_index)
        sample_dict['counter_valid_ref_start'] = \
            tf.gather(counter['valid_ref_start'][counter_random_ref_voice_id], counter_word_index)
        sample_dict['counter_valid_ref_duration'] = \
            tf.gather(counter['valid_ref_duration'][counter_random_ref_voice_id], counter_word_index)
        # 1. where the counter word equals the main word, -1. otherwise.
        sample_dict['counter_word_match'] = tf.where(tf.equal(sample_dict['counter_valid_ref_word'],
                                                              sample_dict['valid_ref_word']), 1., -1.)
        sample_dict['counter_pool_index'] = counter_word_index
        return sample_dict

    def pre_save(self) -> None:
        """Write the current raw dataset to the cache directory and reload it from there.

        Raises:
            ValueError: If the factory was created without a ``cache`` path.
        """
        if self.__cache is None:
            raise ValueError("no cache path configured; pass cache=... to DataPipeFactory")
        self.__raw_data.save(self.__cache, compression='GZIP')
        self.__cache_status = True
        # Reload once, with the same compression that was used for saving.
        self.__raw_data = tf.data.Dataset.load(self.__cache, compression='GZIP')
        print(f'Cache saved to {self.__cache}')

    def get_raw_data(self) -> tf.data.Dataset:
        """Return the raw dataset, switching to the on-disk cache on first use if it exists."""
        if self.__cache is not None and not self.__cache_status and Path(self.__cache).exists():
            self.__cache_status = True
            print(f'Load cache from {self.__cache}')
            self.__raw_data = tf.data.Dataset.load(self.__cache, compression='GZIP')
        return self.__raw_data

    def get_pair_data(self) -> tf.data.Dataset:
        """Return the un-batched dataset of paired samples."""
        return self.get_raw_data().apply(self.__pair_map_handle(self.__pairs))

    def __batching_handle(self, batch_size: int) -> Callable[[tf.data.Dataset], tf.data.Dataset]:
        """Return a dataset transformation that pads and batches ``batch_size`` elements.

        Numeric features are padded with -1 and string features with ''.
        """
        def handle(ds):
            padding_values = {k: tf.cast(-1, v.dtype) if v.dtype != tf.string else ''
                              for k, v in ds.element_spec.items()}
            return (
                ds
                .padded_batch(batch_size, padding_values=padding_values)
                .prefetch(tf.data.AUTOTUNE)
            )
        return handle

    def __pair_map_handle(self, pairs: int,
                          deterministic: bool = True)\
            -> Callable[[tf.data.Dataset], tf.data.Dataset]:
        """Return a dataset transformation that zips shuffled copies and merges each pair.

        Pairs made of the same record (equal ``RecordName``) are dropped. The
        two-argument filter and ``__pair_mapping`` only work for ``pairs == 2``.
        """
        def handle(ds):
            tuple_of_pairs = tuple(ds.shuffle(20, reshuffle_each_iteration=True) for _ in range(pairs))
            comb_data = tf.data.Dataset.zip(tuple_of_pairs).filter(lambda x, y: x["RecordName"] != y["RecordName"])
            return comb_data.map(self.__pair_mapping, num_parallel_calls=tf.data.AUTOTUNE,
                                 deterministic=deterministic)\
                .shuffle(buffer_size=10, reshuffle_each_iteration=True)
        return handle

    def k_fold(self, total_fold: int,
               fold_index: int,
               batch_size: int,
               deterministic: bool = False)\
            -> Tuple[tf.data.Dataset, tf.data.Dataset]:
        """Split the raw data into train/test folds and return both as paired batches.

        Record ``i`` belongs to the test fold when ``i % total_fold == fold_index``.

        Raises:
            ValueError: If ``fold_index`` is negative or not below ``total_fold``.
        """
        if fold_index >= total_fold:
            raise ValueError("fold_index must be less than total_fold")
        if fold_index < 0:
            # A negative index never equals ``index % total_fold`` and would
            # silently produce an empty test set.
            raise ValueError("fold_index must be non-negative")
        indexed_data = self.get_raw_data().enumerate()

        def build(predicate):
            # Select the fold, drop the enumeration index, then pair and batch.
            return indexed_data\
                .filter(predicate)\
                .map(lambda _, data: data, num_parallel_calls=tf.data.AUTOTUNE, deterministic=deterministic)\
                .apply(self.__pair_map_handle(self.__pairs, deterministic=deterministic))\
                .apply(self.__batching_handle(batch_size))

        train_data = build(lambda index, _: index % total_fold != fold_index)
        test_data = build(lambda index, _: index % total_fold == fold_index)
        return train_data, test_data

    def get_batch_data(self,
                       batch_size: int,
                       deterministic=False) -> tf.data.Dataset:
        """Return padded batches of paired samples built from all raw data."""
        return self.get_raw_data()\
            .apply(self.__pair_map_handle(self.__pairs, deterministic=deterministic))\
            .apply(self.__batching_handle(batch_size))

In [4]:
#####
#####

In [5]:
# Build the pipeline factory from the student-answer records and the
# reference audio / word-index directories.
ds = DataPipeFactory(
    tfrecord_path='../DataFolder/Tensorflow_DataRecord/Student_Answer_Record.tfrecord',
    ref_audio_path='../DataFolder/Siri_Related/Siri_Reference_Sample',
    word_information_path='../DataFolder/Siri_Related/Siri_Dense_Index',
    cache='../DataFolder/cache/datapipe/cached',
)
# Called for its side effect: switches the factory to the on-disk cache if one exists.
ds.get_raw_data()
# Scalar variable that Get_Gradient differentiates with respect to.
A = tf.Variable(-1.)

Load cache from ../DataFolder/cache/datapipe/cached


In [6]:
#ds.pre_save()

In [7]:
@tf.function
def inform_pooling(value, start, duration, ratio):
    """Mean-pool ``value`` over the frame range of every (start, duration) segment.

    Args:
        value: Batched per-frame features; ``value[i]`` is indexed along its
            first axis by frame (assumes a RaggedTensor of float32 rows such as
            the one built from ``stu_mfcc`` - TODO confirm).
        start: Per-example segment start times.
        duration: Per-example segment durations, same layout as ``start``.
        ratio: Factor converting times to frame indices.

    Returns:
        A RaggedTensor with one row per batch example and one pooled feature
        vector per segment of that example.
    """
    batch = tf.shape(value)[0]
    end = start + duration
    start = tf.math.floor(start * ratio)
    # The small offset guarantees end > start even for zero-length segments.
    end = tf.math.ceil((end + 0.001) * ratio)

    period = tf.cast(tf.stack([start, end], axis=-1), tf.int32)
    tf.debugging.assert_less(period[...,0], period[...,1])
    # Rows differ in length, so shape inference on the array must be disabled.
    ret_b = tf.TensorArray(tf.float32, batch, infer_shape=False)
    ret_count = tf.TensorArray(tf.int32, batch)
    for batch_index in tf.range(batch):
        value_l = value[batch_index]
        val_ind_max = tf.shape(value_l)[0]
        period_l = period[batch_index]
        # tf.ragged.range excludes its limit: the first index may be at most
        # the last frame, while the limit may reach the frame count itself.
        # Clamping both to ``val_ind_max - 1`` dropped the last frame and turned
        # segments at the very end into empty ranges (NaN mean).
        seg_start = tf.math.minimum(period_l[..., 0], val_ind_max - 1)
        seg_limit = tf.math.minimum(period_l[..., 1], val_ind_max)
        # Keep at least one frame per segment after clamping.
        seg_limit = tf.math.maximum(seg_limit, seg_start + 1)
        ret_count = ret_count.write(batch_index, tf.shape(period_l)[0])
        indexes = tf.ragged.range(seg_start, seg_limit)
        value_indices = tf.gather(value_l, indexes)
        pooled = tf.reduce_mean(value_indices, axis=1)
        ret_b = ret_b.write(batch_index, pooled)
    row_length = ret_count.stack()
    # Concatenate all segments of all examples, then re-split per example.
    ret = ret_b.concat()
    return tf.RaggedTensor.from_row_lengths(ret, row_length)

In [8]:
@tf.function
def Get_Gradient(value, start, duration, ratio):
    """Return the gradient of the mean pooled value with respect to the global ``A``.

    ``value`` is scaled by the module-level variable ``A``, pooled with
    ``inform_pooling`` and reduced to a scalar mean; the result is a
    one-element list holding the gradient of that scalar w.r.t. ``A``.
    """
    with tf.GradientTape() as tape:
        scaled = value * A
        pooled = inform_pooling(scaled, start, duration, ratio)
        loss = tf.reduce_mean(pooled)
    # The tape is not persistent, so asking for the gradient after the context
    # has closed yields the same result as asking inside it.
    return tape.gradient(loss, [A])

In [9]:
# Run Get_Gradient over every padded batch of 10 paired samples.
for i, d in tqdm(enumerate(ds.get_batch_data(10))):
    batch, ratio = 10, 125
    # The batches are padded with -1; from_tensor strips that trailing padding
    # again so that every example keeps its own length.
    value = tf.RaggedTensor.from_tensor(d['stu_mfcc'], padding=-1*tf.ones(80,))
    start = tf.RaggedTensor.from_tensor(d['valid_stu_start'], padding=-1.)
    duration = tf.RaggedTensor.from_tensor(d['valid_stu_duration'], padding=-1.)
    Get_Gradient(value, start, duration, ratio)

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


0it [00:00, ?it/s]

2023-01-25 16:09:35.217810: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [None]:
# Shape of the padded 'stu_mfcc' batch; `d` is the last batch bound by the loop over ds.get_batch_data(10).
d['stu_mfcc'].shape

In [None]:
# Ragged view of the padded batch: trailing rows equal to the length-80 vector of -1 are treated as padding.
aa = tf.RaggedTensor.from_tensor(d['stu_mfcc'], padding=-1*tf.ones(80,))

In [None]:
# Dynamic shape of the ragged tensor; the cells below use it as a DynamicRaggedShape (static_lengths / inner_shape).
shap = tf.shape(aa)

In [None]:
# Target shape: the lengths of `aa` without the last entry, followed by [2, 40] (scratch experiment).
s2 = tf.experimental.DynamicRaggedShape.from_lengths(shap.static_lengths(True)[:-1] + [2,40])

In [None]:
# Show the inner_shape attribute of the shape object computed from `aa`.
shap.inner_shape

In [None]:
# Try to reshape the ragged batch to the shape `s2`.
# NOTE(review): this cell has no recorded output - confirm that tf.reshape accepts this combination.
tf.reshape(aa, s2)

In [None]:
# Start values of batch example 2 (padding -1. stripped), scaled by 125 - the same factor passed as `ratio` elsewhere.
tf.RaggedTensor.from_tensor(d['valid_stu_start'], padding=-1.)[2]*125

In [None]:
# Duration values of batch example 2 (padding -1. stripped), scaled by 125 - the same factor passed as `ratio` elsewhere.
tf.RaggedTensor.from_tensor(d['valid_stu_duration'], padding=-1.)[2]*125

In [None]:
# Rebind the global `A` (also read by Get_Gradient) to a fresh scalar variable initialised to -1.
A = tf.Variable(-1.)

In [None]:
# Rebuild the ragged inputs from the last batch `d` for manual experiments.
batch, ratio = 10, 125
# from_tensor strips the trailing -1 padding introduced by padded_batch.
value = tf.RaggedTensor.from_tensor(d['stu_mfcc'], padding=-1*tf.ones(80,))
start = tf.RaggedTensor.from_tensor(d['valid_stu_start'], padding=-1.)
duration = tf.RaggedTensor.from_tensor(d['valid_stu_duration'], padding=-1.)
end = start + duration
# (start, end) pairs scaled by `ratio` and truncated to integers.
period = tf.cast(tf.stack([start, end], axis=-1) * ratio, tf.int64)

In [None]:
# Gradient of the mean pooled value with respect to `A`, computed eagerly.
with tf.GradientTape() as t:
    v2 = value * A
    # inform_pooling takes (value, start, duration, ratio); it derives the
    # batch size from `value` itself, so `batch` must not be passed.
    Final_Tensor = inform_pooling(v2, start, duration, ratio)
    J = tf.reduce_mean(Final_Tensor)
G = t.gradient(J,A)
print(G)

In [None]:
# Mean over all entries of the pooled result from the gradient cell.
tf.reduce_mean(Final_Tensor)

In [None]:
# Static shape of the pooled result returned by inform_pooling.
Final_Tensor.shape

In [None]:
# Shape obtained when `start` and `duration` are stacked along a new last axis.
tf.shape(tf.ragged.stack([start,duration], axis=-1))

In [None]:
# Static shape of the ragged `start` tensor.
start.shape

In [None]:
# One index range per segment of batch example 0, running from period[..., 0] up to (excluding) period[..., 1].
rag = tf.ragged.range(period[..., 0][0], period[...,1][0])

In [None]:
# Rows of example 0's `value` selected by the per-segment index ranges in `rag`.
g = tf.gather(value[0],rag)

In [None]:
# Average the gathered rows of every segment (axis 1), as inform_pooling does per example.
tf.reduce_mean(g, axis=1)

In [None]:
# Train (`a`) and test (`b`) datasets for fold 0 of 5, batched by 10.
a,b = ds.k_fold(5, 0, 10)

In [None]:
# Un-paired, un-batched dataset (served from the cache once it has been loaded).
dsr = ds.get_raw_data()

In [None]:
# First element produced by windowing the shuffled raw data with window size 2 (scratch experiment).
next(iter(dsr.shuffle(5).window(2)))

In [None]:
###
#
# main_word_range = tf.range(tf.shape(i[0]['valid_ref_word'])[1])

In [None]:
# str(None) is the text 'None', so this builds a relative path literally named "None".
Path(str(None))

In [None]:
# Toy demonstration of Dataset.group_by_window on the integers 0..29:
# elements are keyed by their value modulo 3 and emitted in batches of 5.
window_size = 5


def key_func(x):
    """Grouping key: the element modulo 3."""
    return x%3


def reduce_func(key, window):
    """Turn one window of same-key elements into batches of ``window_size``."""
    return window.batch(window_size)


dataset = tf.data.Dataset.range(30).group_by_window(
    key_func=key_func,
    reduce_func=reduce_func,
    window_size=window_size)
for elem in dataset.as_numpy_iterator():
    print(elem)