In [29]:
import itertools
from time import sleep
import numpy as np
import os
import pandas as pd
import re
import pathlib
import csv
import tensorflow as tf
from tensorflow import keras
from keras.models import load_model
from sklearn.metrics import accuracy_score, f1_score, recall_score
from sklearn.metrics import precision_score
import shutil
import soundfile as sf
import matplotlib.pyplot as plt
import librosa
from pathlib import Path
import keyboard

In [30]:
DATASET_ROOT = os.path.join(os.path.expanduser("~"),'dataSet/audio/agender_distribution/')
NETWORK_ROOT = os.path.join(os.path.expanduser("~"),'Mestrado-PC/github/Conv1D/CNN/')

In [31]:
# pause() function definition.
def pause():
	while True:
		if keyboard.read_key() == 'space':
			# If you put 'space' key
			# the program will resume.
			break

In [32]:
def audio_to_fft(audio):
    # Since tf.signal.fft applies FFT on the innermost dimension, we need to squeeze the dimensions and then expand them again after FFT
    audio = tf.squeeze(audio, axis=-1)
    fft = tf.signal.fft(tf.cast(tf.complex(real=audio, imag=tf.zeros_like(audio)), tf.complex64))
    fft = tf.expand_dims(fft, axis=-1)
    # Return the absolute value of the first half of the FFT which represents the positive frequencies
    return tf.math.abs(fft[:, : (audio.shape[1] // 2), :])

In [33]:
def path_to_audio(path):
    # Reads and decodes an audio file
    audio = tf.io.read_file(path)
    audio, _ = tf.audio.decode_wav(audio, 1, 8000)
    return audio

In [34]:
def paths_and_labels_to_dataset(audio_paths, labels):
    # Constructs a dataset of audios and labels
    path_ds = tf.data.Dataset.from_tensor_slices(audio_paths)
    audio_ds = path_ds.map(lambda x: path_to_audio(x))
    label_ds = tf.data.Dataset.from_tensor_slices(labels)
    return tf.data.Dataset.zip((audio_ds, label_ds))

In [35]:
def map_func(npy_path):
    npy_content = np.load(npy_path)
    return npy_content

In [36]:
def paths_and_labels_to_dataset_HTK(paths, labels):
    path_ds = tf.data.Dataset.from_tensor_slices(paths)
    array_ds = path_ds.map(lambda x: tf.numpy_function(map_func, [x], tf.float64),num_parallel_calls=tf.data.experimental.AUTOTUNE)
    label_ds = tf.data.Dataset.from_tensor_slices(labels)
    return tf.data.Dataset.zip((array_ds, label_ds))

In [37]:
def npy_header_offset(npy_path):
    with open(str(npy_path), 'rb') as f:
        if f.read(6) != b'\x93NUMPY':
            raise ValueError('Invalid NPY file.')
        version_major, version_minor = f.read(2)
        if version_major == 1:
            header_len_size =2
        elif version_major == 2:
            header_len_size = 4
        else:
            raise ValueError('Unknown NPY file version {}.{}.'.format(version_major, version_minor))
        header_len = sum(b << (8 * i) for i, b in enumerate(f.read(header_len_size)))
        header = f.read(header_len)
        if not header.endswith(b'\n'):
            raise ValueError('Invalid NPY file.')
        return f.tell()

In [74]:
train_file_list_path = 'file_lists/HTK-FFT/train_database_sorted2.csv'
test_file_list_path = 'file_lists/HTK-FFT/test_database_sorted2.csv'

In [75]:
train_file_list = pd.read_csv(os.path.join(NETWORK_ROOT, train_file_list_path))
train_audio_files = train_file_list['file']
train_classes = train_file_list['class']

In [79]:
audio_paths = list(train_audio_files)
labels = list(train_classes)

In [41]:
for i in range(len(audio_paths)):
    audio_paths[i] = re.sub('.wav', '-n.mfc.csv', audio_paths[i])

In [102]:
for i in range(len(audio_paths)):
    audio_paths[i] = re.sub('-n.npy', '-n.wav', audio_paths[i])

In [93]:
for i in range(len(audio_paths)):
    audio_paths[i] = os.path.join(DATASET_ROOT, audio_paths[i])

In [None]:
train_ds = paths_and_labels_to_dataset_HTK(audio_paths, labels)
train_ds_fft = paths_and_labels_to_dataset(audio_paths, labels)

In [100]:
path_ds = tf.data.Dataset.from_tensor_slices(audio_paths)

In [None]:
audio_paths

In [None]:
for element in path_ds:
    a = tf.io.read_file(element)
    # b = tf.io.decode_raw(a, out_type=float, little_endian=False)
    print(element)
    print(a)
    '''b = tf.strings.to_number(tf.strings.split(a, sep=" "), tf.float64)
    print(type(b))
    print(b)'''
    input("Press any key to terminate the program")

In [45]:
npy_file = '/home/ferreiraa/dataSet/audio/agender_distribution/wav_traindevel/1006/1/a11006s14-n.npy'

In [46]:
num_features = 1170
dtype = tf.float64

In [47]:
header_offset = npy_header_offset(npy_file)

In [48]:
header_offset

128

In [49]:
dataset = tf.data.FixedLengthRecordDataset([npy_file], num_features * dtype.size, header_bytes=header_offset)

In [None]:
dataset = dataset.map(lambda s: tf.io.decode_raw(s, dtype))

In [50]:
dataset = dataset.map(lambda s: tf.reshape(tf.io.decode_raw(s, dtype), (num_features,)))

In [51]:
dataset

<MapDataset element_spec=TensorSpec(shape=(1170,), dtype=tf.float64, name=None)>

In [52]:
list(dataset.as_numpy_iterator())

[array([-2.108996e+01, -2.541953e+01, -2.317181e+01, ..., -5.002567e-02,
        -1.043014e-03,  5.149288e-02])]

In [53]:
dataset2 = tf.data.FixedLengthRecordDataset([npy_file], num_features * dtype.size, header_bytes=header_offset)

In [55]:
dataset2 = dataset2.map(lambda s: tf.reshape(tf.io.decode_raw(s, dtype), (num_features, 1)))

In [56]:
dataset2

<MapDataset element_spec=TensorSpec(shape=(1170, 1), dtype=tf.float64, name=None)>

In [57]:
list(dataset2.as_numpy_iterator())

[array([[-2.108996e+01],
        [-2.541953e+01],
        [-2.317181e+01],
        ...,
        [-5.002567e-02],
        [-1.043014e-03],
        [ 5.149288e-02]])]

In [95]:
element_dataset = tf.data.FixedLengthRecordDataset([audio_paths], num_features * dtype.size, header_bytes=header_offset)

In [96]:
element_dataset

<FixedLengthRecordDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [97]:
audio_dataset = element_dataset.map(lambda s: tf.reshape(tf.io.decode_raw(s, dtype), (num_features, 1)))

In [98]:
audio_dataset

<MapDataset element_spec=TensorSpec(shape=(1170, 1), dtype=tf.float64, name=None)>

In [99]:
list(audio_dataset.as_numpy_iterator())

[array([[-2.108996e+01],
        [-2.541953e+01],
        [-2.317181e+01],
        ...,
        [-5.002567e-02],
        [-1.043014e-03],
        [ 5.149288e-02]]),
 array([[-1.342177e+01],
        [-8.199965e+00],
        [-4.861622e+00],
        ...,
        [ 9.349146e-02],
        [ 6.808297e-02],
        [ 3.530367e-03]]),
 array([[-13.65137  ],
        [-29.66002  ],
        [-24.57028  ],
        ...,
        [ -0.4184372],
        [ -0.1826239],
        [  0.3261425]]),
 array([[-1.345268e+01],
        [-1.648699e+01],
        [-1.635873e+01],
        ...,
        [-2.857000e-03],
        [ 1.857936e-04],
        [ 3.254887e-03]]),
 array([[-1.186642e+01],
        [ 2.608200e+00],
        [ 1.934733e+00],
        ...,
        [-9.494931e-03],
        [ 2.645722e-02],
        [ 3.704117e-02]]),
 array([[-16.68189  ],
        [-16.54376  ],
        [-15.9441   ],
        ...,
        [  0.1712335],
        [  0.2193936],
        [  0.1977948]]),
 array([[-11.57684  ],
        [-1

In [70]:
conteudo = pd.read_csv('/home/ferreiraa/dataSet/audio/agender_distribution/wav_traindevel/1006/4/a11006s18-n.mfc.csv', delimiter=' ', nrows=30, header=None)

In [71]:
conteudo

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,29,30,31,32,33,34,35,36,37,38
0,-8.770237,-2.376163,-0.795871,-6.62007,-15.35867,-5.479703,-4.583696,-0.083572,-6.248782,-8.495719,...,0.736038,0.021232,0.845238,0.574815,0.043316,-0.465443,0.35146,0.631143,0.310012,0.051857
1,-0.952211,11.60738,-11.03594,-17.62693,-21.21525,-17.93602,-0.328501,-14.56182,4.411447,-21.1957,...,1.353755,0.521327,1.496014,0.740964,0.635687,-0.80537,0.903474,0.931309,1.49499,0.208564
2,-0.415939,14.79015,-11.98219,-18.91511,-15.92163,-16.52111,-4.894401,-10.27353,3.474319,-17.82473,...,0.899584,1.129841,1.196684,0.768476,0.768728,-0.802689,0.997704,0.877035,2.213919,0.329052
3,-0.993454,14.44876,-14.55492,-13.98667,-20.98696,-11.22763,2.249686,-12.62925,-2.316049,-21.35905,...,-0.157975,1.977557,0.237571,0.077585,0.81698,-0.635036,0.894406,0.421386,2.149121,0.358806
4,-1.49464,14.15547,-18.1973,-9.979894,-17.22401,-7.972981,7.8839,-17.29025,3.661576,-14.38212,...,-0.933498,1.968627,-0.579342,-0.951341,0.696907,-0.775161,0.662629,0.167984,1.292768,0.234895
5,-3.44128,11.08314,-23.07516,-14.17853,-11.89217,-11.14452,7.653436,-13.18472,3.490082,-20.8646,...,-0.223404,0.867514,-0.31997,-1.96782,0.454425,-0.546614,1.16433,0.410782,-0.067501,0.027146
6,-5.352982,7.696323,-28.38673,-24.84312,-3.005089,-15.08222,9.585992,-15.96137,0.368256,-18.59848,...,1.289004,-0.776607,0.366367,-2.523249,0.730988,-0.107211,1.308294,0.201454,-0.126534,-0.101152
7,-8.635302,3.197315,-25.43674,-21.77644,5.825172,-13.31212,8.303496,-15.64725,-4.376194,-17.13935,...,1.893786,-2.101603,0.558234,-1.671049,0.584955,1.199228,0.500505,-0.092172,0.095288,-0.197703
8,-12.1054,-5.573937,-18.61599,-19.4894,8.756953,-12.1595,1.265155,-14.76555,-3.798175,-5.676677,...,1.598554,-2.042001,0.421398,0.265539,0.176733,2.33562,-0.943441,-0.657392,0.19806,-0.26505
9,-15.80602,-10.46008,-9.964294,-10.88223,3.8987,-6.164186,-11.27257,-8.182781,-3.324851,-1.712164,...,0.667536,-0.848598,-0.112848,2.04859,-0.138499,2.192009,-1.573885,-1.002007,0.288489,-0.411464


In [104]:
path_ds = tf.data.Dataset.from_tensor_slices(audio_paths)

In [106]:
audio_ds = path_ds.map(lambda x: path_to_audio(x))

In [108]:
audio_ds

<MapDataset element_spec=TensorSpec(shape=(8000, 1), dtype=tf.float32, name=None)>

In [109]:
audio_dataset

<MapDataset element_spec=TensorSpec(shape=(1170, 1), dtype=tf.float64, name=None)>