In [1]:
!pip install python_speech_features

Collecting python_speech_features
  Downloading python_speech_features-0.6.tar.gz (5.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: python_speech_features
  Building wheel for python_speech_features (setup.py) ... [?25l[?25hdone
  Created wheel for python_speech_features: filename=python_speech_features-0.6-py3-none-any.whl size=5870 sha256=5d1e62c5caa5ac6f9cef21079d6fccc5e2e5380235948be3aa58c27f0b40daaf
  Stored in directory: /root/.cache/pip/wheels/5a/9e/68/30bad9462b3926c29e315df16b562216d12bdc215f4d240294
Successfully built python_speech_features
Installing collected packages: python_speech_features
Successfully installed python_speech_features-0.6


In [2]:
!pip install sounddevice

Collecting sounddevice
  Downloading sounddevice-0.4.6-py3-none-any.whl (31 kB)
Installing collected packages: sounddevice
Successfully installed sounddevice-0.4.6


In [3]:

import os
import pathlib
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import python_speech_features
from python_speech_features import mfcc
from IPython.display import Audio
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, Conv1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
import librosa
import pathlib
import soundfile as sf


In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
train_data_dir = pathlib.Path('/content/drive/My Drive/Luganda/Dataset/Train')
test_data_dir = pathlib.Path('/content/drive/My Drive/Luganda/Dataset/Test')

process_train_data_dir = pathlib.Path('content/drive/My Drive/Luganda/Dataset/Process_train')
process_test_data_dir = pathlib.Path('content/drive/My Drive/Luganda/Dataset/Process_test')

In [6]:
import sys

sys.path.append("/content/drive/My Drive/Luganda/modules")

In [7]:
from data_processing import create_train_audio_dataset
from data_processing import create_test_audio_dataset

In [None]:
def create_audio_ds(data_dir, batch_size=64, validation_split=0.2, seed=42, output_sequence_length=16000):
  train_ds, val_ds = tf.keras.utils.audio_dataset_from_directory(
        directory=data_dir,
        batch_size=batch_size,
        validation_split=validation_split,
        seed=seed,
        output_sequence_length=output_sequence_length,
        subset='both'
    )

    label_names = np.array(train_ds.class_names)

    def squeeze(audio, labels):
        audio = tf.squeeze(audio, axis=-1)
        return audio, labels

    train_ds = train_ds.map(squeeze, tf.data.AUTOTUNE)
    val_ds = val_ds.map(squeeze, tf.data.AUTOTUNE)

    for example_audio, example_labels in train_ds.take(1):
        print(example_audio.shape)
        print(example_labels.shape)

    train_ds = train_ds.cache().prefetch(tf.data.AUTOTUNE)
    val_ds = val_ds.cache().prefetch(tf.data.AUTOTUNE)

    return train_ds, val_ds, label_names


In [8]:
train_ds, val_ds, label_names = create_train_audio_dataset(train_data_dir)
test_ds = create_test_audio_dataset(test_data_dir)


Found 13375 files belonging to 7 classes.
Using 10700 files for training.
Using 2675 files for validation.
(64, 16000)
(64,)
Found 3304 files belonging to 7 classes.
(64, 16000)
(64,)


In [9]:
for i, label_name in enumerate(label_names, 1):
    print(f"{i}. {label_name}")

labels = list(label_names)
labels

1. ddyo
2. emabega
3. gaali
4. kkono
5. mumaaso
6. unknown
7. yimirira


['ddyo', 'emabega', 'gaali', 'kkono', 'mumaaso', 'unknown', 'yimirira']

In [10]:
test_ds = create_test_audio_dataset(test_data_dir)

Found 3304 files belonging to 7 classes.
(64, 16000)
(64,)


##Feature Extraction of MFCCs & prosody

In [11]:
def extract_mfcc():
  mfccs = mfcc(signal[:rate], rate, numcep=13, nfilt=26, nfft=1103).T
  return mfccs

In [12]:
def extract_features(waveform, n_mfcc=13, hop_length=512, n_fft=2048):
  signal, sr = librosa.load(waveform, sr=16000)
  mfccs = librosa.feature.mfcc(signal, sr=16000, n_mfcc= n_mfcc, hop_length=hop_length, n_ftt=n_ftt)

  pitches, magnitudes = librosa.piptrack(signal, sr=sr, hop_length=hop_length, n_fft=n_fft)
  pitch = np.mean(pitches,axis=0)
  return np.concatenate(mfccs, pitch.reshape(1,-1), axis=0)

In [21]:
import tempfile

In [32]:
def preprocess_mfcc_audio_datasets(train_ds, val_ds, test_ds):
    def get_mfcc(waveform=train_ds, sample_rate=16000, n_mfcc=13, hop_length=218, n_fft=1103):


        signal, sr = librosa.load(waveform, sr=sample_rate )
        mfccs= librosa.feature.mfcc(signal, sr=sample_rate, n_mfcc=n_mfcc, hop_length=hop_length, n_fft=n_fft)

        pitches, magnitudes = librosa.piptrack(signal, sr=sr, hop_length=hop_length, n_fft=n_fft)
        pitch = np.mean(pitches,axis=0)
        pitch = np.concatenate(mfccs, pitch.reshape(1,-1), axis=0)
        pitch = pitch.T

        return mfccs

    def make_mfcc_ds(ds):
        return ds.map(lambda x, y: (get_mfcc(x),y), tf.data.AUTOTUNE)

    train_mfcc_ds = make_mfcc_ds(train_ds)
    val_mfcc_ds = make_mfcc_ds(val_ds)
    test_mfcc_ds = make_mfcc_ds(test_ds)

    train_mfcc_ds = train_mfcc_ds.cache().shuffle(10000).prefetch(tf.data.AUTOTUNE)
    val_mfcc_ds = val_mfcc_ds.cache().prefetch(tf.data.AUTOTUNE)
    test_mfcc_ds = test_mfcc_ds.cache().prefetch(tf.data.AUTOTUNE)

    return train_mfcc_ds, val_mfcc_ds, test_mfcc_ds

In [33]:
train_mfcc_ds, val_mfcc_ds, test_mfcc_ds  = preprocess_mfcc_audio_datasets(train_ds, val_ds, test_ds)

TypeError: in user code:

    File "<ipython-input-32-f14918b64a83>", line 16, in None  *
        lambda x, y: (get_mfcc(x),y)
    File "<ipython-input-15-9022237e7e08>", line 3, in get_mfcc  *
        signal, sr = librosa.load(waveform, sr=sample_rate )
    File "/usr/local/lib/python3.10/dist-packages/librosa/core/audio.py", line 176, in load  *
        y, sr_native = __soundfile_load(path, offset, duration, dtype)
    File "/usr/local/lib/python3.10/dist-packages/librosa/core/audio.py", line 209, in __soundfile_load  *
        context = sf.SoundFile(path)
    File "/usr/local/lib/python3.10/dist-packages/soundfile.py", line 658, in __init__  **
        self._file = self._open(file, mode_int, closefd)
    File "/usr/local/lib/python3.10/dist-packages/soundfile.py", line 1212, in _open
        raise TypeError("Invalid file: {0!r}".format(self.name))

    TypeError: Invalid file: <tf.Tensor 'args_0:0' shape=(None, 16000) dtype=float32>


In [None]:

print(train_mfcc.element_spec)

In [None]:
def build