In [9]:
import deepspeech
import numpy as np
import pandas as pd
import wave
from pydub import AudioSegment
import mysql.connector
import json
import io

In [10]:
# Instantiate the DeepSpeech model from the pre-trained 0.9.3 `.pbmm` file.
# The path is relative, so the notebook must be run from the directory that
# contains `Resources/`.
ds = deepspeech.Model(
    'Resources/deepspeech/pretrained/deepspeech-0.9.3-models.pbmm'
)

In [13]:
# Configuration constants
# Relative path for training audio (judging by the name; it is not referenced
# in the cells shown here).
AUDIO_TRAINING_PATH: str = 'Resources/deepspeech/audio'

To train the DeepSpeech model with custom examples, we need to feed it audio files. Let us create some functions for reading and preprocessing audio files for training, and for storing them in a database.

In [14]:
# Convert audio files to a 16 kHz frame rate and 1 channel (to be compatible with our pre-trained model)
def convert_to_16k(filename):
    """Load a WAV file and convert it to 16 kHz, single-channel audio.

    Parameters
    ----------
    filename : str
        Path of the audio file. Only names ending in ``.wav`` (in any letter
        case) are processed.

    Returns
    -------
    pydub.AudioSegment or None
        The converted segment (an ``AudioSegment`` object, not raw bytes), or
        ``None`` when ``filename`` does not have a ``.wav`` extension.
    """
    # Compare the extension case-insensitively so that e.g. 'CLIP.WAV' is
    # converted instead of being silently skipped.
    if not filename.lower().endswith('.wav'):
        return None

    audio = AudioSegment.from_wav(filename)
    return audio.set_frame_rate(16000).set_channels(1)

In [15]:
# Store audio files into training database
def store_into_database(file, cursor):
    """Convert one audio file and insert it into the ``audio_files`` table.

    The file is converted with ``convert_to_16k``, serialised as a complete
    WAV file (header included) and stored in ``audio_data``; a JSON document
    describing it is stored in ``meta_data``.

    Parameters
    ----------
    file : str
        Path of the audio file to store.
    cursor
        Database cursor exposing ``execute(query, params)`` with ``%s``
        placeholders.

    Raises
    ------
    ValueError
        If ``convert_to_16k`` returns ``None`` for ``file`` (i.e. the file
        type is not supported).

    Notes
    -----
    This function does not commit. The caller is expected to commit on the
    connection, e.g. once after a loop of inserts.
    """
    query = 'INSERT INTO audio_files(audio_data, meta_data) VALUES (%s, %s)'

    audio = convert_to_16k(file)
    if audio is None:
        # Fail with a clear message instead of an AttributeError on `audio.export`.
        raise ValueError(f'Unsupported audio file (expected a .wav file): {file!r}')

    # Serialise the converted audio as WAV bytes
    buffer = io.BytesIO()
    audio.export(buffer, format='wav')
    audio_bytes = buffer.getvalue()

    meta_data = json.dumps({'filename': file,
                            'length': len(audio),      # pydub reports len() in milliseconds
                            'frame_rate': audio.frame_rate,
                            'channels': audio.channels})

    cursor.execute(query, (audio_bytes, meta_data))
    # No commit here: the caller commits (outside its loop).

In [18]:
# Read audio files from database
def read_wav_from_database(file_id, cursor):
    """Fetch one stored recording and return its samples and frame rate.

    Parameters
    ----------
    file_id
        Value of the ``id`` column of the ``audio_files`` row to read.
    cursor
        Database cursor exposing ``execute(query, params)`` with ``%s``
        placeholders, and ``fetchone()``.

    Returns
    -------
    tuple of (np.ndarray, int)
        ``(samples, frame_rate)``: the audio samples as an ``int16`` array,
        and the ``frame_rate`` value recorded in the row's JSON metadata.

    Raises
    ------
    FileNotFoundError
        If no row with the given ``id`` exists.
    """
    # Pass the id as a bound parameter instead of formatting it into the SQL
    # text; this avoids SQL injection and matches the `%s` placeholder style
    # of the INSERT statement used for this table.
    query = 'SELECT audio_data, meta_data FROM audio_files WHERE id=%s'
    cursor.execute(query, (file_id,))
    row = cursor.fetchone()

    if row is None:
        raise FileNotFoundError(f'No audio file with id {file_id!r} in audio_files')

    audio_bytes, meta_data_str = row
    meta_data = json.loads(meta_data_str)

    # The blob is written as a complete WAV file, so the RIFF header must be
    # stripped before decoding; otherwise the header bytes would be returned
    # as bogus samples at the start of the array.
    try:
        with wave.open(io.BytesIO(audio_bytes), 'rb') as wav_file:
            pcm_bytes = wav_file.readframes(wav_file.getnframes())
    except (wave.Error, EOFError):
        # Not a parseable WAV container: treat the blob as headerless PCM.
        pcm_bytes = audio_bytes

    # NOTE(review): assumes 16-bit samples — confirm the stored sample width.
    audio_array = np.frombuffer(pcm_bytes, np.int16)

    return audio_array, meta_data['frame_rate']