# Libraries and functions

In [1]:
# import wavio as wv
# from gtts import gTTS
# import sounddevice as sd
from IPython.display import Audio

def audio_recording(seconds=5,frequency=44100,channels=2,filename="recorded_audio.wav"):

    """
    Record audio with sounddevice and save it to disk as a WAV file.

    Parameters
    ----------
    seconds : int or float
        Length of the recording in seconds.
    frequency : int
        Sampling rate in Hz; used for the capture and passed to the WAV writer.
    channels : int
        Number of channels to capture (default 2, the previously fixed value).
    filename : str
        Destination of the WAV file (default "recorded_audio.wav").

    Returns
    -------
    The value returned by ``wavio.write``. The recording is delivered
    through the file on disk, not through the return value.

    Raises
    ------
    ValueError
        If ``seconds * frequency`` does not amount to at least one frame.
    """

    # Validate before touching the audio device: a zero or negative frame
    # count cannot produce a usable recording.
    frames = int(seconds*frequency)
    if frames <= 0:
        raise ValueError(
            f"seconds*frequency must give at least one frame, got {frames}"
        )

    # Third-party audio packages are imported at call time.
    import wavio as wv
    import sounddevice as sd

    print("Start recording...")
    recording = sd.rec(frames,samplerate=frequency,channels=channels) # recording data
    sd.wait() # block until the capture has finished
    print("Stop recording")
    return wv.write(filename,recording,frequency,sampwidth=2) # saving data (16-bit samples)
    
def speech2text(path, chunk_dir="audio-chunks", min_silence_len=500,
                silence_margin=14, keep_silence=500, language="en-US"):

    """
    Transcribe a WAV file by splitting it on silence and running Google
    speech recognition on each chunk.

    Parameters
    ----------
    path : str
        WAV file to transcribe.
    chunk_dir : str
        Directory where the intermediate chunk files are written
        (created if missing; files are left in place afterwards).
    min_silence_len : int
        Value passed to pydub's ``split_on_silence`` as ``min_silence_len``.
    silence_margin : int or float
        Subtracted from the audio's ``dBFS`` to obtain ``silence_thresh``.
    keep_silence : int
        Value passed to ``split_on_silence`` as ``keep_silence``.
    language : str
        Language tag forwarded to ``recognize_google``.

    Returns
    -------
    str
        The recognized chunks concatenated in order, each capitalized and
        followed by ". ". Empty when no chunk was recognized.

    Notes
    -----
    Chunks that raise ``sr.UnknownValueError`` are reported and skipped.
    Any other exception from the recognizer is not caught here.
    """

    import os
    import warnings
    from pydub import AudioSegment
    from pydub.silence import split_on_silence
    import speech_recognition as sr

    # NOTE(review): this disables every warning for the rest of the session,
    # not only inside this function. Kept so existing behaviour is unchanged.
    warnings.filterwarnings("ignore")

    recognizer = sr.Recognizer() # speech recognition object
    sound = AudioSegment.from_wav(path) # open the audio file using pydub
    chunks = split_on_silence(
        sound,
        min_silence_len=min_silence_len,
        silence_thresh=sound.dBFS - silence_margin, # threshold relative to the clip's own level
        keep_silence=keep_silence,
    )

    # exist_ok avoids the check-then-create race of isdir() + mkdir()
    os.makedirs(chunk_dir, exist_ok=True)

    pieces = []
    for i, audio_chunk in enumerate(chunks, start=1):
        # export the chunk so the recognizer can read it back as an audio file
        chunk_filename = os.path.join(chunk_dir, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = recognizer.record(source)
        # the audio is in memory now, so the file is closed before the request
        try:
            text = recognizer.recognize_google(audio_listened, language=language)
        except sr.UnknownValueError as e:
            print("Error:", str(e))
            continue
        text = f"{text.capitalize()}. "
        print(chunk_filename, ":", text)
        pieces.append(text)
    return "".join(pieces)

def text2speech(text=None, filename="transcripted_audio.wav"):

    """
    Synthesize speech from text with gTTS and save it to an audio file.

    Parameters
    ----------
    text : str, optional
        Text to speak. When omitted, the notebook-level variable
        ``transcription`` is used, which keeps ``text2speech()`` working
        as before.
    filename : str
        Destination file (default "transcripted_audio.wav").

    Returns
    -------
    The value returned by ``gTTS.save``. The audio is delivered through
    the file on disk, not through the return value.

    Raises
    ------
    NameError
        If ``text`` is omitted and ``transcription`` is not defined.
    ValueError
        If the text is empty or only whitespace.

    Notes
    -----
    gTTS writes MP3-encoded data whatever the file extension is. The default
    name keeps its ".wav" extension because the playback cell of this
    notebook opens the file under that name.
    """

    if text is None:
        # Backward-compatible fallback to the notebook global.
        text = transcription

    # Fail early with a clear message instead of an assertion inside gTTS.
    if not str(text).strip():
        raise ValueError("text2speech needs non-empty text to synthesize")

    from gtts import gTTS

    audio = gTTS(text=text) # build the speech request for the text
    return audio.save(filename) # perform the request and write the audio file

# Audio Recording

In [2]:
# Capture ten seconds of audio; the clip is saved as "recorded_audio.wav".
recording_length_s = 10
audio_recording(seconds=recording_length_s)

Start recording...
Stop recording


# Speech Recognition (Recorded Audio Display) 

In [3]:
# Embed a player for the clip that was just recorded.
recorded_player = Audio("recorded_audio.wav")
recorded_player

# Speech to Text

In [4]:
# Transcribe the recording and print it next to the sentence that was spoken.
# `transcription` must stay a notebook-level name: text2speech() reads it.
recorded_path = "recorded_audio.wav"
transcription = speech2text(recorded_path)
print(f"\n\n Transcription: \n {transcription}")
print("\n Real phrase: \n 'Good morning. We are testing the mic. I am your host in todays show. I hope you enjoy it.'")



result2:
{   'alternative': [   {   'confidence': 0.87471259,
                           'transcript': 'good morning we are testing the mic '
                                         'I am your closing today show I hope '
                                         'you enjoyed'},
                       {   'transcript': 'good morning we are testing the mic '
                                         'I am your cousin Today show I hope '
                                         'you enjoyed'},
                       {   'transcript': 'good morning we are testing the mic '
                                         "I am you're hosting Today show I "
                                         'hope you enjoyed'},
                       {   'transcript': 'good morning we are testing the mic '
                                         "I am you're closing today show I "
                                         'hope you enjoyed'},
                       {   'transcript': 'good morning we are testi

# Text to Speech

In [5]:
# Turn the transcription back into speech, then embed a player for the result.
text2speech()
synthesized_player = Audio("transcripted_audio.wav")
synthesized_player