## Introduction
This notebook provides a utility for converting audio files to text. It takes a folder of .mp3 files as input, converts them to .wav, and generates transcriptions using the `speech_recognition` library.

# Audio to Text Converter

In [None]:
# Import libraries 
import os
import glob
import speech_recognition as sr

from tqdm import tqdm
from pydub import AudioSegment
from pydub.silence import split_on_silence

### Convert audio folder from mp3 to wav

In [None]:
def convert_mp3_to_wav(input_folder, output_folder):
    """Convert MP3 files to WAV format.

    Args:
        input_folder (str): Either a folder containing the MP3 files or a
            glob pattern matching them (e.g. ``"../data/raw/*"``).
        output_folder (str): The folder where the WAV files will be saved.
            It is created if it does not exist yet.
    """
    # A bare folder path globs to just the folder itself, so expand it to
    # "everything inside the folder"; a glob pattern is used unchanged.
    if os.path.isdir(input_folder):
        pattern = os.path.join(input_folder, "*")
    else:
        pattern = input_folder

    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    mp3_paths = [
        path
        for path in glob.glob(pattern)
        if os.path.splitext(path)[1].lower() == ".mp3"
    ]

    for mp3_path in tqdm(mp3_paths):
        # Use only the file name: keeping the source directory in the name
        # would nest it under output_folder and point to a missing path.
        stem = os.path.splitext(os.path.basename(mp3_path))[0]
        audio_mp3 = AudioSegment.from_file(mp3_path)
        audio_mp3.export(os.path.join(output_folder, f"{stem}.wav"), format="wav")

In [None]:
# Glob pattern matching the raw audio files (the converter globs this itself)
path_mp3 = '../data/raw/*'
# Output folder for the converted WAV files
path_wav = '../data/interim/'

# Convert mp3 to wav
convert_mp3_to_wav(path_mp3, path_wav)

### Convert all audio files in a folder to text, using chunks

In [None]:
# Start speech recognizer
recognizer = sr.Recognizer()

# Folder holding the WAV files to transcribe
# NOTE(review): the conversion cell writes its WAV files to
# '../data/interim/' while this cell reads './data/interim/' -- confirm
# which working directory is intended and align the two paths.
path_wav = './data/interim/'
# Folder that receives one .txt transcript per WAV file
path_text = './data/processed/'
# Scratch folder for the per-chunk WAV files handed to the recognizer
chunks_folder = "audio-chunks"

# Silence padding (in milliseconds) kept at both ends of every chunk
keep_silence_ms = 500

os.makedirs(path_text, exist_ok=True)
os.makedirs(chunks_folder, exist_ok=True)

# Glob for the files inside the folder: globbing the bare folder path only
# returns the folder itself, so no WAV file would ever be processed.
filelist = sorted(glob.glob(os.path.join(path_wav, "*.wav")))

## Read wav files
for wav_path in tqdm(filelist):
    # Bare file name (no directory, no extension) so the transcript is
    # written directly inside path_text.
    name = os.path.splitext(os.path.basename(wav_path))[0]

    audio_file = AudioSegment.from_file(wav_path)

    # Split the audio wherever silence lasts at least 500 milliseconds;
    # "silence" is anything 14 dB below the file's average loudness.
    chunks = split_on_silence(
        audio_file,
        min_silence_len=500,
        silence_thresh=audio_file.dBFS - 14,
        keep_silence=keep_silence_ms,
    )

    sentences = []

    # Process every chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        # The recognizer reads from disk, so export the chunk first
        # (chunk files are scratch data and are overwritten per input file).
        chunks_file = os.path.join(chunks_folder, f"chunk{i}.wav")
        audio_chunk.export(chunks_file, format="wav")

        with sr.AudioFile(chunks_file) as source:
            # Calibration reads (and thereby consumes) the start of the
            # source, so limit it to the silence padding; the default of one
            # second would swallow the first words of the chunk.
            recognizer.adjust_for_ambient_noise(source, duration=keep_silence_ms / 1000)
            audio = recognizer.record(source)  # Read the rest of the chunk

        try:
            text = recognizer.recognize_google(audio, language='es-CL')
        except sr.UnknownValueError:
            print("Audio not understandable")
        except sr.RequestError as e:
            print("No requirement obtained {0}".format(e))
        else:
            sentences.append(f"{text.capitalize()}. ")

    # Write the transcript once per file, and only if something was
    # recognized. UTF-8 keeps accented Spanish characters intact on every
    # platform.
    if sentences:
        transcript_path = os.path.join(path_text, f"{name}.txt")
        with open(transcript_path, mode='w', encoding='utf-8') as transcript:
            transcript.write("".join(sentences))
            transcript.write("\n")
        


## Conclusion
This notebook offers a straightforward way to convert audio files into text. It can be extended to support more audio formats or to improve the accuracy of the transcriptions.