Modification of `whisper to text.ipynb` to handle bulk transcription of an entire directory. This uses the CPU-based whisper.cpp model, which is slower by 14-60x but produces the highest-quality output: punctuation and acronyms are most often correct, so very little, if any, editing is required.

Added a file-renaming step that replaces spaces with hyphens in any audio filename (an unquoted filename on the ffmpeg shell command line is cut off at the first space).

In [None]:
# https://github.com/ggerganov/whisper.cpp
# conda activate py310
import subprocess
import os
import glob

In [None]:
# rename all audio files with spaces in their name
# poe.com assisted code
# Specify the directory where the files are located
#directory = '/var/home/fraser/machine_learning/whisper.cpp/samples/'
directory = '/var/home/fraser/Music/Voice_Memos/'


def hyphenated_path(path):
    """Return path with the spaces in its file name replaced by hyphens.

    Only the last path component is rewritten. Spaces in parent directories
    are kept, because renaming a file must not move it into a directory that
    does not exist.
    """
    folder, name = os.path.split(path)
    return os.path.join(folder, name.replace(' ', '-'))


# Get a list of all audio files (.m4a, .mp3, .ogg and .wav) in the directory
files = [
    path
    for pattern in ('*.m4a', '*.mp3', '*.ogg', '*.wav')
    for path in glob.glob(os.path.join(directory, pattern))
]

# Rename the files and record the resulting paths, so that `files` always
# refers to files that actually exist on disk after this cell has run
# (use this approach also for directory transcription)
renamed_files = []
for file in files:
    new_name = hyphenated_path(file)
    if new_name != file:
        if os.path.exists(new_name):
            # os.rename silently replaces an existing target on POSIX;
            # keep both recordings rather than overwrite one of them.
            print(f"Not renaming {file}: {new_name} already exists.")
            new_name = file
        else:
            os.rename(file, new_name)
    renamed_files.append(new_name)
files = renamed_files

In [None]:
# iterate over all audio files and transcribe them:
# note that each entry of `files` is a full path, so directory information
# is contained in the file variable
model_path = '/var/home/fraser/machine_learning/whisper.cpp/models/ggml-model-whisper-large-q5_0.bin'

for file in files:
    audio_file = file
    # skip wav files written by an earlier run of this cell; otherwise they
    # would be converted and transcribed again as if they were recordings
    if audio_file.endswith('-output.wav'):
        continue

    # convert audio file to 16-bit wav format required by whisper
    output_file = audio_file + '-output.wav'
    print(audio_file)
    print(output_file)

    # convert audio_file then transcribe to text
    # -y overwrites an existing file with the same name without prompting.
    # Arguments are passed as a list (no shell), so file names containing
    # spaces or shell metacharacters reach ffmpeg unchanged.
    try:
        subprocess.run(['ffmpeg', '-y', '-i', audio_file,
                        '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le',
                        output_file],
                       stdin=subprocess.DEVNULL, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Audio convertion failed with error {e.returncode}.")
        # nothing valid to transcribe for this file
        continue
    except OSError as e:
        # raised when the ffmpeg executable itself cannot be started
        print(f"Audio convertion failed: could not run ffmpeg ({e}).")
        continue
    print("Audio coverted successfully.")

    # transcribe using the large quantized CPU model, output text file
    try:
        subprocess.run(['transcribe', '-t', '24', '-m', model_path,
                        '-f', output_file, '-otxt'],
                       stdin=subprocess.DEVNULL, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Transcription failed with error {e.returncode}.")
    except OSError as e:
        # raised when the transcribe executable itself cannot be started
        print(f"Transcription failed: could not run transcribe ({e}).")
    else:
        # NOTE(review): assumes `transcribe` is the whisper.cpp CLI, whose
        # -otxt option writes the text to <input file>.txt - confirm.
        print("Transcription executed successfully and saved in " + output_file + '.txt')