## Install Libraries

In [None]:
#!pip install SpeechRecognition
#!pip install pydub
#!pip install ffmpeg

## Import Libraries

In [None]:
# importing libraries 
import os
import speech_recognition as sr
from pydub import AudioSegment
from pydub.silence import split_on_silence

## Functions

In [None]:
def split_audio_chunks(sound, min_silence_len=500, silence_offset_db=14, keep_silence=500):
    """
    Split the audio into chunks wherever a long enough silence occurs.

    Args:
        sound: pydub ``AudioSegment`` to split.
        min_silence_len: Value passed to ``split_on_silence`` as
            ``min_silence_len`` (milliseconds of silence needed for a split).
        silence_offset_db: Number of dB subtracted from ``sound.dBFS`` to
            obtain the silence threshold, so the threshold follows the
            loudness of the recording rather than being an absolute level.
        keep_silence: Value passed to ``split_on_silence`` as
            ``keep_silence`` (milliseconds of silence left around each chunk).

    Returns:
        The chunks produced by ``split_on_silence``.
    """
    return split_on_silence(
        sound,
        min_silence_len=min_silence_len,
        silence_thresh=sound.dBFS - silence_offset_db,
        keep_silence=keep_silence,
    )

def recognize_audio_chunks(chunks, recognizer, output_file):
    """
    Recognize text from audio chunks, printing each result and writing it to the output file.

    Each chunk is exported to a temporary ``temp.wav`` inside ``AUDIO_FILE_PATH``,
    loaded back through ``sr.AudioFile`` and sent to
    ``recognizer.recognize_google``. Chunks for which ``sr.UnknownValueError``
    is raised are reported on the console and skipped.

    Args:
        chunks: Iterable of audio chunks exposing ``export(path, format=...)``.
        recognizer: ``sr.Recognizer`` used to record and recognize the audio.
        output_file: Writable text file object; one line is written per
            recognized chunk.

    Raises:
        Any exception other than ``sr.UnknownValueError`` raised while
        exporting or recognizing a chunk propagates to the caller; the
        temporary file is removed first.
    """
    temp_path = os.path.join(AUDIO_FILE_PATH, "temp.wav")

    for i, audio_chunk in enumerate(chunks, start=1):
        try:
            # export() returns the output file handle; close it explicitly so
            # the file is not still open when it is read back and deleted.
            audio_chunk.export(temp_path, format="wav").close()

            with sr.AudioFile(temp_path) as source:
                audio_data = recognizer.record(source)

            try:
                text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError as e:
                print("Error:", str(e))
                continue

            text = f"{text.capitalize()}. "
            print(f"Chunk {i}: {text}")
            output_file.write(text + '\n')
        finally:
            # Always clean up, even when recognition fails unexpectedly
            # (previously the temp file leaked on any unhandled error).
            if os.path.exists(temp_path):
                os.remove(temp_path)

def get_large_audio_transcription(audio_file, show_transcript=True):
    """
    Split a large WAV file into chunks, transcribe each chunk and return the full transcript.

    The transcript is written to the module-level ``output_file_path`` and
    then read back from it. Temporary chunk files are placed in
    ``AUDIO_FILE_PATH``, which is created if it does not exist.

    Args:
        audio_file: Path of the WAV file to transcribe.
        show_transcript: When true, print the complete transcript after
            processing.

    Returns:
        The transcript text, regardless of ``show_transcript``.
    """
    r = sr.Recognizer()

    sound = AudioSegment.from_wav(audio_file)
    chunks = split_audio_chunks(sound)

    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(AUDIO_FILE_PATH, exist_ok=True)

    # Explicit UTF-8 so non-ASCII recognized text does not depend on the
    # platform's default encoding.
    with open(output_file_path, "w", encoding="utf-8") as output_file:
        recognize_audio_chunks(chunks, r, output_file)

    # Always load the transcript: previously it was only assigned when
    # show_transcript was true, so the return raised UnboundLocalError otherwise.
    with open(output_file_path, "r", encoding="utf-8") as output_file:
        transcript = output_file.read()

    if show_transcript:
        print(transcript)

    return transcript

## Files

In [None]:
# File & folders
# Directory containing the input audio; the temporary per-chunk "temp.wav"
# is also written here during recognition.
AUDIO_FILE_PATH = "C:\\Farid\\gitProjects\\Speech to Text\\audio2trascript\\data\\"
# Name of the WAV file inside AUDIO_FILE_PATH to transcribe.
AUDIO_FILE_NAME = "my_audio.wav"

## Process

In [None]:
# Full path of the audio file to transcribe
audio_file = os.path.join(AUDIO_FILE_PATH, AUDIO_FILE_NAME)

# The transcript is saved next to the audio: same base name, ".txt" extension
output_file_path = f"{os.path.splitext(audio_file)[0]}.txt"

# Transcribe the audio and print the resulting text
transcript = get_large_audio_transcription(
    audio_file,
    show_transcript=True,
)