# Transcribe Audio

# Transcribe YouTube Audios

In [2]:
import os
import whisper

# Load the Whisper small model
model = whisper.load_model("small")

# Directory containing the MP3 files
audio_dir = "../data/input/audio/guided_meditations/youtube"

# Directory to save the transcribed text files
text_dir = "../data/input/text_transcribed"
os.makedirs(text_dir, exist_ok=True)  # Create the directory if it doesn't exist

# List all MP3 files in the directory
audio_files = [f for f in os.listdir(audio_dir) if f.endswith('.mp3')]

# Transcribe each audio file and save the transcription
for audio_file in audio_files:
    audio_file_path = os.path.join(audio_dir, audio_file)
    print(f"Transcribing '{audio_file}'...")

    # Transcribe the audio file
    result = model.transcribe(audio_file_path, language="en")
    transcribed_text = result["text"]

    # Define the path for the output text file
    text_file_path = os.path.join(text_dir, f"{os.path.splitext(audio_file)[0]}.txt")

    # Save the transcribed text to a file
    with open(text_file_path, 'w') as text_file:
        text_file.write(transcribed_text)
    
    print(f"Transcription saved to '{text_file_path}'")
    print("-" * 80)


KeyboardInterrupt: 

# Transcribe Audio Dharma Talks and Guided Meditations

In [None]:
import os
import whisper

# Load the Whisper small model
model = whisper.load_model("small")

# Directory containing the MP3 files
audio_dir = "../data/input/audio/guided_meditations/audiodharma_dharmaseed"

# Directory to save the transcribed text files
text_dir = "../data/input/audio_transcribed/transscribed_audio_dharma"
os.makedirs(text_dir, exist_ok=True)  # Create the directory if it doesn't exist

# List all MP3 files in the directory
audio_files = [f for f in os.listdir(audio_dir) if f.endswith('.mp3')]

# Transcribe each audio file and save the transcription
for audio_file in audio_files:
    audio_file_path = os.path.join(audio_dir, audio_file)
    print(f"Transcribing '{audio_file}'...")

    # Transcribe the audio file
    result = model.transcribe(audio_file_path, language="en")
    transcribed_text = result["text"]

    # Define the path for the output text file
    text_file_path = os.path.join(text_dir, f"{os.path.splitext(audio_file)[0]}.txt")

    # Save the transcribed text to a file
    with open(text_file_path, 'w') as text_file:
        text_file.write(transcribed_text)
    
    print(f"Transcription saved to '{text_file_path}'")
    print("-" * 80)


Transcribing '20190818-Leigh_Brasington-TSD-jhanas_5_8-64183.mp3'...
Transcription saved to '../data/input/audio_transcribed/transscribed_audio_dharma/20190818-Leigh_Brasington-TSD-jhanas_5_8-64183.txt'
--------------------------------------------------------------------------------
Transcribing '20110109-Rick_Hanson-SR-jhana_factors-11614.mp3'...
Transcription saved to '../data/input/audio_transcribed/transscribed_audio_dharma/20110109-Rick_Hanson-SR-jhana_factors-11614.txt'
--------------------------------------------------------------------------------
Transcribing '20110216-Donald_Rothberg-SR-concentration_practice_iii-11987.mp3'...
Transcription saved to '../data/input/audio_transcribed/transscribed_audio_dharma/20110216-Donald_Rothberg-SR-concentration_practice_iii-11987.txt'
--------------------------------------------------------------------------------
Transcribing '20151023-Marcia_Rose-IMSFR-equanimity-31149.mp3'...
Transcription saved to '../data/input/audio_transcribed/tran

# Transcribe New Audio Dharma Talks

In [4]:
import os
import whisper

# Load the Whisper small model
model = whisper.load_model("small")

# Directory containing the MP3 files
audio_dir = "../data/input/audio/guided_meditations/audiodharma_dharmaseed"

# Directory to save the transcribed text files
text_dir = "../data/input/audio_transcribed/transscribed_audio_dharma"
os.makedirs(text_dir, exist_ok=True)  # Create the directory if it doesn't exist

# List all MP3 files in the directory
audio_files = [f for f in os.listdir(audio_dir) if f.endswith('.mp3')]

# Transcribe each audio file and save the transcription
for audio_file in audio_files:
    # Define the path for the output text file
    text_file_name = f"{os.path.splitext(audio_file)[0]}.txt"
    text_file_path = os.path.join(text_dir, text_file_name)

    # Check if the transcription text file already exists
    if os.path.exists(text_file_path):
        print(f"Transcription for '{audio_file}' already exists. Skipping transcription.")
        continue

    audio_file_path = os.path.join(audio_dir, audio_file)
    print(f"Transcribing '{audio_file}'...")

    # Transcribe the audio file
    result = model.transcribe(audio_file_path, language="en")
    transcribed_text = result["text"]

    # Save the transcribed text to a file
    with open(text_file_path, 'w') as text_file:
        text_file.write(transcribed_text)
    
    print(f"Transcription saved to '{text_file_path}'")
    print("-" * 80)


Transcription for '20190818-Leigh_Brasington-TSD-jhanas_5_8-64183.mp3' already exists. Skipping transcription.
Transcription for '20110109-Rick_Hanson-SR-jhana_factors-11614.mp3' already exists. Skipping transcription.
Transcription for '20110216-Donald_Rothberg-SR-concentration_practice_iii-11987.mp3' already exists. Skipping transcription.
Transcription for '20151023-Marcia_Rose-IMSFR-equanimity-31149.mp3' already exists. Skipping transcription.
Transcription for '20160729-Bhante_Henepola_Gunaratana-BSWV-2016_jhana_retreat_day_5_meditation_instructions-35433.mp3' already exists. Skipping transcription.
Transcription for '20201004-Ajahn_Sucitto-CITTA-dhamma_stream_q_a-62768.mp3' already exists. Skipping transcription.
Transcription for '20090115-Tina_Rasmussen-VAR-2009_monthly_sitting_group_month_1_overview_of_the_samatha_practice-51845.mp3' already exists. Skipping transcription.
Transcribing '16_Talk_Dependend Origination.mp3'...
Transcription saved to '../data/input/audio_transcrib

# Transcribe New YouTube Audios

In [2]:
import os
import whisper

# Load the Whisper small model
model = whisper.load_model("small")

# Directory containing the MP3 files
audio_dir = "../data/input/audio/guided_meditations/youtube"

# Directory to save the transcribed text files
text_dir = "../data/input/audio_transcribed/transcribed_youtube"
os.makedirs(text_dir, exist_ok=True)  # Create the directory if it doesn't exist

# List all MP3 files in the directory
audio_files = [f for f in os.listdir(audio_dir) if f.endswith('.mp3')]

# Transcribe each audio file and save the transcription
for audio_file in audio_files:
    # Define the path for the output text file
    text_file_name = f"{os.path.splitext(audio_file)[0]}.txt"
    text_file_path = os.path.join(text_dir, text_file_name)

    # Check if the transcription text file already exists
    if os.path.exists(text_file_path):
        print(f"Transcription for '{audio_file}' already exists. Skipping transcription.")
        continue

    audio_file_path = os.path.join(audio_dir, audio_file)
    print(f"Transcribing '{audio_file}'...")

    # Transcribe the audio file
    result = model.transcribe(audio_file_path, language="en")
    transcribed_text = result["text"]

    # Save the transcribed text to a file
    with open(text_file_path, 'w') as text_file:
        text_file.write(transcribed_text)
    
    print(f"Transcription saved to '{text_file_path}'")
    print("-" * 80)

Transcription for 'Can jhanas change your 'emotional set-point'? ~ Leigh Brasington with Stephanie Nash.mp3' already exists. Skipping transcription.
Transcription for 'Introduction to Collection of Jhourneys.mp3' already exists. Skipping transcription.
Transcription for 'Meditation With Focus On Jhana - Ven Ajahn Brahm.mp3' already exists. Skipping transcription.
Transcription for 'Guided meditation through the four jhanas.mp3' already exists. Skipping transcription.
Transcription for 'Guided sunrise meditation to the first jhana.mp3' already exists. Skipping transcription.
Transcription for 'The Four Jhanas ~ A Guided Meditation ~ Ajahn Lee ~ Theravadin Forest Tradition.mp3' already exists. Skipping transcription.
Transcription for 'Do jhanas increase concentration? ~ Leigh Brasington with Stephanie Nash.mp3' already exists. Skipping transcription.
Transcription for 'Brain Study on Jhanas ~ Leigh Brasington with Stephanie Nash.mp3' already exists. Skipping transcription.
Transcription