# Setup

In [None]:
# Optional overrides. Leave a value as None to have the following cell fall
# back to a folder inside the current working directory.
audios_dir, save_dir = None, None

In [None]:
import os

# Base folder for the defaults below: the directory the kernel was started in.
dir_path = os.getcwd()

# Keep any truthy value configured earlier; otherwise use the default folder.
audios_dir = audios_dir or f'{dir_path}/Audio'
save_dir = save_dir or f'{dir_path}/audio_detection'

In [None]:
# %pip (unlike !pip) always installs into the environment of the running kernel.
# Whisper is pinned to the commit that the recorded output of this cell resolved
# to, so a re-run installs the same code instead of whatever the branch holds now.
%pip install git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
%pip install ffmpeg-python

Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-m7wyz5z1
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-m7wyz5z1
  Resolved https://github.com/openai/whisper.git to commit ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done


In [None]:
import glob
import json
import os

import whisper
from tqdm import tqdm

# Parse data path

In [None]:
def parse_audio_info(audios_dir='./Audio'):
    """
    Collect the .mp3 files of every audio folder, grouped by data part.

    Layout read by this function: <audios_dir>/<part>/<audio_folder>/*.mp3.
    The audio ID is the text after the last underscore of the audio folder
    name (the whole name when it has no underscore).

    NOTE: a later cell of this notebook defines another `parse_audio_info`
    for a flat <audios_dir>/<part>/<file> layout; once that cell has run it
    replaces this definition.

    Parameters:
    - audios_dir: Path to the directory containing one sub-directory per part.

    Returns:
    - all_audio_paths: Dictionary {part: {audio_id: sorted list of .mp3 paths}}.
    """
    all_audio_paths = {}

    for data_part in sorted(os.listdir(audios_dir)):
        data_part_path = f'{audios_dir}/{data_part}'
        # Stray files next to the part folders would make os.listdir() below
        # raise NotADirectoryError, so only real directories count as parts.
        if not os.path.isdir(data_part_path):
            continue

        part_audio = {}
        for audio_dir in sorted(os.listdir(data_part_path)):
            audio_id = audio_dir.split('_')[-1]
            part_audio[audio_id] = sorted(
                glob.glob(f'{data_part_path}/{audio_dir}/*.mp3'))
        all_audio_paths[data_part] = part_audio

    return all_audio_paths

# Audio detection

In [None]:
import os
import json
from tqdm import tqdm


def create_directory(path):
    """
    Create a directory (and any missing parents) if it does not exist.

    Parameters:
    - path: Directory path to create.

    Raises:
    - FileExistsError: If `path` already exists but is not a directory.
    """
    # exist_ok=True removes the window between a separate existence check and
    # the creation call in which another process could create the directory.
    os.makedirs(path, exist_ok=True)


def parse_audio_info(audios_dir='./Audio', audio_ext='.mp3'):
    """
    Parse audio information from the directory structure.

    Layout read by this function: <audios_dir>/<part>/<audio file>.

    Parameters:
    - audios_dir: Path to the directory containing one sub-directory per part.
    - audio_ext: Extension removed from the end of a file name to build its
      audio ID. Files with another extension are still listed; their ID is
      the full file name.

    Returns:
    - all_audio_paths: Dictionary {part: {audio_id: path of the audio file}}.
    """
    all_audio_paths = {}

    for data_part in sorted(os.listdir(audios_dir)):
        data_part_path = f'{audios_dir}/{data_part}'
        # Stray files next to the part folders would make os.listdir() below
        # raise NotADirectoryError, so only real directories count as parts.
        if not os.path.isdir(data_part_path):
            continue

        part_audio = {}
        for audio_name in sorted(os.listdir(data_part_path)):
            audio_path_full = f'{data_part_path}/{audio_name}'
            # Sub-directories cannot be transcribed; list regular files only.
            if not os.path.isfile(audio_path_full):
                continue

            # Remove the extension from the end of the name only; a plain
            # str.replace() would also strip matches in the middle of a name.
            if audio_ext and audio_name.endswith(audio_ext):
                audio_id = audio_name[:-len(audio_ext)]
            else:
                audio_id = audio_name
            part_audio[audio_id] = audio_path_full

        all_audio_paths[data_part] = part_audio

    return all_audio_paths


def transcribe_audio(audio_path, model):
    """
    Run the given model on one audio file and return the recognised text.

    Parameters:
    - audio_path: Path to the audio file.
    - model: Object exposing `transcribe(audio_path)` that returns a mapping
      with a "text" entry (e.g. a loaded Whisper model).

    Returns:
    - transcription: The "text" entry of the model result, or an empty string
      when anything goes wrong (the error is printed, not raised).
    """
    try:
        text = model.transcribe(audio_path)["text"]
    except Exception as err:
        # Best effort: report the failure and let the caller continue.
        print(f"Error transcribing audio {audio_path}: {err}")
        text = ""
    return text


def transcribe_and_save(all_audio_paths, save_dir, model):
    """
    Transcribe every audio file and save each result as a JSON file.

    Output files are written to <save_dir>/<part>/<audio_id>.json and map
    "line_1", "line_2", ... to the newline-separated lines of the text.

    Audio files whose transcription comes back empty (which is also what
    `transcribe_audio` returns after an error) get no JSON file, so a failed
    run cannot be mistaken for a finished one. They are reported instead.

    Parameters:
    - all_audio_paths: Dictionary {part: {audio_id: audio path}}.
    - save_dir: Root directory for the JSON files; created when missing.
    - model: Model object handed to `transcribe_audio`.
    """
    create_directory(save_dir)
    skipped = []

    for key, audio_paths_dict in tqdm(all_audio_paths.items(),
                                      desc="Processing parts"):
        key_dir = f'{save_dir}/{key}'
        create_directory(key_dir)

        for audio_id in tqdm(sorted(audio_paths_dict), desc=f"Processing {key}"):
            audio_path = audio_paths_dict[audio_id]
            transcription = transcribe_audio(audio_path, model)

            if not transcription.strip():
                # Nothing usable came back: do not write a placeholder file.
                skipped.append(audio_path)
                # tqdm.write keeps the message from breaking the progress bars.
                tqdm.write(f"No transcription for {audio_path}; nothing saved")
                continue

            # Split the transcription into lines and create a dictionary
            lines = transcription.split('\n')
            result = {f"line_{i+1}": line for i, line in enumerate(lines)}

            output_path = f'{key_dir}/{audio_id}.json'
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(result, f, ensure_ascii=False, indent=4)
            tqdm.write(f"Transcription saved to {output_path}")

    if skipped:
        tqdm.write(
            f"{len(skipped)} audio file(s) produced no transcription and were skipped")

In [None]:
# Index the audio files found under the configured input folder.
all_audio_paths = parse_audio_info(audios_dir=audios_dir)

# "base" is the name of the Whisper checkpoint to load.
model = whisper.load_model("base")

# Transcribe every indexed file and write the JSON results under save_dir.
transcribe_and_save(
    all_audio_paths=all_audio_paths,
    save_dir=save_dir,
    model=model,
)

Processing L01: 100%|██████████| 1/1 [04:07<00:00, 247.99s/it]
Processing parts:  50%|█████     | 1/2 [04:08<04:08, 248.00s/it]

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L01/V001.json




Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V001.json




Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V002.json




Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V003.json




Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V004.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V005.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V005.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V006.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V006.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V007.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V007.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V008.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V008.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V009.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V009.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V010.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V010.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V011.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V011.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V012.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V012.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V013.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V013.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V014.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V014.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V015.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V015.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V016.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V016.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V017.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V017.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V018.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V018.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V019.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V019.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V020.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V020.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V021.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V021.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V022.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V022.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V023.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V023.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V024.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V024.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V025.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V025.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V026.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V026.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V027.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V027.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V028.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V028.json




Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V029.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V029.json


Processing L21: 100%|██████████| 30/30 [1:01:16<00:00, 122.55s/it]
Processing parts: 100%|██████████| 2/2 [1:05:24<00:00, 1962.25s/it]

Error transcribing audio /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/Audio/L21/V030.mp3: CUDA error: unspecified launch failure
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.

Transcription saved to /home/jiggle/personal/competition/hcm-ai/Pipeline_HCM_AI/notebooks/data_extraction/audio/audio_detection/L21/V030.json



