<a href="https://colab.research.google.com/github/atharvakale31/Early-prediction-of-Alzheimer-using-acoustic-biomarkers/blob/main/Voice_Recording_to_Text.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install openai-whisper
!pip install pandas
!pip install pydub


Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/800.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━[0m [32m727.0/800.5 kB[0m [31m21.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting triton>=2.0.0 (from openai-whisper)
  Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Downloading triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manyli

In [5]:
import os
import tempfile
import warnings
import zipfile

import pandas as pd
import whisper
from pydub import AudioSegment
from tqdm import tqdm

# Suppress every Python warning for the rest of the session so the progress
# output stays readable.
warnings.filterwarnings(action="ignore")

# Locations of the input archive, the extraction target and the results file
zip_path = '/content/test_audios.zip'  # Point this at your own ZIP archive
output_dir = '/content/audio_files'     # Archive contents are unpacked here
csv_output = '/content/transcriptions_test_audios.csv'

# Whisper checkpoint shared by all transcriptions
# (other sizes such as 'base' or 'small' can be substituted)
model = whisper.load_model("medium")

# Unpack the whole archive into output_dir
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=output_dir)

# Buffer of result rows awaiting a CSV write, and the row count that triggers one
transcriptions = list()
batch_size = 300

# Function to process audio file
def transcribe_audio(file_path, file_name):
    """Transcribe one audio file with the module-level Whisper ``model``.

    The file is decoded with pydub, reduced to a single channel when it has
    more than one, exported to a temporary WAV file and passed to
    ``model.transcribe``. The temporary file is always removed afterwards,
    even when export or transcription raises.

    Args:
        file_path: Path of the audio file to read.
        file_name: Label returned unchanged as the first element of the result.

    Returns:
        A tuple ``(file_name, audio_length, text)`` where ``audio_length`` is
        ``duration_seconds`` of the decoded audio and ``text`` is the
        whitespace-stripped ``"text"`` entry of the transcription result.

    Raises:
        Any exception raised by pydub while decoding/exporting or by the
        model while transcribing is propagated to the caller.
    """
    # Load and preprocess audio
    audio = AudioSegment.from_file(file_path)
    audio_length = audio.duration_seconds

    # Collapse multi-channel audio to one channel before export
    if audio.channels > 1:
        audio = audio.set_channels(1)

    # A unique temporary path replaces the fixed "temp.wav" in the working
    # directory, which was never deleted and could be overwritten by another run.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # only the path is needed; pydub opens the file itself
    try:
        audio.export(wav_path, format="wav")

        # Transcribe using Whisper
        transcription = model.transcribe(wav_path)
    finally:
        os.remove(wav_path)

    text = transcription["text"].strip()

    return file_name, audio_length, text

# Process files and save results in batches
CSV_COLUMNS = ["File Name", "Audio Length", "Transcription"]


def _append_rows_to_csv(rows):
    """Append result rows to csv_output, writing the header only if the file is new."""
    pd.DataFrame(rows, columns=CSV_COLUMNS).to_csv(
        csv_output, mode='a', header=not os.path.exists(csv_output), index=False)


# Sort the listing so the CSV row order is the same on every run
# (os.listdir returns entries in arbitrary order).
audio_files = sorted(os.listdir(output_dir))
for file_name in tqdm(audio_files, desc="Processing files"):
    # Check if file is MP3
    if not file_name.lower().endswith(".mp3"):
        print(f"Skipping non-MP3 file: {file_name}")
        continue

    file_path = os.path.join(output_dir, file_name)

    try:
        # Transcribe and buffer the result
        transcriptions.append(transcribe_audio(file_path, file_name))

        # Flush once the buffer itself holds a full batch. Counting buffered rows
        # (rather than the position in the directory listing) means a skipped or
        # failed file can no longer cause a scheduled flush to be missed.
        if len(transcriptions) >= batch_size:
            _append_rows_to_csv(transcriptions)
            transcriptions = []  # Reset the batch list

    except Exception as e:
        # Best effort: report the failure and move on to the next file
        print(f"Error processing {file_name}: {e}")
        continue

# Save any remaining results after the loop
if transcriptions:
    _append_rows_to_csv(transcriptions)

print("Transcription process completed!")


Processing files: 100%|██████████| 412/412 [30:16<00:00,  4.41s/it]

Transcription process completed!



