In [None]:
import openai
import os
import csv

def transcribe_audio(api_key, audio_file):
    """
    Transcribe an audio file with the "whisper-1" model and return its segments.

    :param api_key: OpenAI API key used to build the client.
    :param audio_file: Path of the audio file to upload.
    :return: List of transcription segments from the verbose JSON response;
        an empty list when the response carries no segments.
    """
    client = openai.OpenAI(api_key=api_key)

    # Binary mode: the file object is handed to the API client as raw bytes.
    with open(audio_file, "rb") as audio:
        response = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio,
            response_format="verbose_json",
        )

    # ``segments`` may be None on the verbose response; normalise it to an
    # empty list so callers can always iterate over it or take its length.
    return response.segments or []

def count_pauses(segments, threshold=1.0):
    """
    Count pauses based on gaps between consecutive segments.

    A pause is counted whenever the gap between the end of one segment and
    the start of the next one is at least ``threshold``.

    :param segments: Sequence of transcription segments. Each segment exposes
        ``start`` and ``end`` either as attributes or as dictionary keys.
        ``None`` or an empty sequence yields 0.
    :param threshold: Time (in seconds) considered as a pause.
    :return: Number of pauses detected.
    """
    if not segments:
        return 0

    def bound(segment, name):
        # Accept both object-style segments and plain dict segments.
        if isinstance(segment, dict):
            return segment[name]
        return getattr(segment, name)

    # Pair every segment with its successor; a single segment gives no pairs.
    return sum(
        1
        for previous, current in zip(segments, segments[1:])
        if bound(current, "start") - bound(previous, "end") >= threshold
    )

def save_to_csv(results, output_file="/content/output_folder/output.csv"):
    """
    Save pause counts to a CSV file.

    :param results: Iterable of rows, each ``[file name, number of pauses]``.
    :param output_file: Destination CSV path. Its parent folder is created
        when it does not exist yet.
    """
    # Create the folder that will actually hold ``output_file`` rather than a
    # fixed location, so any destination path works.
    parent = os.path.dirname(output_file)
    if parent:  # A bare file name has no folder part; makedirs("") would raise
        os.makedirs(parent, exist_ok=True)

    # newline="" lets the csv module control line endings itself.
    with open(output_file, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["File Name", "Number of Pauses"])
        writer.writerows(results)

def process_multiple_files(api_key, audio_files, output_file):
    """
    Transcribe every audio file, count its pauses, and store the counts as CSV.

    :param api_key: OpenAI API key forwarded to ``transcribe_audio``.
    :param audio_files: Iterable of audio file paths to process in order.
    :param output_file: Path of the CSV file handed to ``save_to_csv``.
    """

    def build_row(path):
        # Announce the file first, then transcribe it and count its pauses.
        print(f"Processing {path}...")
        n_pauses = count_pauses(transcribe_audio(api_key, path))
        return [os.path.basename(path), n_pauses]

    rows = [build_row(path) for path in audio_files]

    save_to_csv(rows, output_file)
    print(f"All pause counts saved to {output_file}")

if __name__ == "__main__":
    # Prefer the OPENAI_API_KEY environment variable so the secret does not
    # have to be written into the script; the literal is only a fallback.
    API_KEY = os.environ.get("OPENAI_API_KEY") or ""  # Or replace "" with your OpenAI API key
    if not API_KEY:
        # Fail before any file is opened or uploaded instead of on the first
        # API request.
        raise SystemExit(
            "No OpenAI API key found: set OPENAI_API_KEY or fill in API_KEY."
        )

    AUDIO_FILES = [
        "/content/ABB_segment_2 (1).mp3",
        "/content/Adani Wilmar Limited (NSEI_AWL) Jan-31-2024 - Audio_segment_2.mp3",
        "/content/Adani Wilmar Limited (NSEI_AWL) Jul-30-2024 - Audio_segment_2.mp3",
    ]  # Replace with your list of audio files
    OUTPUT_FILE = "/content/output_folder/output_pause.csv"  # Save CSV inside a folder in Google Colab

    process_multiple_files(API_KEY, AUDIO_FILES, OUTPUT_FILE)


Processing /content/ABB_segment_2 (1).mp3...
Processing /content/Adani Wilmar Limited (NSEI_AWL) Jan-31-2024 - Audio_segment_2.mp3...
Processing /content/Adani Wilmar Limited (NSEI_AWL) Jul-30-2024 - Audio_segment_2.mp3...
All pause counts saved to /content/output_folder/output_pause.csv
