<a href="https://colab.research.google.com/github/jeremiahoclark/open_source_colabs/blob/main/OAI_TTS_%2B_GROQ_Whisper_Timestamping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Word and Audio Tracking Experiment
# Using OpenAI TTS and Groq Whisper API

# Install required libraries
!pip install openai groq

Collecting openai
  Using cached openai-1.35.13-py3-none-any.whl (328 kB)
Collecting groq
  Downloading groq-0.9.0-py3-none-any.whl (103 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/103.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.5/103.5 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [29]:
# Import necessary libraries
import os
import groq

from pathlib import Path
from openai import OpenAI
from google.colab import files
from google.colab import userdata
from difflib import SequenceMatcher

In [21]:
# Set up API keys (you'll need to input these)
openai_api_key = userdata.get('OPENAI_API_KEY')
groq_api_key = userdata.get('GROQ_API_KEY')

client = OpenAI(api_key=openai_api_key)

# 1. Create a sample text file
sample_text = """Hercules, son of Zeus and Alcmene, was a demigod known for his immense strength and courageous feats. From a young age, he exhibited signs of his divine heritage. As a baby, Hercules strangled two snakes sent by Hera, Zeus's wife, to kill him in his cradle. This was a clear indication of his extraordinary powers. Growing up, Hercules was trained in various skills by the finest teachers in Greece. He learned archery from Eurytus, wrestling from Autolycus, and music from Linus. However, Hercules's strength was both a gift and a curse."""

with open("sample_text.txt", "w") as f:
    f.write(sample_text)



In [23]:
# 2. Convert text to speech using OpenAI's TTS API (Updated with streaming)
def text_to_speech(text, output_file="speech.mp3"):
    speech_file_path = Path(output_file)
    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=text
    )
    response.stream_to_file(speech_file_path)
    return str(speech_file_path)

audio_file = text_to_speech(sample_text)

# 3. Transcribe audio and get timestamps using Groq's Whisper API
def transcribe_audio(audio_file):
    groq_client = groq.Client(api_key=groq_api_key)
    with open(audio_file, "rb") as audio:
        transcription = groq_client.audio.transcriptions.create(
            file=audio,
            model="whisper-large-v3",
            response_format="verbose_json"
        )
    return transcription

transcription_result = transcribe_audio(audio_file)



  response.stream_to_file(speech_file_path)


In [27]:
# 4. Analyze and display results
print("Original Text:")
print(sample_text)
print("\nTranscribed Text:")
print(transcription_result)


Original Text:
Hercules, son of Zeus and Alcmene, was a demigod known for his immense strength and courageous feats. From a young age, he exhibited signs of his divine heritage. As a baby, Hercules strangled two snakes sent by Hera, Zeus's wife, to kill him in his cradle. This was a clear indication of his extraordinary powers. Growing up, Hercules was trained in various skills by the finest teachers in Greece. He learned archery from Eurytus, wrestling from Autolycus, and music from Linus. However, Hercules's strength was both a gift and a curse.

Transcribed Text:
Transcription(text=" Hercules, son of Zeus and Alcmene, was a demigod known for his immense strength and courageous feats. From a young age, he exhibited signs of his divine heritage. As a baby, Hercules strangled two snakes sent by Hera, Zeus' wife, to kill him in his cradle. This was a clear indication of his extraordinary powers. Growing up, Hercules was trained in various skills by the finest teachers in Greece. He lear

In [31]:
# 4. Analyze and display results
print("Original Text:")
print(sample_text)
print("\nTranscribed Text:")
print(transcription_result.text)

print("\nDetailed Transcription Information:")
print(f"Task: {transcription_result.task}")
print(f"Detected Language: {transcription_result.language}")
print(f"Audio Duration: {transcription_result.duration:.2f} seconds")

print("\nSegments and Word Timestamps:")
for segment in transcription_result.segments:
    print(f"\nSegment {segment['id']}:")
    print(f"  Start: {segment['start']:.2f}s, End: {segment['end']:.2f}s")
    print(f"  Text: {segment['text']}")
    if 'words' in segment:
        print("  Word-level timestamps:")
        for word in segment['words']:
            print(f"    '{word['word']}': Start: {word['start']:.2f}s, End: {word['end']:.2f}s")
    else:
        print("  Word-level timestamps not available for this segment.")

# 5. Compare original text with transcribed text
similarity = SequenceMatcher(None, sample_text, transcription_result.text).ratio()
print(f"\nSimilarity between original and transcribed text: {similarity:.2%}")

# 6. Additional Analysis
word_count_original = len(sample_text.split())
word_count_transcribed = len(transcription_result.text.split())

print(f"\nWord count in original text: {word_count_original}")
print(f"Word count in transcribed text: {word_count_transcribed}")
print(f"Difference in word count: {abs(word_count_original - word_count_transcribed)}")

# Calculate average words per second
words_per_second = word_count_transcribed / transcription_result.duration
print(f"\nAverage words per second: {words_per_second:.2f}")

# Identify potential issues or interesting points
if word_count_original != word_count_transcribed:
    print("\nNote: The word count differs between the original and transcribed text. This could be due to:")
    print("- Misrecognition of words")
    print("- Punctuation differences")
    print("- Handling of contractions or compound words")

if similarity < 0.95:
    print("\nNote: The similarity between original and transcribed text is below 95%. This could indicate:")
    print("- Significant transcription errors")
    print("- Background noise or unclear audio")
    print("- Accent or pronunciation issues")


Original Text:
Hercules, son of Zeus and Alcmene, was a demigod known for his immense strength and courageous feats. From a young age, he exhibited signs of his divine heritage. As a baby, Hercules strangled two snakes sent by Hera, Zeus's wife, to kill him in his cradle. This was a clear indication of his extraordinary powers. Growing up, Hercules was trained in various skills by the finest teachers in Greece. He learned archery from Eurytus, wrestling from Autolycus, and music from Linus. However, Hercules's strength was both a gift and a curse.

Transcribed Text:
 Hercules, son of Zeus and Alcmene, was a demigod known for his immense strength and courageous feats. From a young age, he exhibited signs of his divine heritage. As a baby, Hercules strangled two snakes sent by Hera, Zeus' wife, to kill him in his cradle. This was a clear indication of his extraordinary powers. Growing up, Hercules was trained in various skills by the finest teachers in Greece. He learned archery from Eur