In [None]:
!pip install whisper



In [None]:
!pip install openai-whisper



In [None]:
!pip install openai_whisper



In [None]:
#!/usr/bin/env python3
"""
Speech Fluency & Disfluency Analysis

This script analyzes how a speaker is speaking:
- Detects repeated words (possible stuttering/nervousness)
- Detects filler words ("um", "uh", etc.)
- Detects long pauses (hesitation)
- Computes average pause duration and speech rate

Requirements:
-------------
- pip install openai-whisper
- pip install torch librosa

Usage:
------
python fluency_analysis.py
# You will be asked to enter the path of the audio file.
"""

import whisper
import numpy as np
from pathlib import Path

FILLERS = {"um", "uh", "erm", "hmm", "you know", "like"}

# Characters trimmed from both ends of a transcribed token before comparison.
# Whisper's word-level tokens carry a leading space and keep neighbouring
# punctuation attached (e.g. " Um," or " know."), which would otherwise defeat
# exact matching against FILLERS and against the previous word.
_TOKEN_EDGE_CHARS = " \t\r\n.,!?;:\"'()[]{}-…"


def _normalize_token(token: str) -> str:
    """Return *token* lower-cased with surrounding whitespace/punctuation removed."""
    return token.strip(_TOKEN_EDGE_CHARS).lower()


def analyze_fluency(audio_path: str, *, model=None, pause_threshold: float = 0.7):
    """Transcribe an audio file and report simple disfluency metrics.

    The function prints a running log of detected fillers, immediate word
    repetitions and long pauses, followed by a summary, and also returns the
    metrics so they can be used programmatically.

    Args:
        audio_path: Path of the audio file handed to ``model.transcribe``.
        model: Optional object exposing
            ``transcribe(audio_path, word_timestamps=True)``. When omitted,
            the Whisper "base" model is loaded. Passing a preloaded model
            avoids reloading it on every call.
        pause_threshold: Gap in seconds between the end of one word and the
            start of the next above which the gap is counted as a pause.

    Returns:
        dict with keys ``repetitions``, ``filler_count``, ``pauses`` (list of
        pause lengths in seconds), ``average_pause`` (0.0 when there are no
        pauses), ``speech_rate`` (words per second) and ``total_words``.
    """
    if model is None:
        model = whisper.load_model("base")
    result = model.transcribe(audio_path, word_timestamps=True)
    segments = result["segments"]

    # Flatten the per-segment word lists; segments without word timing are skipped.
    words = []
    for seg in segments:
        words.extend(seg.get("words", []))

    # Each filler becomes a tuple of words so multi-word fillers ("you know")
    # can be matched against the trailing words of the transcript. Sorted so
    # the log order is deterministic.
    filler_phrases = sorted(
        tuple(filler.lower().split()) for filler in FILLERS if filler.strip()
    )

    repetitions = 0
    filler_count = 0
    pauses = []
    spoken = []  # normalized, non-empty tokens seen so far
    prev_end = None

    print("Transcript with analysis:\n")
    for w in words:
        start, end = w["start"], w["end"]
        word = _normalize_token(w["word"])

        # Tokens that are pure punctuation carry timing but no lexical content.
        if word:
            spoken.append(word)

            # Filler words / phrases ending at the current word
            for phrase in filler_phrases:
                if tuple(spoken[-len(phrase):]) == phrase:
                    filler_count += 1
                    print(f"[Filler detected: {' '.join(phrase)}]")

            # Repetitions: the same word twice in a row
            if len(spoken) >= 2 and spoken[-2] == word:
                repetitions += 1
                print(f"[Repetition detected: {word}]")

        # Pauses: silence between the previous word's end and this word's start
        if prev_end is not None:
            pause = start - prev_end
            if pause > pause_threshold:
                pauses.append(pause)
                print(f"[Pause of {pause:.2f}s detected]")

        prev_end = end

    # Summary metrics
    total_words = len(words)
    duration = segments[-1]["end"] if segments else 0
    speech_rate = total_words / duration if duration > 0 else 0
    avg_pause = sum(pauses) / len(pauses) if pauses else 0.0

    print("\n=== Speaking Style Analysis ===")
    print(f"Repetitions detected: {repetitions}")
    print(f"Filler words detected: {filler_count}")
    if pauses:
        print(f"Average pause: {avg_pause:.2f}s")
    else:
        print("No significant pauses detected.")
    print(f"Speech rate: {speech_rate:.2f} words/sec")
    print(f"Total words: {total_words}")

    return {
        "repetitions": repetitions,
        "filler_count": filler_count,
        "pauses": pauses,
        "average_pause": avg_pause,
        "speech_rate": speech_rate,
        "total_words": total_words,
    }


def main():
    """Prompt for an audio file path and run the fluency analysis on it.

    Prints a "File not found" message and returns without analysing when the
    entered path does not point to a regular file.
    """
    # Paths pasted from a shell or file manager often arrive wrapped in quotes.
    audio_path = input("Enter path to audio file: ").strip().strip("\"'")

    # is_file() rather than exists(): an empty answer resolves to the current
    # directory, which exists but cannot be transcribed.
    if not Path(audio_path).is_file():
        print(f"File not found: {audio_path}")
        return

    analyze_fluency(audio_path)


if __name__ == "__main__":
    main()


Enter path to audio file: /content/drive/MyDrive/harvard.wav




Transcript with analysis:

[Pause of 0.84s detected]
[Pause of 0.80s detected]
[Pause of 0.80s detected]

=== Speaking Style Analysis ===
Repetitions detected: 0
Filler words detected: 0
Average pause: 0.81s
Speech rate: 2.47 words/sec
Total words: 43


Please upload your audio file (audio_sample.wav) using the file upload button on the left sidebar.

In [None]:
import whisper
import torch

# Load the Whisper "base" checkpoint.
model = whisper.load_model("base")

# Output filename for the saved weights.
# NOTE(review): only the suffix says "h5" — torch.save (below) writes PyTorch's
# own serialization format, not HDF5. Confirm that whatever consumes this file
# loads it with torch.load rather than an HDF5/Keras reader.
h5_path = "whisper_base.h5"

# Persist only the model's state dict (its parameters/buffers), not the model object.
torch.save(model.state_dict(), h5_path)

# Report where the weights were written.
print(f"Model saved as {h5_path}")

ModuleNotFoundError: No module named 'whisper'

In [None]:
!pip install openai-whisper