In [1]:
import os
import whisper
from moviepy.editor import VideoFileClip
import json

# Step 1: Extract audio from video
def extract_audio(video_path, audio_path):
    """Write the audio track of a video file to ``audio_path``.

    Args:
        video_path: Path of the video file to open with MoviePy.
        audio_path: Destination path of the audio file to write.

    Raises:
        ValueError: If the opened clip has no audio track.
    """
    print("Extracting audio...")
    video = VideoFileClip(video_path)
    try:
        # A clip without an audio stream exposes ``audio`` as None; fail with
        # a clear message instead of an AttributeError on None.
        if video.audio is None:
            raise ValueError(f"No audio track found in: {video_path}")
        # ``fps`` is the sample rate (Hz) of the written audio.
        video.audio.write_audiofile(audio_path, fps=16000)
    finally:
        # Always release the clip's underlying readers, even when the write
        # fails, so repeated runs in one kernel do not leak resources.
        video.close()
    print(f"Audio saved as: {audio_path}")

# Step 2: Transcribe using Whisper
def transcribe_audio_whisper(audio_path):
    """Transcribe an audio file with Whisper and return its timed segments.

    Loads the "large" Whisper model on every call, transcribes
    ``audio_path`` with the language code fixed to "hi" and verbose output
    enabled, then reduces each returned segment to three fields.

    Args:
        audio_path: Path of the audio file handed to ``model.transcribe``.

    Returns:
        A list of dicts, one per segment, with keys ``"start"`` and ``"end"``
        (rounded to 2 decimals) and ``"transcription"`` (the segment text
        with surrounding whitespace stripped).
    """
    print("Loading Whisper model...")
    # The model size is hard-coded; "medium" is the smaller alternative.
    whisper_model = whisper.load_model("large")

    print("Transcribing audio...")
    output = whisper_model.transcribe(audio_path, language="hi", verbose=True)

    return [
        {
            "start": round(seg["start"], 2),
            "end": round(seg["end"], 2),
            "transcription": seg["text"].strip(),
        }
        for seg in output["segments"]
    ]

# Step 3: Full pipeline
def process_video(video_path, audio_path="extracted_audio.wav",
                  output_path="transcription_output.json"):
    """Run the full pipeline: extract audio, transcribe it, save JSON.

    Args:
        video_path: Path of the input video file.
        audio_path: Where the intermediate audio file is written. Defaults
            to the previously hard-coded "extracted_audio.wav".
        output_path: Where the transcript JSON is written. Defaults to the
            previously hard-coded "transcription_output.json".

    Returns:
        The list of segment dicts produced by ``transcribe_audio_whisper``.
    """
    extract_audio(video_path, audio_path)
    results = transcribe_audio_whisper(audio_path)

    # ensure_ascii=False keeps non-ASCII transcript text readable in the file
    # instead of \uXXXX escapes; the file itself is written as UTF-8.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"Transcription saved to '{output_path}'")
    return results

# Run the script
if __name__ == "__main__":
    # Input video is hard-coded as a relative path, so it is resolved against
    # the current working directory. ``video_file`` stays defined at module
    # level after this runs.
    video_file = "v3.mp4"
    # Runs extraction, transcription and the JSON save; the returned segment
    # list is not kept here.
    process_video(video_file)

  from pkg_resources import resource_filename


Extracting audio...
MoviePy - Writing audio in extracted_audio.wav


                                                                                                                       

MoviePy - Done.
Audio saved as: extracted_audio.wav
Loading Whisper model...
Transcribing audio...
[00:01.000 --> 00:10.500]  प्रणाम प्रभु जी, कोई साधक कैसे समझे कि उसकी साधना मुक्तिदाई साधना है या कोई भटकाने वाली साधना है?
[00:13.300 --> 00:19.000]  साधना के नाम पर बहुत कुछ किया जाता है।


KeyboardInterrupt: 

In [7]:
import os
import json
from moviepy.editor import VideoFileClip
import whisper

def extract_audio(video_path, audio_path="extracted_audio.wav"):
    """Write the audio track of a video file to ``audio_path`` as PCM audio.

    Args:
        video_path: Path of the video file to open with MoviePy.
        audio_path: Destination path of the audio file to write. Defaults to
            "extracted_audio.wav".

    Raises:
        ValueError: If the opened clip has no audio track.
    """
    print("Extracting audio...")
    video = VideoFileClip(video_path)
    try:
        # A clip without an audio stream exposes ``audio`` as None; fail with
        # a clear message instead of an AttributeError on None.
        if video.audio is None:
            raise ValueError(f"No audio track found in: {video_path}")
        # Save as WAV: 16-bit PCM codec, ``fps`` is the sample rate in Hz.
        video.audio.write_audiofile(audio_path, codec="pcm_s16le", fps=16000)
    finally:
        # Always release the clip's underlying readers, even when the write
        # fails, so repeated runs in one kernel do not leak resources.
        video.close()
    print(f"Audio saved as: {audio_path}")

def transcribe_audio_whisper(audio_path):
    """Transcribe an audio file with Whisper and return its timed segments.

    Loads the "large" Whisper model on every call, transcribes
    ``audio_path`` with the language code fixed to "hi" and verbose output
    enabled, then keeps three fields of each returned segment unchanged.

    Args:
        audio_path: Path of the audio file handed to ``model.transcribe``.

    Returns:
        A list of dicts, one per segment, with keys ``"start"``, ``"end"``
        and ``"text"`` copied as-is from the Whisper result (no rounding,
        no whitespace stripping).
    """
    kept_fields = ("start", "end", "text")

    print("Loading Whisper 'large' model (may take time)...")
    whisper_model = whisper.load_model("large")

    print("Transcribing audio...")
    output = whisper_model.transcribe(audio_path, language="hi", verbose=True)

    # Project every segment down to the kept fields, preserving their order.
    return [
        {field: seg[field] for field in kept_fields}
        for seg in output["segments"]
    ]

def process_video(video_path, audio_path="extracted_audio.wav",
                  output_path="transcription_output.json"):
    """Run the full pipeline: extract audio, transcribe it, save JSON.

    Args:
        video_path: Path of the input video file.
        audio_path: Where the intermediate audio file is written. Defaults
            to the previously hard-coded "extracted_audio.wav".
        output_path: Where the transcript JSON is written. Defaults to the
            previously hard-coded "transcription_output.json".

    Returns:
        The list of segment dicts produced by ``transcribe_audio_whisper``,
        so the transcript can be inspected without re-reading the JSON file.
    """
    extract_audio(video_path, audio_path)
    results = transcribe_audio_whisper(audio_path)

    # ensure_ascii=False keeps non-ASCII transcript text readable in the file
    # instead of \uXXXX escapes; the file itself is written as UTF-8.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=4)
    print(f"Transcription saved to: {output_path}")
    return results

if __name__ == "__main__":
    # Input video is hard-coded as a relative path, so it is resolved against
    # the current working directory. Runs extraction, transcription and the
    # JSON save in one call.
    process_video("v1.mp4")


🧹 Old Whisper model cache cleared.
🎬 Extracting audio...
MoviePy - Writing audio in extracted_audio.wav


                                                                                                                       

MoviePy - Done.
✅ Audio saved as: extracted_audio.wav
🧠 Loading Whisper 'large' model (may take time)...


100%|█████████████████████████████████████| 2.88G/2.88G [10:33<00:00, 4.88MiB/s]


📝 Transcribing audio...
[00:00.000 --> 00:29.980]  उड़ जा, उड़ जा
[00:30.000 --> 00:31.180]  फिर उड़ते कैसे हैं?
[00:31.620 --> 00:32.780]  जामेन!
[00:36.700 --> 00:38.820]  आह! एरप्लेइन्स!
[00:39.100 --> 00:44.860]  मेटल और गियर से बनी इन बड़ी-बड़ी मशीनों को देखकर कितना मज़ा आता है ना?
[00:44.860 --> 00:48.360]  जो पूरी दुनिया के उपर उड़ान भरा करती हैं.
[00:48.580 --> 00:50.240]  पक्का मज़ा आता होगा.
[00:50.240 --> 01:00.940]  पर आज, जो बहुती आसान मालूम होता है, वो कुछ सौ साल पहले सिर्फ एक सपना भर हुआ करता था पूरी दुनिया के लिए, लोग सोच भी नहीं सकते थे.
[01:00.940 --> 01:09.560]  हजारों साल पहले, लोग उपर आसमान में देखा करते थे और सोचते थे कि वो उड़ पाएं तो कैसा होगा?
[01:10.440 --> 01:18.680]  ग्राविटी को हराने के लिए उन्होंने कई तरीके निकाले, पर बदकिस्मती से कभी कामियाब नहीं हो पाए.


KeyboardInterrupt: 