# In [None]:
import subprocess
from pydub import AudioSegment
import speech_recognition as sr
from transformers import pipeline
import os
import time

# Step 1: Extract audio from the video using ffmpeg
def extract_audio_from_video(video_file, audio_file):
    """Extract the audio track of ``video_file`` into a WAV file using ffmpeg.

    The output is written as 16-bit PCM (``pcm_s16le``), resampled to
    16 kHz (``-ar 16000``) and down-mixed to one channel (``-ac 1``); the
    video stream is dropped (``-vn``).

    Args:
        video_file: Path of the input video.
        audio_file: Path of the WAV file to create. An existing file at
            this path is overwritten.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be launched.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    command = [
        'ffmpeg',
        '-nostdin',  # never wait on keyboard input (scripts/notebooks have none)
        '-y',        # overwrite a stale output file instead of prompting
        '-i', video_file,
        '-vn', '-acodec', 'pcm_s16le', '-ar', '16000', '-ac', '1',
        audio_file,
    ]
    try:
        subprocess.run(command, check=True)
    except FileNotFoundError as e:
        # Raised when the ffmpeg binary itself cannot be started.
        print(f"Error during audio extraction: could not launch ffmpeg ({e})")
        raise
    except subprocess.CalledProcessError as e:
        print(f"Error during audio extraction: {e}")
        raise
    print(f"Audio extracted successfully to {audio_file}")

# Step 2: Convert audio to mono using pydub
def convert_to_mono(audio_file):
    """Rewrite the WAV file at ``audio_file`` in place with a single channel.

    The file is loaded with pydub, reduced to one channel and exported back
    to the same path in WAV format. Any failure is reported on stdout and
    then re-raised to the caller.
    """
    try:
        single_channel = AudioSegment.from_wav(audio_file).set_channels(1)
        # Export over the original path so callers keep using the same file.
        single_channel.export(audio_file, format="wav")
        print("Audio converted to mono successfully.")
    except Exception as err:
        print(f"Error during audio conversion: {err}")
        raise

# Step 3: Perform speech-to-text using SpeechRecognition
def transcribe_audio(audio_file):
    """Return the text recognized in ``audio_file``.

    The whole file is read into memory with ``Recognizer.record`` and then
    passed to ``Recognizer.recognize_google``. Any failure while opening,
    reading or recognizing the audio is reported on stdout and re-raised.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_file) as source:
            # No duration/offset given: the complete file is captured.
            captured = recognizer.record(source)
            print("Recognizing speech...")
            return recognizer.recognize_google(captured)
    except Exception as err:
        print(f"Error during speech recognition: {err}")
        raise

# Step 4: Summarize the transcribed text using Hugging Face transformers
def summarize_text(text):
    """Summarize ``text`` with the ``facebook/bart-large-cnn`` model.

    Args:
        text: The transcript to summarize. Must contain non-whitespace
            characters.

    Returns:
        The generated summary string (requested length: 50-150 tokens,
        deterministic decoding via ``do_sample=False``).

    Raises:
        ValueError: If ``text`` is ``None``, empty or only whitespace.
        Exception: Any error raised while loading or running the pipeline
            is reported on stdout and re-raised unchanged.
    """
    try:
        # Summarizing nothing would only make the model invent content.
        if not text or not text.strip():
            raise ValueError("Cannot summarize empty text")

        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        summary = summarizer(
            text,
            max_length=150,
            min_length=50,
            do_sample=False,
            # Clip inputs longer than the model's maximum input length
            # instead of failing inside the model on long transcripts.
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        print(f"Error during text summarization: {e}")
        raise

# Step 5: Main function to process the video file
def process_video_to_summary(video_file, output_summary_file):
    """Run the full pipeline: video -> audio -> transcript -> summary file.

    The intermediate WAV file lives in a private temporary directory that is
    removed automatically, so files in the current working directory
    (including one that happens to be called ``temp_audio.wav``) are never
    overwritten or deleted, and parallel runs do not collide.

    Args:
        video_file: Path of the input video.
        output_summary_file: Path of the UTF-8 text file that receives the
            summary.

    Returns:
        None. Errors from any step are reported on stdout and not re-raised.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    try:
        with tempfile.TemporaryDirectory(prefix="video_summary_") as work_dir:
            # The path does not exist yet, so ffmpeg has nothing to overwrite.
            audio_file = os.path.join(work_dir, "temp_audio.wav")

            # Step 1: Extract audio from video
            extract_audio_from_video(video_file, audio_file)

            # Step 2: Convert audio to mono
            convert_to_mono(audio_file)

            # Step 3: Transcribe the audio
            print("Transcribing the audio to text...")
            text = transcribe_audio(audio_file)

        # The audio is no longer needed once the transcript exists.
        # Step 4: Summarize the text
        print("Summarizing the transcription...")
        summary = summarize_text(text)

        # Step 5: Save the summary to a file
        with open(output_summary_file, "w", encoding="utf-8") as file:
            file.write(summary)

        print(f"Summary saved to {output_summary_file}")

    except Exception as e:
        print(f"An error occurred: {e}")

# Example usage:
def main():
    """Ask for an output file name, run the pipeline and report the run time.

    An empty answer falls back to ``summarized_transcript.txt``; a ``.txt``
    suffix is appended when missing (checked case-insensitively). The timer
    starts only after the prompt has been answered, so the reported duration
    covers processing and not the time spent typing.
    """
    video_file = "project_sample.mp4"  # Replace with your video file path

    # Take user input for the output summary file name
    output_summary_file = input("Please enter the name for the output summary file (e.g., summarized_transcript.txt): ").strip()

    if not output_summary_file:
        # Avoid creating a file literally named ".txt".
        output_summary_file = "summarized_transcript.txt"
    elif not output_summary_file.lower().endswith('.txt'):
        # Ensure '.txt' is added to the filename if not provided
        output_summary_file += '.txt'

    start_time = time.perf_counter()

    # Process the video to summary
    process_video_to_summary(video_file, output_summary_file)

    # Calculate and print the total execution time
    total_time = time.perf_counter() - start_time
    minutes, seconds = divmod(total_time, 60)  # Convert time to minutes and seconds
    print(f"\nTotal execution time: {int(minutes)} minutes and {int(seconds)} seconds.")


if __name__ == "__main__":
    main()
