# Mounting drive


In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Installing all necessary dependencies

In [None]:
!which ffmpeg

In [None]:
!pip install yt-dlp

In [None]:
!pip install moviepy
!pip install spleeter
!pip install noisereduce

In [None]:
!pip install -r /content/drive/MyDrive/videoQA/requirements.txt
!pip install whisper

In [None]:
!pip3 install -U scipy
%cd /content/drive/MyDrive/videoQA
!git clone https://github.com/jnordberg/tortoise-tts.git
%cd /content/drive/MyDrive/videoQA/tortoise-tts
!pip3 install -r requirements.txt
!pip3 install transformers==4.19.0 einops==0.5.0 rotary_embedding_torch==0.1.5 unidecode==1.3.5
!python3 setup.py install
!pip3 install -U pydub
!pip install moviepy==2.0.0.dev2
!pip install imageio==2.25.1
!apt install imagemagick
!pip install pysrt==1.1.2
!cat /etc/ImageMagick-6/policy.xml | sed 's/none/read,write/g'> /etc/ImageMagick-6/policy.xml

# Downloading essential dependencies for TTS synthesis

In [None]:
import torch
import torchaudio
import torch.nn as nn
import torch.nn.functional as F

import IPython

from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_audio, load_voice, load_voices

# This will download all the models used by Tortoise from the HuggingFace hub.
tts = TextToSpeech()

# Installing transcription model

In [None]:
%cd /content/drive/MyDrive/videoQA
!pip install git+https://github.com/openai/whisper.git
!apt install ffmpeg

In [None]:
!pip install pytube

# Set of functions required

In [None]:
from openai import OpenAI
from pydub import AudioSegment
import torchaudio
import pandas as pd
import subprocess
import os
import locale
import pydub
import sys
import csv
import re
import subprocess
import ffmpeg
import numpy as np
import cv2
import json
from moviepy.editor import VideoFileClip,AudioFileClip, concatenate_videoclips
from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_audio, load_voice, load_voices
from pytube import YouTube




def sanitize_filename(title):
    """Return *title* with every character outside ``a-z``, ``A-Z``, ``0-9`` replaced by ``_``."""
    unsafe_character = re.compile(r'[^a-zA-Z0-9]')
    return unsafe_character.sub('_', title)

def download_media_from_youtube(youtube_url, download_path=".", ffmpeg_path="/usr/bin/ffmpeg"):
    """
    Fetch a YouTube video twice with yt-dlp: once as WAV audio, once as MP4 video.

    The output file names are derived from the video title (looked up through
    pytube) after passing it through ``sanitize_filename``.

    Args:
    - youtube_url (str): URL of the video to download.
    - download_path (str): Directory the two files are written to.
    - ffmpeg_path (str): Value handed to yt-dlp's ``--ffmpeg-location``.

    Returns:
    - tuple: ``(audio_output_file, video_output_file)`` paths.

    Raises:
    - subprocess.CalledProcessError: if either yt-dlp invocation exits non-zero.
    """
    safe_title = sanitize_filename(YouTube(youtube_url).title)
    audio_output_file = os.path.join(download_path, f"{safe_title}_audio.wav")
    video_output_file = os.path.join(download_path, f"{safe_title}_video.mp4")

    # (format-selection flags, destination) for each yt-dlp run; audio first.
    downloads = (
        (['-x', '--audio-format', 'wav'], audio_output_file),
        (['-f', 'bestvideo+bestaudio/best', '--merge-output-format', 'mp4'], video_output_file),
    )
    for selection_flags, destination in downloads:
        yt_dlp_command = [
            'yt-dlp',
            *selection_flags,
            '--ffmpeg-location', ffmpeg_path,
            '-o', destination,
            youtube_url,
        ]
        subprocess.run(yt_dlp_command, check=True)

    return audio_output_file, video_output_file

def transcribe_audio_with_whisper(audio_file, model_size="base", language="English"):
    """
    Transcribe *audio_file* with the ``whisper`` command-line tool.

    Whisper is told explicitly to write its outputs next to the audio file, so
    the returned path is valid regardless of the current working directory.

    Args:
    - audio_file (str): Path of the audio file to transcribe.
    - model_size (str): Whisper model name passed to ``--model``.
    - language (str): Spoken language passed to ``--language``.

    Returns:
    - str: Path of the ``.srt`` file (audio file's directory, same base name).

    Raises:
    - subprocess.CalledProcessError: if the whisper command exits non-zero.
    """
    audio_path = os.path.dirname(audio_file)
    # An empty dirname means "current directory"; whisper needs a real path.
    whisper_output_dir = audio_path or "."

    command = [
        'whisper',
        audio_file,
        '--language', language,
        '--model', model_size,
        '--output_dir', whisper_output_dir,
    ]
    # check=True surfaces a failed transcription here instead of as a
    # "file does not exist" message further down the pipeline.
    subprocess.run(command, check=True)

    srt_file_name = os.path.splitext(os.path.basename(audio_file))[0] + ".srt"
    return os.path.join(audio_path, srt_file_name)

def transcription(youtube_url):
    """
    Download a YouTube video and transcribe its audio track.

    Returns:
    - tuple: ``(video_file, srt_path)`` - the downloaded video and the
      subtitle path reported by ``transcribe_audio_with_whisper``.
    """
    print("Downloading media from YouTube...")
    downloaded_audio, downloaded_video = download_media_from_youtube(youtube_url)
    print(f"Audio File: {downloaded_audio}\nVideo File: {downloaded_video}")

    print("Transcribing audio with Whisper...")
    subtitles = transcribe_audio_with_whisper(downloaded_audio)
    print("Transcription completed.")

    return downloaded_video, subtitles

def validate_file(file_path):
    """
    Exit the interpreter unless *file_path* is an existing ``.srt`` file.

    Prints the reason and calls ``sys.exit(1)`` when the path is not a regular
    file or its name does not end in ``.srt`` (case-insensitive).
    """
    complaint = None
    if not os.path.isfile(file_path):
        complaint = f"The file {file_path} does not exist."
    elif not file_path.lower().endswith('.srt'):
        complaint = "The file is not a valid SRT file."

    if complaint is not None:
        print(complaint)
        sys.exit(1)

def read_srt(file_path):
    """
    Load an SRT file as a list of lines (line endings kept), decoded as UTF-8.
    """
    with open(file_path, mode='r', encoding='utf-8') as srt_handle:
        srt_lines = list(srt_handle)
    return srt_lines

def write_txt(content, file_path):
    """
    Write SRT lines to a sibling ``.txt`` file, one compact block per cue.

    Each cue becomes ``index``, ``timing`` (with a trailing space) and its
    text lines, separated by newlines; cues are separated by one blank line.
    Blank lines inside the input are dropped.

    A digit-only line starts a new cue only when the following line is a
    timing line (contains ``-->``). This keeps purely numeric subtitle text
    such as "42" inside its cue instead of splitting the cue in two.

    Args:
    - content (iterable of str): Lines of the SRT file.
    - file_path (str): Path of the source SRT; only used to derive the output name.

    Returns:
    - str: Path of the written ``.txt`` file.
    """
    # splitext only looks at the final path component, so a dot in a
    # directory name cannot truncate the output path.
    new_file_path = os.path.splitext(file_path)[0] + '.txt'

    lines = list(content)
    entries = []
    entry = []
    for position, line in enumerate(lines):
        stripped = line.strip()
        if not stripped:
            continue  # blank separators carry no information
        next_line = lines[position + 1] if position + 1 < len(lines) else ''
        if stripped.isdigit() and '-->' in next_line:
            if entry:
                entries.append(entry)
            entry = [stripped]
        elif '-->' in line:
            entry.append(stripped + ' ')
        else:
            entry.append(stripped)
    if entry:
        entries.append(entry)

    with open(new_file_path, 'w', encoding='utf-8') as file:
        if entries:
            file.write('\n\n'.join('\n'.join(block) for block in entries) + '\n')
    return new_file_path

def convert_srt_to_txt(file_path):
    """
    Validate an SRT file and write its TXT counterpart.

    Returns the path of the TXT file, or ``None`` (after printing the error)
    when any step raises an ``Exception``. ``validate_file`` signals failure
    with ``sys.exit``, which is not caught here.
    """
    try:
        validate_file(file_path)
        txt_path = write_txt(read_srt(file_path), file_path)
        print(f"Conversion successful. TXT file created at {txt_path}")
    except Exception as e:
        print(f"An error occurred: {e}")
        return None
    return txt_path

def read_transcript_from_file(file_path):
    """
    Return the entire UTF-8 text content of *file_path* as one string.
    """
    with open(file_path, mode='r', encoding='utf-8') as transcript_handle:
        return transcript_handle.read()

def generate_learning_activities(transcript):
    """
    Ask the chat model to split a transcript into chapters with one Q&A each.

    Only the first 8000 characters of *transcript* are sent. The request goes
    through the module-level ``client`` object, which must exist before this
    function is called.

    Returns:
    - str: The stripped text of the first choice, or the fallback sentence
      "No activities could be generated." when the response has no choices.
    """
    excerpt = transcript[:8000]  # Adjust based on your token budget

    system_message = {
        "role": "system",
        "content": "You are a helpful chapter generator for video transcripts. Your task is to analyze the transcript content and identify changes in topic or content to generate chapters. For each identified chapter, generate a concise and descriptive chapter title or summary that captures the main topic or content of that chapter. Additionally, generate up to one question related to the content of each chapter to encourage critical thinking and understanding. Also, generate the answer to the question you will generate for each chapter. Present the output in the following format without any special characters or formatting: 'Chapter No. -', 'Chapter Name -', 'Chapter Start time -', 'Chapter End Time -', 'Chapter Question -', 'Chapter Answer -'. Ensure that each chapter detail is clearly separated and presented in a straightforward manner. Ensure that the discussion on each topic is finished and then generate the aforementioned things. Also, Ensure both the Question and the answer are short in length and concise and are evenly spaced out between topics. Segment topics into relevant chapters only."
    }
    user_message = {
        "role": "user",
        "content": f"Based on the following transcript, generate chapter titles, descriptions, questions, answers and the requested information in the specified format:\n\n{excerpt}"
    }

    request_options = {
        "model": "gpt-4o",
        "messages": [system_message, user_message],
        "temperature": 0.5,
        "max_tokens": 1000,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
    }
    response = client.chat.completions.create(**request_options)

    if not response.choices:
        return "No activities could be generated."
    return response.choices[0].message.content.strip()

def write_output_to_file(activities, output_file_path):
    """
    Save the generated text to disk, replacing any existing file.

    Args:
    - activities (str): Text to store.
    - output_file_path (str): Destination path; written as UTF-8.
    """
    with open(output_file_path, mode='w', encoding='utf-8') as destination:
        destination.write(activities)

def summarized_text(file_path, output_file_path):
    """
    Read a transcript, have the model generate chapter text, and save it.

    The result is written into the transcript's own directory under the name
    *output_file_path*.

    Returns:
    - str: Full path of the file that was written.
    """
    transcript_text = read_transcript_from_file(file_path)
    destination = os.path.join(os.path.dirname(file_path), output_file_path)
    write_output_to_file(generate_learning_activities(transcript_text), destination)
    print(f"Suggested Learning Activities written to {output_file_path}")
    return destination




def parse_chapter_info_from_file(input_file_path):
    """
    Extract chapter records from the model's text output.

    Prints the path and the file content, then returns a list of 6-tuples of
    strings: (number, name, start time, end time, question, answer).
    """
    print(input_file_path)
    with open(input_file_path, mode='r', encoding='utf-8') as summary_handle:
        summary_text = summary_handle.read()
    print(summary_text)

    # One fragment per labelled field, in the order the prompt requests them.
    field_fragments = (
        r'Chapter No\. - (\d+)\s*',
        r'Chapter Name - (.*?)\s*',
        r'Chapter Start time - (.*?)\s*',
        r'Chapter End Time - (.*?)\s*',
        r'Chapter Question - (.*?)\s*',
        # The answer runs up to the next chapter header or the end of the text.
        r'Chapter Answer - (.*?)\s*(?=Chapter No\. - \d+|$)',
    )
    # DOTALL lets a field value span several lines.
    matcher = re.compile(''.join(field_fragments), re.DOTALL)
    return matcher.findall(summary_text)

def write_to_csv(chapters, output_path):
    """
    Dump chapter tuples to a CSV file with a header row.

    Each element of *chapters* is indexed 0-5 and mapped, in order, onto the
    columns listed in ``column_names``. Both arguments are printed first.
    """
    print(chapters)
    print(output_path)

    column_names = [
        'Chapter No.',
        'Chapter Name',
        'Chapter Start time',
        'Chapter End Time',
        'Chapter Question',
        'Chapter Answer',
    ]
    with open(output_path, 'w', newline='', encoding='utf-8') as csv_handle:
        table = csv.DictWriter(csv_handle, fieldnames=column_names)
        table.writeheader()
        for record in chapters:
            table.writerow(
                {column: record[slot] for slot, column in enumerate(column_names)}
            )



def output_csv(text_summary_path):
    """
    Convert the chapter text file into ``chapter_details_from_file2.csv``.

    The CSV is placed in the same directory as *text_summary_path*.

    Returns:
    - str: Path of the CSV file.
    """
    parsed_chapters = parse_chapter_info_from_file(text_summary_path)
    destination = os.path.join(
        os.path.dirname(text_summary_path), 'chapter_details_from_file2.csv'
    )
    write_to_csv(parsed_chapters, destination)
    return destination

def get_video_info(video_path):
    """
    Probe the first video stream of *video_path* with ffprobe.

    Only width, height, r_frame_rate and pix_fmt are requested. The probe is
    restricted to the first video stream so that ``streams[0]`` in the result
    is never an audio stream, which carries none of these fields.

    Returns:
    - dict | None: ffprobe's JSON output parsed into a dict, or ``None`` when
      ffprobe printed nothing on stdout.
    """
    ffprobe_command = [
        'ffprobe', '-v', 'error',
        '-select_streams', 'v:0',
        '-show_entries', 'stream=width,height,r_frame_rate,pix_fmt',
        '-of', 'json', video_path
    ]
    result = subprocess.run(ffprobe_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output = result.stdout.decode()
    if not output:
        return None
    return json.loads(output)

def check_video_info(video_info, expected_info):
    """
    Compare the first stream of *video_info* with the first stream of *expected_info*.

    Width, height, r_frame_rate and pix_fmt must all be equal. On a mismatch a
    per-field report is printed (``True`` marks a differing field).

    Returns:
    - bool: ``True`` when all four fields match; ``False`` on a mismatch or
      when *video_info* is ``None`` or has no ``'streams'`` key.
    """
    if video_info is None or 'streams' not in video_info:
        return False

    actual = video_info['streams'][0]
    compared_fields = ('width', 'height', 'r_frame_rate', 'pix_fmt')
    # any() stops at the first differing field, like a chain of `or`.
    differs = any(
        actual.get(field) != expected_info['streams'][0][field]
        for field in compared_fields
    )
    if not differs:
        return True

    reference = expected_info['streams'][0]
    print("width: ", actual.get('width') != reference['width'], f"{actual.get('width')} , {reference['width']}")
    print("height: ", actual.get('height') != reference['height'])
    print("frame_rate: ", actual.get('r_frame_rate') != reference['r_frame_rate'])
    print("pixel format: ", actual.get('pix_fmt') != reference['pix_fmt'])
    return False

def video_duration(video_path):
    """
    Return the container duration of *video_path* in seconds, as reported by ffprobe.

    Returns ``0.0`` when ffprobe writes nothing to stdout.
    """
    probe = subprocess.run(
        [
            'ffprobe', '-v', 'error',
            '-show_entries', 'format=duration',
            '-of', 'default=noprint_wrappers=1:nokey=1',
            video_path,
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    reported = probe.stdout.decode().strip()
    if not reported:
        return 0.0
    return float(reported)

def convert_timestamp_to_seconds(time_str):
    """
    Convert a timestamp to whole seconds.

    Accepted shapes are ``HH:MM:SS``, ``MM:SS`` and ``SS``. The seconds field
    may carry a fraction after ``,`` (SRT style) or ``.``; the fraction is
    discarded. Surrounding whitespace and dashes are ignored. Any field beyond
    the third is ignored.

    Args:
    - time_str (str): Timestamp text, e.g. ``"00:01:30,500"``.

    Returns:
    - int: Total seconds.

    Raises:
    - ValueError: if a field is not an integer.
    """
    cleaned = str(time_str).strip().strip('-').strip()
    fields = cleaned.split(':')[:3]
    # Chapter times come from a language model, which may write either
    # "30,500" or "30.500"; only the whole seconds are used.
    fields[-1] = fields[-1].replace(',', '.').split('.')[0]

    try:
        numbers = [int(field) for field in fields]
    except ValueError as err:
        raise ValueError(f"Unrecognised timestamp: {time_str!r}") from err

    # Left-pad with zeros so "MM:SS" and "SS" are read as shorter forms.
    hours, minutes, seconds = [0] * (3 - len(numbers)) + numbers
    return (hours * 3600) + (minutes * 60) + seconds

def generate_blank_image(width, height):
    """Return a ``height x width x 3`` uint8 array with every value set to 255."""
    return np.full((height, width, 3), 255, dtype=np.uint8)

def split_video(input_video, output_dir, df, expected_info):
    """
    Cut *input_video* into one clip per chapter plus a trailing remainder.

    For every row of *df* the span between 'Chapter Start time' and
    'Chapter End Time' is written to ``part_<index>.mp4``. Whatever follows
    the last chapter's end is written to ``part_end.mp4``; with an empty *df*
    that is the whole video. The remainder is skipped when nothing is left.

    Args:
    - input_video (str): Path of the source video.
    - output_dir (str): Existing directory that receives the clips.
    - df (pandas.DataFrame): Chapter table with the two time columns.
    - expected_info: Unused; kept so existing callers keep working.
    """
    # Open the source once and always release its ffmpeg readers.
    source_clip = VideoFileClip(input_video)
    try:
        last_chapter_end = 0
        for index, row in df.iterrows():
            start_seconds = convert_timestamp_to_seconds(row['Chapter Start time'])
            end_seconds = convert_timestamp_to_seconds(row['Chapter End Time'])

            output_path = os.path.join(output_dir, f'part_{index}.mp4')
            source_clip.subclip(start_seconds, end_seconds).write_videofile(
                output_path, codec='libx264', audio_codec='aac'
            )
            last_chapter_end = end_seconds

        # video_duration() yields 0.0 when ffprobe fails; fall back to the
        # duration MoviePy read from the file.
        total_seconds = video_duration(input_video) or source_clip.duration
        if last_chapter_end < total_seconds:
            last_output_path = os.path.join(output_dir, "part_end.mp4")
            source_clip.subclip(last_chapter_end, total_seconds).write_videofile(
                last_output_path, codec='libx264', audio_codec='aac'
            )
    finally:
        source_clip.close()


def split_text_into_lines(text, font, font_scale, font_thickness, max_width):
    """
    Greedily wrap *text* so each rendered line is at most *max_width* pixels wide.

    Words are split on single spaces and measured with ``cv2.getTextSize``. A
    single word wider than *max_width* still gets a line of its own.

    Returns:
    - list of str: The wrapped lines (always at least one element).
    """
    first_word, *remaining_words = text.split(' ')
    wrapped = []
    pending = first_word

    for word in remaining_words:
        candidate = f"{pending} {word}"
        (candidate_width, _), _ = cv2.getTextSize(candidate, font, font_scale, font_thickness)
        if candidate_width > max_width:
            # The word does not fit: close the current line and start a new one.
            wrapped.append(pending)
            pending = word
        else:
            pending = candidate

    wrapped.append(pending)
    return wrapped


def training_voice(name, path):
    """
    Copy the audio sample at *path* to ``tortoise/voices/<name>/<name>.wav``.

    The destination is relative to the current working directory and the
    voice folder is created when missing.
    """
    voice_folder = f"tortoise/voices/{name}"
    os.makedirs(voice_folder, exist_ok=True)

    with open(path, 'rb') as sample:
        sample_bytes = sample.read()

    destination = os.path.join(voice_folder, f'{name}.wav')
    with open(destination, 'wb') as voice_file:
        voice_file.write(sample_bytes)



def GettingSpeakerAudio(audio_file, txt_file):
    """
    Collect the spoken text of a converted subtitle file.

    *txt_file* is split into blocks on blank lines; within each block the
    third line is taken as the cue text. The texts are concatenated in file
    order with no separator. Blocks with fewer than three lines (for example
    the empty block produced by a trailing blank line) are skipped.

    Args:
    - audio_file: Audio path; stored in the result as a string, not opened.
    - txt_file (str): Path of the subtitle text file.

    Returns:
    - dict: ``{'transcript': <concatenated text>, 'filename': <audio_file as str>}``.
    """
    with open(txt_file, 'r', encoding='utf-8') as file:
        blocks = file.read().split('\n\n')

    cue_texts = []
    for block in blocks:
        block_lines = block.split('\n')
        if len(block_lines) < 3:
            continue  # no text line in this block
        cue_texts.append(block_lines[2])

    return {
        'transcript': ''.join(cue_texts),
        'filename': f"{audio_file}",
    }



# Generate speech with the custom voice.
def mainThing(text, name, preset, output_dir='/content/drive/MyDrive/videoQA/output'):
    """
    Synthesize *text* in the voice called *name* and save it as a WAV file.

    Relies on the module-level ``tts`` object and on ``load_voice``.

    Args:
    - text (str): Text to speak.
    - name (str): Voice name passed to ``load_voice``; also the output file stem.
    - preset (str): Preset passed to ``tts.tts_with_preset``.
    - output_dir (str): Directory that receives ``<name>.wav``. Defaults to
      the path this function used to hard-code; created when missing.
    """
    voice_samples, conditioning_latents = load_voice(name)
    gen = tts.tts_with_preset(
        text,
        voice_samples=voice_samples,
        conditioning_latents=conditioning_latents,
        preset=preset,
    )
    os.makedirs(output_dir, exist_ok=True)
    # Saved at a 24000 Hz sample rate, as in the original call.
    torchaudio.save(os.path.join(output_dir, f'{name}.wav'), gen.squeeze(0).cpu(), 24000)

def generate_new_speech(audio_path,df,preset):
    """
    Synthesize speech for every chapter question, then for every chapter answer.

    For each row the reference audio is registered as a voice named
    ``question_<index>`` / ``answer_<index>`` via ``training_voice`` and the
    row's text is rendered with ``mainThing`` under that same name.
    """
    # All questions are generated before the first answer.
    passes = (
        ('Chapter Question', 'question'),
        ('Chapter Answer', 'answer'),
    )
    for column, prefix in passes:
        for index, row in df.iterrows():
            spoken_text = row[column]
            name = f"{prefix}_{index}"
            training_voice(name, audio_path)

            print(f"Generating for ... {name}")
            mainThing(spoken_text, name, preset)

def generate_question_images(df, output_dir, expected_info):
    """
    Render one white slide per chapter showing its question.

    Each slide has the frame size given by the first stream of
    *expected_info*, carries an underlined "Question:" header and the wrapped
    question text, and is saved as ``question_<index>.jpg`` in *output_dir*
    (created when missing).
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    width = expected_info['streams'][0]['width']
    height = expected_info['streams'][0]['height']
    slide_template = generate_blank_image(width, height)

    # Drawing constants shared by every slide.
    font = cv2.FONT_HERSHEY_SIMPLEX
    body_scale = 1
    body_thickness = 2
    ink = (0, 0, 0)
    header_text = "Question:"
    header_scale = 0.75
    header_thickness = int(1.3 * body_thickness)

    for index, row in df.iterrows():
        slide = slide_template.copy()

        # Header: centered horizontally, one twelfth of the way down.
        (header_width, _), _ = cv2.getTextSize(header_text, font, header_scale, header_thickness)
        header_left = (width - header_width) // 2
        header_baseline = height // 12
        cv2.putText(slide, header_text, (header_left, header_baseline), font, header_scale, ink, header_thickness)

        # Underline spanning the header, 10 px below its baseline.
        rule_y = header_baseline + 10
        cv2.line(slide, (header_left, rule_y), (header_left + header_width, rule_y), ink, thickness=2)

        # Wrap the question to the slide width minus 20 px.
        wrapped = split_text_into_lines(row['Chapter Question'], font, body_scale, body_thickness, width - 20)

        # Line pitch: height of a capital letter plus 10 px.
        line_pitch = cv2.getTextSize('A', font, body_scale, body_thickness)[0][1] + 10
        baseline = (height - line_pitch * len(wrapped)) // 4

        for text_line in wrapped:
            line_width = cv2.getTextSize(text_line, font, body_scale, body_thickness)[0][0]
            cv2.putText(slide, text_line, ((width - line_width) // 2, baseline), font, body_scale, ink, body_thickness)
            baseline += line_pitch

        cv2.imwrite(os.path.join(output_dir, f'question_{index}.jpg'), slide)



def generate_question_video(image_path, video_path, duration, expected_info):
    """
    Turn a still image into an H.264 video of the given duration with ffmpeg.

    Frame rate and pixel format are copied from the first stream of
    *expected_info*. ffmpeg's output is captured and its exit status is not
    inspected.
    """
    reference_stream = expected_info['streams'][0]
    encode_arguments = [
        '-loop', '1',
        '-i', image_path,
        '-r', reference_stream['r_frame_rate'],
        '-c:v', 'libx264',
        '-t', str(duration),
        '-pix_fmt', reference_stream['pix_fmt'],
        video_path,
    ]
    subprocess.run(['ffmpeg'] + encode_arguments, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

def get_adjusted_duration(file_path):
    """
    Return the length of a WAV file plus one second, as ``HH:MM:SS.mmm``.

    The millisecond field is always three digits. Without the padding a
    remainder of 50 ms would be rendered as ``.50``, which reads as 500 ms.

    Args:
    - file_path (str): Path of the WAV file.

    Returns:
    - str: Padded duration string, e.g. ``"00:00:05.050"``.
    """
    # len() of a pydub AudioSegment is its duration in milliseconds.
    total_ms = len(AudioSegment.from_wav(file_path)) + 1000  # one extra second

    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"

def question_videos(df, output_dir, expected_info):
    """
    Build ``question_<index>.mp4`` from each question slide.

    Each video lasts as long as ``get_adjusted_duration`` reports for the
    matching ``question_<index>.wav``. Every result is probed and compared
    with *expected_info*.

    Raises:
    - ValueError: when a generated video does not match *expected_info*.
    """
    for index, _row in df.iterrows():
        stem = os.path.join(output_dir, f'question_{index}')
        image_path = stem + '.jpg'
        video_path = stem + '.mp4'

        slide_duration = get_adjusted_duration(stem + '.wav')
        generate_question_video(image_path, video_path, slide_duration, expected_info)

        if not check_video_info(get_video_info(video_path), expected_info):
            raise ValueError(f"Generated question video '{video_path}' does not match expected video information.")

def generate_answer_images(df, output_dir, expected_info):
    """
    Render one white slide per chapter showing its question and its answer.

    Each slide has the frame size given by the first stream of
    *expected_info*. It holds an underlined "Question:" header with the
    wrapped question, then an underlined "Answer:" header with the wrapped
    answer, and is saved as ``answer_<index>.jpg`` in *output_dir* (created
    when missing).
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    width = expected_info['streams'][0]['width']
    height = expected_info['streams'][0]['height']
    slide_template = generate_blank_image(width, height)

    # Drawing constants shared by every slide.
    font = cv2.FONT_HERSHEY_SIMPLEX
    body_scale = 1
    body_thickness = 2
    ink = (0, 0, 0)
    header_scale = 0.75
    header_thickness = int(1.3 * body_thickness)
    wrap_width = width - 20

    def draw_header(slide, caption, baseline_y):
        # Centered caption with an underline 10 px below its baseline.
        caption_width = cv2.getTextSize(caption, font, header_scale, header_thickness)[0][0]
        left = (width - caption_width) // 2
        cv2.putText(slide, caption, (left, baseline_y), font, header_scale, ink, header_thickness)
        rule_y = baseline_y + 10
        cv2.line(slide, (left, rule_y), (left + caption_width, rule_y), ink, thickness=2)

    def draw_centered_lines(slide, text_lines, first_baseline, pitch):
        # One centered line per entry, stepping down by `pitch` pixels.
        baseline = first_baseline
        for text_line in text_lines:
            line_width = cv2.getTextSize(text_line, font, body_scale, body_thickness)[0][0]
            cv2.putText(slide, text_line, ((width - line_width) // 2, baseline), font, body_scale, ink, body_thickness)
            baseline += pitch

    for index, row in df.iterrows():
        question = row['Chapter Question']
        answer = row['Chapter Answer']
        slide = slide_template.copy()

        # Question section: header one twelfth of the way down.
        draw_header(slide, "Question:", height // 12)
        question_lines = split_text_into_lines(question, font, body_scale, body_thickness, wrap_width)
        line_pitch = cv2.getTextSize('A', font, body_scale, body_thickness)[0][1] + 10
        question_start = (height - line_pitch * len(question_lines)) // 4
        draw_centered_lines(slide, question_lines, question_start, line_pitch)

        # Answer section: header just above the vertical middle.
        draw_header(slide, "Answer:", round(height // 2.1))
        answer_lines = split_text_into_lines(answer, font, body_scale, body_thickness, wrap_width)
        line_pitch = cv2.getTextSize('A', font, body_scale, body_thickness)[0][1] + 10
        answer_start = round((height - line_pitch * len(answer_lines)) // 1.1)
        draw_centered_lines(slide, answer_lines, answer_start, line_pitch)

        cv2.imwrite(os.path.join(output_dir, f'answer_{index}.jpg'), slide)

def set_audio_for_video(output_dir,df):
    """
    Attach the synthesized narration to every question and answer video.

    For each row of *df*, ``question_<index>.mp4`` is combined with
    ``question_<index>.wav`` and the result replaces the original MP4; the
    same is then done for the ``answer_*`` files.

    The combined video is first written to ``tmp_<name>.mp4`` and moved into
    place afterwards, because the source clip is still being read while the
    output is encoded. Both clips are closed once each file is done.
    """
    for prefix in ('question', 'answer'):
        for index, _row in df.iterrows():
            video_path = os.path.join(output_dir, f'{prefix}_{index}.mp4')
            audio_path = os.path.join(output_dir, f'{prefix}_{index}.wav')
            # Named tmp_* so a leftover from a failed run never matches the
            # question/answer/part prefixes used when merging.
            staging_path = os.path.join(output_dir, f'tmp_{prefix}_{index}.mp4')

            video_clip = VideoFileClip(video_path)
            try:
                audio_clip = AudioFileClip(audio_path)
                try:
                    video_clip.set_audio(audio_clip).write_videofile(staging_path)
                finally:
                    audio_clip.close()
            finally:
                video_clip.close()

            os.replace(staging_path, video_path)


def generate_answer_video(image_path, video_path, duration, expected_info):
    """
    Turn an answer slide into an H.264 video of the given duration with ffmpeg.

    Frame rate and pixel format are copied from the first stream of
    *expected_info*. ffmpeg's output is captured and its exit status is not
    inspected.
    """
    reference_stream = expected_info['streams'][0]
    target_rate = reference_stream['r_frame_rate']
    target_pixel_format = reference_stream['pix_fmt']

    still_to_video = ['ffmpeg', '-loop', '1', '-i', image_path]
    still_to_video += ['-r', target_rate, '-c:v', 'libx264']
    still_to_video += ['-t', str(duration), '-pix_fmt', target_pixel_format]
    still_to_video.append(video_path)

    subprocess.run(still_to_video, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

def answer_videos(df, output_dir, expected_info):
    """
    Build ``answer_<index>.mp4`` from each answer slide.

    Each video lasts as long as ``get_adjusted_duration`` reports for the
    matching ``answer_<index>.wav``. Every result is probed and compared with
    *expected_info*.

    Raises:
    - ValueError: when a generated video does not match *expected_info*.
    """
    for index, _row in df.iterrows():
        stem = os.path.join(output_dir, f'answer_{index}')
        image_path = stem + '.jpg'
        video_path = stem + '.mp4'

        slide_duration = get_adjusted_duration(stem + '.wav')
        generate_answer_video(image_path, video_path, slide_duration, expected_info)

        if not check_video_info(get_video_info(video_path), expected_info):
            raise ValueError(f"Generated answer video '{video_path}' does not match expected video information.")

def merge_videos(df, output_video, output_dir):
    """
    Concatenate chapter clips with their question and answer videos.

    MP4 files in *output_dir* whose names start with ``part``, ``question``
    or ``answer`` are collected. Each group is sorted by the number before
    ``.mp4`` (files without one, such as ``part_end.mp4``, go last), because
    ``os.listdir`` returns names in arbitrary order. The groups are then
    interleaved as part, question, answer; leftover files are appended in
    that same group order. The result is written to *output_video*.

    Args:
    - df: Unused; kept so existing callers keep working.
    - output_video (str): Path of the merged video.
    - output_dir (str): Directory holding the segment files.

    Raises:
    - ValueError: when no segment files are found.
    """
    def segment_order(filename):
        # Numbered segments first, in numeric order; everything else after.
        numbered = re.search(r'_(\d+)\.mp4$', filename)
        if numbered:
            return (0, int(numbered.group(1)), filename)
        return (1, 0, filename)

    directory_listing = os.listdir(output_dir)

    def segments_with_prefix(prefix):
        return sorted(
            (name for name in directory_listing
             if name.startswith(prefix) and name.endswith('.mp4')),
            key=segment_order,
        )

    parts = segments_with_prefix("part")
    questions = segments_with_prefix("question")
    answers = segments_with_prefix("answer")

    combined = []
    for group in zip(parts, questions, answers):
        combined.extend(group)
    shared = min(len(parts), len(questions), len(answers))
    combined.extend(parts[shared:])
    combined.extend(questions[shared:])
    combined.extend(answers[shared:])

    if not combined:
        raise ValueError(f"No video segments found in '{output_dir}'.")

    video_clips = []
    try:
        for filename in combined:
            video_clips.append(VideoFileClip(os.path.join(output_dir, filename)))
        final_clip = concatenate_videoclips(video_clips)
        final_clip.write_videofile(f"{output_video}")
    finally:
        # Release the ffmpeg readers even if concatenation or encoding fails.
        for clip in video_clips:
            clip.close()

def clear_curr_directory():
    """
    Delete intermediate pipeline files from the current working directory.

    Regular files ending in .json, .vtt, .tsv, .srt, .wav, .txt, .mp3 or .csv
    are removed, except those with "requirements" in their name. Directories
    are left alone even if their name carries one of these suffixes.
    """
    removable_suffixes = ('.json', '.vtt', '.tsv', '.srt', '.wav', '.txt', '.mp3', '.csv')
    cwd = os.getcwd()
    for entry in os.listdir(cwd):
        if "requirements" in str(entry):
            continue
        target = os.path.join(cwd, entry)
        # os.remove cannot delete directories, so only plain files qualify.
        if entry.endswith(removable_suffixes) and os.path.isfile(target):
            os.remove(target)



def clear_output_directory(output_dir):
    """
    Remove every entry in *output_dir* whose name starts with
    ``question``, ``answer`` or ``part``.
    """
    intermediate_prefixes = ("question", "answer", "part")
    for entry in os.listdir(output_dir):
        if not entry.startswith(intermediate_prefixes):
            continue
        os.remove(os.path.join(output_dir, entry))

# Using functions in order to generate desired output

In [None]:
# End-to-end driver: download -> transcribe -> chapter/Q&A generation ->
# split video -> slides -> cloned-voice narration -> merge -> cleanup.
# The cell depends on strict statement order and on the notebook's working
# directory at each step.

#Enter the url to the youtube video
youtube_url = input("Enter the YouTube URL: ")
input_video,subtitle_file_path = transcription(youtube_url)
print(input_video, subtitle_file_path)
# Converting .srt to text file
# NOTE: convert_srt_to_txt returns None when the conversion raises.
text_file_path = convert_srt_to_txt(subtitle_file_path)
print(text_file_path)
#Calling OpenAI
# `client` must be defined at module level: generate_learning_activities reads it as a global.
api_key = "your-api-key" #Enter your api key here
client = OpenAI(api_key=api_key)
output_file_path = "learning_activities.txt"  # Desired path for the output file
text_summary_path = summarized_text(text_file_path, output_file_path)

# Run the main function and get the path to the output CSV file
output_csv_file_path = output_csv(text_summary_path)
df = pd.read_csv(output_csv_file_path)
# Base name of the downloaded video up to its first dot.
videoname = os.path.basename(input_video).split('.')[0]
output_dir = '/content/drive/MyDrive/videoQA/output'
output_video = f'/content/drive/MyDrive/videoQA/output/final_{videoname}.mp4'
# Stream properties of the source video; generated slides/videos are matched against these.
expected_info = get_video_info(input_video)

if not os.path.exists(output_dir):
  os.makedirs(output_dir)
# NOTE(review): a failure while splitting is only printed; later steps still run.
try:
  split_video(input_video, output_dir, df, expected_info)
except Exception as e:
  print(e)
locale.getpreferredencoding = lambda: "UTF-8" # For Overriding the pydub install error
generate_question_images(df, output_dir,expected_info)
generate_answer_images(df, output_dir,expected_info)
use_denoiser = False
# NOTE(review): the %cd below runs only in the denoiser branch, so the
# non-denoise branch relies on the current directory already containing
# MovieDubbing_Part1.py - confirm.
try:
  if use_denoiser:
    %cd /content/drive/MyDrive/videoQA
    !python MovieDubbing_Part1.py $input_video --denoise
  else:
    !python MovieDubbing_Part1.py $input_video
except Exception as e:
  print(e)
# These messages are printed unconditionally, whether or not the script above succeeded.
print("done denoising")
print("done spleeting")
subtitle_txt_path = text_file_path
# presumably MovieDubbing_Part1.py leaves the separated vocals at
# <cwd>/<videoname>/vocals.wav - verify against that script.
audio_path=os.path.join(os.getcwd(),videoname)

audio_path = os.path.join(audio_path,'vocals.wav')
print(audio_path)
# Pick a "preset mode" to determine quality. Options: {"ultra_fast", "fast" (default), "standard", "high_quality"}. See docs in api.py
preset = "fast"
# The returned dict is not used further down in this cell.
audio_segments = GettingSpeakerAudio(audio_path,subtitle_txt_path)

# training_voice writes to the relative path tortoise/voices/..., so speech
# generation runs from inside the tortoise-tts checkout.
%cd /content/drive/MyDrive/videoQA/tortoise-tts/
generate_new_speech(audio_path,df,preset)
%cd /content/drive/MyDrive/videoQA/



question_videos(df,output_dir, expected_info)
answer_videos(df,output_dir, expected_info)
set_audio_for_video(output_dir, df)
# Concatenate videos and question images
merge_videos(df, output_video, output_dir)
# Cleanup: the merged video survives because its name starts with "final_",
# which clear_output_directory does not match.
clear_curr_directory()
clear_output_directory(output_dir)

