## Install the requirements

In [None]:
# Python dependencies, pinned for reproducibility.
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install python-pptx==1.0.2
%pip install gtts==2.5.4
%pip install tqdm==4.67.1
%pip install pdf2image==1.17.0
%pip install moviepy==1.0.3
%pip install pydub==0.25.1
%pip install opencv-python==4.10.0.84

In [None]:
# System tools used later in the notebook: LibreOffice (soffice) for PPTX -> PDF,
# poppler-utils for pdf2image, and ffmpeg for muxing/concatenating the video.
# -y answers the confirmation prompt so the cell can run unattended.
!apt-get update
!apt-get install -y libreoffice
!apt-get install -y poppler-utils
!apt-get install -y ffmpeg

# 1. Extract the text from the PPT slides
###### Give the path of the PPT file in the variable "ppt_file_path"

In [None]:
from pptx import Presentation
from tqdm import tqdm  # Import tqdm for progress bar

def extract_ppt_content(ppt_path):
    """
    Read a PowerPoint deck and collect the text found on each slide.

    Only shapes that expose a text frame contribute; the text of each of their
    paragraphs is gathered and the pieces of one slide are joined with a
    single space.

    Args:
        ppt_path (str): Path to the PowerPoint file.

    Returns:
        list: One string per slide, in the order the slides are iterated.
    """
    deck = Presentation(ppt_path)
    slide_count = len(deck.slides)

    # tqdm only wraps the slide iterator to report progress.
    progress = tqdm(deck.slides, desc="Extracting Slides content", total=slide_count, unit="slide")

    return [
        " ".join(
            para.text
            for shape in slide.shapes
            if shape.has_text_frame
            for para in shape.text_frame.paragraphs
        )
        for slide in progress
    ]

# Run the extraction on the demo deck
ppt_file_path = "demo_ppt.pptx"  # Replace with your PowerPoint file path
slides_content = extract_ppt_content(ppt_path=ppt_file_path)

# Show the extracted texts (one list entry per slide) after a blank line
print()
print(slides_content)



Extracting Slides content: 100%|██████████| 2/2 [00:00<00:00, 551.23slide/s]


['Omni RAG Studio Implemented:   - Integrated Omni RAG UI frontend with VERBA backend for querying the documents through RAG pipeline.   Functionalities-  Vector store : User can select different readers, chunking methods, and embedding models to process various data and store them into vector database.   RAG configuration : Configure the RAG pipeline by selecting from different retriever and generator models, such as Ollama, GPT-3/4. Chat interface: Use the Chat page to ask questions and receive relevant chunks and answers generated by the selected model.', 'Future Work:   RAG Features:   RAG Evaluation technique To work on evaluation of Rag pipeline using tools like RAGAS or Giskard. Advanced Querying  Task delegation based on LLM evaluation.   Omni RAG Studio']





# 2. Convert the text contents to audio

In [None]:
from gtts import gTTS
from tqdm import tqdm
import os

def text_to_audio(slide_contents, output_folder="slide_audios"):
    """
    Synthesise one MP3 file per non-blank slide text using gTTS.

    Files are written as ``slide_<n>.mp3`` where ``n`` is the 1-based position
    of the text in ``slide_contents``. Entries that are empty or whitespace-only
    are reported and skipped, so no file is produced for their number.

    Args:
        slide_contents (list): List where each index contains the content of a slide.
        output_folder (str): Folder to save the audio files.
    """
    # Create the destination folder on first use
    folder_missing = not os.path.exists(output_folder)
    if folder_missing:
        os.makedirs(output_folder)

    print("Generating audio files for slides...")

    # Walk the slide texts, numbering them from 1, with a progress bar
    progress = tqdm(slide_contents, desc="Processing Slides", unit="slide")
    for slide_number, text in enumerate(progress, start=1):
        if not text.strip():
            print(f"Slide {slide_number} has no content. Skipping...")
            continue

        target_path = os.path.join(output_folder, f"slide_{slide_number}.mp3")
        gTTS(text=text, lang='en').save(target_path)

    print(f"\nAudio files saved in the folder: {output_folder}")

# Narrate the slide texts held in `slides_content`
# (the list extracted from the PowerPoint slides)
text_to_audio(slide_contents=slides_content)


Generating audio files for slides...


Processing Slides: 100%|██████████| 2/2 [00:02<00:00,  1.10s/slide]


Audio files saved in the folder: slide_audios





# 3. Extracting each slide as a frame

In [None]:
import os
import time
import subprocess
from pdf2image import convert_from_bytes

def convert_ppt_to_images(pptfile_path, out_dir="/content/slide_images", img_format="jpg", dpi=96):
    """
    Render every slide of a PowerPoint file to an image file.

    The deck is first converted to PDF with LibreOffice (``soffice``) inside a
    private temporary directory, so an unrelated PDF with the same base name in
    the working directory is neither overwritten nor deleted. The PDF pages are
    then rasterised with pdf2image and saved as ``slide_<n>.<img_format>``
    (1-based) in ``out_dir``.

    Args:
        pptfile_path (str): Path to the PowerPoint file.
        out_dir (str): Directory that receives the slide images; created
            (including parents) when missing.
        img_format (str): File extension used for the saved images.
        dpi (int): Resolution passed to pdf2image when rasterising the PDF.

    Raises:
        subprocess.CalledProcessError: If ``soffice`` exits with a non-zero status.
        FileNotFoundError: If ``soffice`` is not installed, or if it finished
            without producing the expected PDF.
    """
    import tempfile  # local import: not part of this cell's import list

    filename_bare = os.path.splitext(os.path.basename(pptfile_path))[0]
    pdffile_name = filename_bare + ".pdf"

    # Convert PPTX to PDF using LibreOffice; --outdir keeps the intermediate
    # PDF out of the working directory.
    with tempfile.TemporaryDirectory() as work_dir:
        command_list = [
            "soffice", "--headless", "--convert-to", "pdf",
            "--outdir", work_dir, pptfile_path,
        ]
        subprocess.run(command_list, check=True)

        pdf_path = os.path.join(work_dir, pdffile_name)
        if not os.path.exists(pdf_path):
            raise FileNotFoundError(
                f"LibreOffice did not produce {pdffile_name} for {pptfile_path}"
            )
        with open(pdf_path, "rb") as f:
            pdf_bytes = f.read()
    # Leaving the `with` block deletes the temporary directory and the PDF in it.

    images = convert_from_bytes(pdf_bytes, dpi=dpi)

    os.makedirs(out_dir, exist_ok=True)

    for i, img in enumerate(images, start=1):
        img_name = os.path.join(out_dir, f"slide_{i}.{img_format}")
        img.save(img_name)

    print(f"Removed created PDF file: {pdffile_name}")
    print(f"Conversion done, images saved in dir {out_dir}")

# Location of the PPT file inside the Colab workspace
pptfile_path = "/content/demo_ppt.pptx"

# Render every slide of that deck to an image
convert_ppt_to_images(pptfile_path=pptfile_path)


Removed created PDF file: RAG_Studio-sprint-Phase2.pdf
Conversion done, images saved in dir /content/slide_images


# 4. Combine audio and frames to get video

In [None]:

import os
import cv2
from pydub import AudioSegment
import subprocess

def _slide_sort_key(filename):
    """Sort key that orders names by their trailing number (``slide_2`` before ``slide_10``)."""
    stem = os.path.splitext(filename)[0]
    prefix = stem.rstrip("0123456789")
    digits = stem[len(prefix):]
    return (prefix, int(digits) if digits else -1, filename)


def _remove_if_exists(path):
    """Delete ``path`` when it exists; used for temp-file cleanup."""
    if os.path.exists(path):
        os.remove(path)


def create_video_from_images_and_audio(image_folder, audio_folder, output_video_file):
    """
    Build a narrated video by pairing slide images with slide audio files.

    The ``.jpg`` files of ``image_folder`` and the ``.mp3`` files of
    ``audio_folder`` are ordered by their trailing number and paired by
    position. Each pair is muxed into a temporary MP4 with FFmpeg, the parts
    are concatenated, and the result is written next to ``output_video_file``
    with ``_result`` inserted before the ``.mp4`` extension. Temporary files
    are created in the current working directory and removed afterwards, even
    when a step fails.

    Args:
        image_folder (str): Folder containing the slide images (``.jpg``).
        audio_folder (str): Folder containing the slide audio files (``.mp3``).
        output_video_file (str): Requested output path; the video is written to
            ``<name>_result.mp4`` when this ends in ``.mp4``, otherwise to the
            path as given.

    Returns:
        None. An error is printed and nothing is rendered when the two folders
        hold different numbers of files or no slide images at all.

    Raises:
        ValueError: If OpenCV cannot read one of the images.
        subprocess.CalledProcessError: If an FFmpeg invocation fails.
    """
    # Numeric ordering: a plain sorted() would place slide_10 before slide_2.
    image_files = sorted(
        (f for f in os.listdir(image_folder) if f.endswith('.jpg')), key=_slide_sort_key
    )
    audio_files = sorted(
        (f for f in os.listdir(audio_folder) if f.endswith('.mp3')), key=_slide_sort_key
    )

    # Ensure the number of images and audio files match
    if len(image_files) != len(audio_files):
        print("Error: The number of images and audio files must be the same!")
        return
    if not image_files:
        print("Error: No slide images found; nothing to render.")
        return

    # Only touch the extension, never a ".mp4" that happens to occur earlier in the path.
    root, ext = os.path.splitext(output_video_file)
    final_output = f"{root}_result{ext}" if ext == ".mp4" else output_video_file

    video_parts = []
    concat_file = "concat_list.txt"
    try:
        for i, (image_file, audio_file) in enumerate(zip(image_files, audio_files)):
            img_path = os.path.join(image_folder, image_file)
            img = cv2.imread(img_path)
            if img is None:  # cv2.imread signals failure by returning None
                raise ValueError(f"Could not read image file: {img_path}")
            height, width = img.shape[:2]

            temp_video_path = f"temp_video_part_{i}.mp4"
            temp_audio_path = f"temp_audio_part_{i}.mp3"
            output_video_with_audio = f"temp_video_part_{i}_with_audio.mp4"
            # Registered up front so a partially written part is cleaned up too.
            video_parts.append(output_video_with_audio)

            try:
                # Write the slide as a single-frame clip (8 fps)
                video_writer = cv2.VideoWriter(
                    temp_video_path, cv2.VideoWriter_fourcc(*'mp4v'), 8, (width, height)
                )
                video_writer.write(img)
                video_writer.release()

                # Re-export the narration to a temporary MP3
                audio_clip = AudioSegment.from_mp3(os.path.join(audio_folder, audio_file))
                audio_clip.export(temp_audio_path, format="mp3")

                # Mux frame and narration; -y overwrites leftovers from earlier runs
                # instead of waiting for confirmation.
                ffmpeg_command = [
                    "ffmpeg", "-y", "-i", temp_video_path, "-i", temp_audio_path,
                    "-c:v", "libx264", "-c:a", "aac", "-strict", "experimental",
                    output_video_with_audio,
                ]
                subprocess.run(ffmpeg_command, check=True)
            finally:
                _remove_if_exists(temp_video_path)
                _remove_if_exists(temp_audio_path)

        # List the parts for FFmpeg's concat demuxer
        with open(concat_file, "w") as f:
            for video_part in video_parts:
                f.write(f"file '{video_part}'\n")

        ffmpeg_concat_command = [
            "ffmpeg", "-y", "-f", "concat", "-safe", "0", "-i", concat_file,
            "-c", "copy", final_output,
        ]
        subprocess.run(ffmpeg_concat_command, check=True)
    finally:
        # Clean up the per-slide parts and the concat list, success or not
        for video_part in video_parts:
            _remove_if_exists(video_part)
        _remove_if_exists(concat_file)

    print(f"Video created successfully: {final_output}")

# Inputs and output location for the final video
image_folder = "/content/slide_images"  # Folder containing slide images
audio_folder = "/content/slide_audios"  # Folder containing slide audio files
output_video_file = "/content/output_video.mp4"  # Output video file path

# Stitch the slide images and their narration into one video
create_video_from_images_and_audio(
    image_folder=image_folder,
    audio_folder=audio_folder,
    output_video_file=output_video_file,
)


Video created successfully: /content/output_video_result.mp4


You can display the output video using HTML.

In [None]:
from IPython.display import HTML
from base64 import b64encode

# Replace with the actual video file name (resolved relative to the working directory)
video_path = "output_video_result.mp4"

# Read the video through a context manager so the file handle is closed,
# then convert it to base64 for embedding
with open(video_path, "rb") as video_file:
    video = video_file.read()
video_encoded = b64encode(video).decode()

# Embed video in Colab as an inline data URI
video_html = f"""
<video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{video_encoded}" type="video/mp4">
</video>
"""
HTML(video_html)
