In [1]:
import random
import tempfile
from pathlib import Path
from string import ascii_letters

import aspose.pydrawing as drawing
import aspose.slides as slides
from pptx import Presentation

In [2]:
# --- Configuration: everything a reader might want to tune ---------------
# NOTE: the two directories below are machine-specific absolute paths;
# adjust them before running the notebook on another computer.

# Number of new PowerPoint files to be created
number = 5
# Source directory where the PowerPoint files are located
source_dir = Path("C:/Users/ASUS/Documents/M. Data science/thesis/data_0")
# Output directory where the new PowerPoint files will be saved
output_dir = Path("C:/Users/ASUS/Documents/M. Data science/thesis/output")
# Number of images to include in each new PowerPoint file
images_per_powerpoint = 3
# Whether to search for PowerPoint files recursively in the source directory
recursive = True

# Create the output directory if it doesn't exist. `parents=True` also creates
# any missing intermediate directories instead of raising FileNotFoundError.
output_dir.mkdir(parents=True, exist_ok=True)

In [3]:
# Create a private temporary folder to store the extracted images.
# mkdtemp() always returns a freshly created, uniquely named directory, so the
# cleanup at the end of the notebook (which deletes every file in
# `temp_folder`) can never remove unrelated files that happen to live in a
# pre-existing "temp" folder of the current working directory.
temp_folder = Path(tempfile.mkdtemp(prefix="pptx_images_"))


In [4]:
# Function to get the image format
def get_image_format(image_type):
    """Map an image subtype string to a ``drawing.imaging.ImageFormat`` member.

    Parameters
    ----------
    image_type : str
        Subtype part of an image content type, e.g. ``"png"`` for
        ``"image/png"``. Matching is case-insensitive, surrounding whitespace
        and a leading ``"x-"`` (as in ``"x-emf"``) are ignored, and ``"jpg"``
        is treated as an alias of ``"jpeg"``.

    Returns
    -------
    The matching ``ImageFormat`` member, or ``ImageFormat.jpeg`` when the
    subtype is not one of jpeg, emf, bmp, png, wmf or gif.
    """
    image_format = drawing.imaging.ImageFormat
    known_formats = {
        "jpeg": image_format.jpeg,
        "emf": image_format.emf,
        "bmp": image_format.bmp,
        "png": image_format.png,
        "wmf": image_format.wmf,
        "gif": image_format.gif,
    }

    # Normalise the key so that "PNG", " png " and "png" all resolve alike.
    key = str(image_type).strip().lower()
    # Vendor-prefixed subtypes ("x-emf", "x-wmf") name the same formats.
    if key.startswith("x-"):
        key = key[2:]
    if key == "jpg":
        key = "jpeg"

    # Unknown subtypes keep the original fallback: JPEG.
    return known_formats.get(key, image_format.jpeg)


In [5]:

# Function to extract images from the PowerPoint file
def extract_images(file: Path):
    """Save the image of every picture frame in ``file`` and return the paths.

    The presentation is opened with ``slides.Presentation`` and all shapes on
    all slides are visited. For each shape whose type is exactly
    ``slides.PictureFrame`` and that carries an image, the image is written
    into the module-level ``temp_folder`` under a name made of 10 random ASCII
    letters, with the lower-cased content-type subtype as the extension.

    Returns a list of ``Path`` objects, one per saved image, in visiting order.
    """
    saved_paths = []

    with slides.Presentation(str(file.absolute())) as deck:
        for page in deck.slides:
            for element in page.shapes:
                # Exact type comparison: only plain picture frames qualify.
                if type(element) is not slides.PictureFrame:
                    continue

                picture = element.picture_format.picture.image
                if picture is None:
                    continue

                # "image/png" -> "png"; the subtype selects the save format
                # and doubles as the file extension.
                subtype = picture.content_type.split("/")[1]
                save_format = get_image_format(subtype)

                # Random 10-letter stem for the file on disk.
                stem = "".join(random.choices(ascii_letters, k=10))
                target = temp_folder / f"{stem}.{str(subtype).lower()}"

                saved_paths.append(target)
                picture.system_image.save(str(target.absolute()), save_format)

    return saved_paths

In [6]:
# Get the list of PowerPoint files in the source directory.
# (`glob_regex` holds a glob pattern, not a regular expression.)
glob_regex = "**/*" if recursive else "*"
filenames = sorted(
    f
    for f in source_dir.glob(glob_regex)
    # - is_file(): ignore directories whose name happens to end in ".pptx"
    # - suffix.lower(): also pick up files saved as ".PPTX"
    # - "~$" prefix: skip the owner/lock files Office leaves next to open
    #   documents; they are not real presentations
    if f.is_file() and f.suffix.lower() == ".pptx" and not f.name.startswith("~$")
)

# Get all the images from the PowerPoint files
images = []
for f in filenames:
    images.extend(extract_images(f))

In [7]:
# Create the new PowerPoint files
if not images:
    # random.choices() would fail with an opaque IndexError on an empty list;
    # report the actual problem instead.
    raise ValueError(
        f"No images were extracted from the .pptx files under {source_dir}; "
        "nothing to build the new presentations from."
    )

for i in range(number):
    filename = output_dir / f"powerpoint_{i}.pptx"
    new_prs = Presentation()
    # Pick the images for this file. random.choices() draws WITH replacement,
    # so the same image may appear more than once in one presentation.
    chosen_images = random.choices(images, k=images_per_powerpoint)

    slide_height = new_prs.slide_height

    # Add the images to the new PowerPoint file
    for image in chosen_images:
        # Add a new slide (layout 6 is presumably the blank layout of the
        # default python-pptx template - confirm if a custom template is used)
        new_slide = new_prs.slides.add_slide(new_prs.slide_layouts[6])
        # Add the image at the top-left corner, scaled to the slide height
        new_slide.shapes.add_picture(str(image), 0, 0, height=slide_height)

    # Save the new PowerPoint file
    new_prs.save(str(filename))

# Delete the temporary folder and its contents. The guard keeps a re-run of
# this cell from failing here when the folder has already been removed.
if temp_folder.is_dir():
    for f in temp_folder.glob("*"):
        f.unlink()
    temp_folder.rmdir()