# In [16]:
import re
import os
from pptx import Presentation
from pptx.util import Pt
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN
from PyPDF2 import PdfReader
from docx import Document

# === USER SETTINGS ===
# Source document to convert; the loader further down accepts .docx or .pdf.
input_file = "Round 1 to 6 High School Islamic Studies quiz 2025.docx"  # or .pdf
# Path the generated presentation is saved to.
output_pptx = "Islamic_Studies_Quiz_Rounds_2_to_6.pptx"
# Typeface and size applied to the paragraph placed on every slide.
font_name = "Calibri"
font_size = Pt(24)
line_spacing = Pt(38)          # line spacing assigned to each slide's paragraph
# Text box placement: top/left are the box's offsets on the slide, and the
# left and right paddings are both subtracted from the width to get the box width.
top_padding = Pt(60)
left_padding = Pt(50)
right_padding = Pt(70)
# Optional explicit slide size (960 x 540 pt, a 16:9 ratio); currently disabled.
#slide_width = Pt(960)
#slide_height = Pt(540)

# === TEXT EXTRACTORS ===
def extract_text_from_pdf(file_path):
    """Return the text of every page in a PDF as one newline-joined string.

    Args:
        file_path: Location of the PDF; handed straight to ``PdfReader``.

    Returns:
        The per-page results of ``extract_text()``, in page order, joined
        with a single newline between pages.
    """
    page_texts = []
    for pdf_page in PdfReader(file_path).pages:
        page_texts.append(pdf_page.extract_text())
    return "\n".join(page_texts)

def extract_text_from_docx(file_path):
    """Return the text of every paragraph in a .docx as one string.

    Args:
        file_path: Location of the Word document; handed to ``Document``.

    Returns:
        Each paragraph's ``text``, in document order, joined with a single
        newline. Empty paragraphs therefore show up as blank lines.
    """
    paragraph_texts = (block.text for block in Document(file_path).paragraphs)
    return "\n".join(paragraph_texts)

# === LOAD TEXT ===
# Pick the extractor from the file extension, compared case-insensitively.
_input_name_lower = input_file.lower()
if _input_name_lower.endswith(".docx"):
    full_text = extract_text_from_docx(input_file)
elif _input_name_lower.endswith(".pdf"):
    full_text = extract_text_from_pdf(input_file)
else:
    raise ValueError("Unsupported file type. Use .pdf or .docx")

# === FILTER TO START FROM "Round 2" ===
# Everything before the first occurrence of "Round 2" is discarded.
start_index = full_text.find("Round 2")
if start_index < 0:
    raise ValueError('"Round 2" not found in document.')
quiz_text = full_text[start_index:]

# === REMOVE ANSWERS ===
# Drop each "Answer:" marker together with the rest of its line.
quiz_text = re.compile(r"Answer:.*").sub("", quiz_text)

# === SPLIT INTO SLIDES ===
# A run of two or more newlines separates one slide's text from the next.
sections = re.compile(r"\n{2,}").split(quiz_text.strip())

# === CREATE POWERPOINT ===
# Build one slide per text section. A section that starts with "Round N" or
# "Swing N" is shown as-is and restarts the question counter; every other
# section is shown as a numbered question ("Q1. ...", "Q2. ...").
prs = Presentation()
blank_slide_layout = prs.slide_layouts[6]

# Size the text box from the presentation's own dimensions so it matches the
# slides actually being generated and does not depend on separately defined
# slide_width / slide_height variables. The top padding is subtracted from the
# height so the box does not run past the bottom edge of the slide.
text_width = prs.slide_width - left_padding - right_padding
text_height = prs.slide_height - top_padding

# Section headings like "Swing 1" or "Round 3" (case-insensitive); compiled
# once because it is applied to every section.
section_title_pattern = re.compile(r"(?i)(round|swing)\s+\d+")
question_number = 1

for section in sections:
    section = section.strip()
    if not section:
        continue

    is_title_slide = section_title_pattern.match(section) is not None

    slide = prs.slides.add_slide(blank_slide_layout)
    txBox = slide.shapes.add_textbox(left_padding, top_padding, text_width, text_height)
    tf = txBox.text_frame
    tf.word_wrap = True

    # A new text frame already holds one empty paragraph; write into it
    # instead of appending a second one, which would leave a blank first line.
    paragraph = tf.paragraphs[0]
    if is_title_slide:
        paragraph.text = section
        question_number = 1  # numbering restarts after each round/swing heading
    else:
        paragraph.text = f"Q{question_number}. {section}"
        question_number += 1

    # Font and layout settings
    paragraph.font.size = font_size
    paragraph.font.name = font_name
    paragraph.font.color.rgb = RGBColor(0, 0, 0)
    paragraph.line_spacing = line_spacing
    paragraph.alignment = PP_ALIGN.LEFT

# === SAVE ===
prs.save(output_pptx)
print(f"✅ PPTX created: {output_pptx}")


# Output: ✅ PPTX created: Islamic_Studies_Quiz_Rounds_2_to_6.pptx
