In [1]:
# Installation
# Run these commands to install the necessary libraries
!pip install openai
!pip install PyMuPDF



In [3]:
import fitz  # PyMuPDF
import os
import requests
import base64
import time

# Function to convert PDF pages to images
def pdf_to_images(pdf_path, output_folder):
    """Render every page of a PDF to a PNG file.

    Args:
        pdf_path: Path of the PDF document to open with PyMuPDF (``fitz``).
        output_folder: Directory that receives the images. It is created when
            missing; an empty string means the current working directory.

    Returns:
        list[str]: Paths of the written images in page order, named
        ``page_1.png``, ``page_2.png``, ...
    """
    # exist_ok=True removes the check-then-create race of exists() + makedirs().
    # os.makedirs("") raises, so an empty folder name is simply skipped.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    image_paths = []
    doc = fitz.open(pdf_path)
    try:
        for page_index in range(len(doc)):
            page_label = f"page_{page_index + 1}.png"  # file names are 1-based
            pix = doc[page_index].get_pixmap()
            image_path = os.path.join(output_folder, page_label)
            pix.save(image_path)
            image_paths.append(image_path)
            print(f"Saved {page_label}")
    finally:
        # Always release the document, even when rendering or saving fails.
        doc.close()

    return image_paths

# Function to get image description from Azure OpenAI
def get_image_description(image_path, api_key, endpoint, timeout=300):
    """Ask Azure OpenAI to write a speech script for one slide image.

    The image is sent inline as a base64 ``data:image/png`` URL together with a
    fixed prompt, in a single chat-completions style POST to ``endpoint``.

    Args:
        image_path: Path of the PNG image to send.
        api_key: Value sent in the ``api-key`` request header.
        endpoint: URL the request is POSTed to as-is.
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            cannot hang the notebook forever.

    Returns:
        dict: The decoded JSON body of the successful response.

    Raises:
        SystemExit: On any request failure other than HTTP 429, or when the
            service is still answering HTTP 429 after all retry attempts.
    """
    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode('ascii')

    headers = {
        "Content-Type": "application/json",
        "api-key": api_key,
    }

    payload = {
        "messages": [
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "You are an AI assistant specialized in understanding and generating content from visual data."
                    }
                ]
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{encoded_image}"
                        }
                    },
                    {
                        "type": "text",
                        "text": "The image is the presentation slides. Please help to generate the speech script for the slide."
                    }
                ]
            }
        ],
        "temperature": 0.7,
        "top_p": 0.95,
        "max_tokens": 800
    }

    retries = 5
    wait_time = 5  # Seconds to wait between rate-limited attempts
    for attempt in range(1, retries + 1):
        try:
            response = requests.post(endpoint, headers=headers, json=payload, timeout=timeout)
            response.raise_for_status()
            return response.json()
        except requests.HTTPError as err:
            # raise_for_status() attaches the offending response to the error.
            if err.response.status_code != 429:
                raise SystemExit(f"Failed to make the request. Error: {err}")
            if attempt == retries:
                break  # No point sleeping when there is no attempt left
            print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
        except requests.RequestException as e:
            raise SystemExit(f"Failed to make the request. Error: {e}")

    # Previously the loop fell through and returned None here, which only
    # surfaced later as a TypeError in the caller.
    raise SystemExit(
        f"Failed to make the request. Error: still rate limited (HTTP 429) after {retries} attempts."
    )

# Main function to combine both processes
def main():
    """Convert the PDF to page images and build a per-page speech script.

    Each page image is sent to Azure OpenAI through ``get_image_description``;
    the returned texts are concatenated under ``Page N:`` headings, written to
    ``speech_script.txt`` and printed.
    """
    # Parameters
    pdf_path = "aoaidemo.pdf"  # Path to the input PDF
    output_folder = "output_images"  # Folder to save output images
    # Credentials come from the environment so they never have to be saved in
    # the notebook; the placeholders are only used when the variables are unset.
    api_key = os.environ.get("AZURE_OPENAI_API_KEY", "YOUR_API_KEY")
    # The request is POSTed to this URL unchanged, so it must be the complete
    # request URL of your Azure OpenAI deployment.
    endpoint = os.environ.get("AZURE_OPENAI_CHAT_COMPLETIONS_URL", "YOUR_ENDPOINT_URL")

    # Convert PDF to images
    image_paths = pdf_to_images(pdf_path, output_folder)

    # Loop through each image and get descriptions
    descriptions = []
    for page_num, image_path in enumerate(image_paths, start=1):
        description = get_image_description(image_path, api_key, endpoint)
        descriptions.append((page_num, description))
        print(f"Description for page {page_num}: {description}")

    # Combine descriptions into a speech script
    script_parts = []
    for page_num, desc in descriptions:
        try:
            text = desc['choices'][0]['message']['content']
        except (KeyError, IndexError, TypeError) as e:
            # TypeError covers a missing (None) or otherwise non-subscriptable response.
            print(f"Error processing description for page {page_num}: {e}")
            continue
        script_parts.append(f"Page {page_num}:\n{text}\n\n")
    speech_script = "".join(script_parts)

    # Output the final speech script to a text file.
    # NOTE: written with the platform default encoding because
    # read_speech_script in the follow-up cell reads it back the same way.
    with open("speech_script.txt", "w") as f:
        f.write(speech_script)

    print("\nGenerated Speech Script:\n")
    print(speech_script)

if __name__ == "__main__":
    main()

Saved page_1.png
Saved page_2.png
Saved page_3.png
Saved page_4.png
Saved page_5.png
Saved page_6.png
Description for page 1: {'choices': [{'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}, 'finish_reason': 'stop', 'index': 0, 'logprobs': None, 'message': {'content': 'Sure! Here’s a speech script for the slide:\n\n---\n\n"Good [morning/afternoon/evening], everyone.\n\nThank you for joining us today. I\'m excited to welcome you to our session on the latest updates for Azure OpenAI Service.\n\nAs you can see from the slide, we\'ll be covering what\'s new and improved in our service as of May 2024. \n\nWe have some exciting new features and enhancements to share with you, which will help you leverage the power of OpenAI\'s technologies more effectively and efficiently within the Azure ecosystem.\n\nSo let\'

In [7]:
# Function to read the speech script from a file
def read_speech_script(file_path):
    """Return the full text content of the speech script at ``file_path``.

    The file is opened in text mode with the platform default encoding.
    """
    handle = open(file_path, "r")
    try:
        contents = handle.read()
    finally:
        handle.close()
    return contents

# Function to summarize and smooth the script using Azure OpenAI
def summarize_script(script, api_key, endpoint, timeout=300):
    """Ask Azure OpenAI to rewrite a per-page script into one cohesive talk.

    The script is embedded in a fixed prompt that asks for smooth transitions,
    a greeting, a closing Q&A and retained page numbers, and is sent in a
    single chat-completions style POST to ``endpoint``.

    Args:
        script: Text of the speech script to rewrite.
        api_key: Value sent in the ``api-key`` request header.
        endpoint: URL the request is POSTed to as-is.
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            cannot hang the notebook forever.

    Returns:
        dict: The decoded JSON body of the successful response.

    Raises:
        SystemExit: On any request failure other than HTTP 429, or when the
            service is still answering HTTP 429 after all retry attempts.
    """
    headers = {
        "Content-Type": "application/json",
        "api-key": api_key,
    }

    payload = {
        "messages": [
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": "You are an AI assistant that helps people summarize and smoothen presentation scripts."
                    }
                ]
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"You are an AI assistant that helps people summarize and smoothen presentation scripts. Please rewrite the script to make it smooth and cohesive, ensuring good transitions between pages. Include a greeting at the beginning and a Q&A session at the end. Retain page numbers:\n\n {script}"
                    }
                ]
            }
        ],
        "temperature": 0.7,
        "top_p": 0.95,
        "max_tokens": 3000
    }

    retries = 5
    wait_time = 5  # Seconds to wait between rate-limited attempts
    for attempt in range(1, retries + 1):
        try:
            response = requests.post(endpoint, headers=headers, json=payload, timeout=timeout)
            response.raise_for_status()
            return response.json()
        except requests.HTTPError as err:
            # raise_for_status() attaches the offending response to the error.
            if err.response.status_code != 429:
                raise SystemExit(f"Failed to make the request. Error: {err}")
            if attempt == retries:
                break  # No point sleeping when there is no attempt left
            print(f"Rate limit exceeded. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
        except requests.RequestException as e:
            raise SystemExit(f"Failed to make the request. Error: {e}")

    # Previously the loop fell through and returned None here, which only
    # surfaced later as a TypeError in the caller.
    raise SystemExit(
        f"Failed to make the request. Error: still rate limited (HTTP 429) after {retries} attempts."
    )

# Function to write the summarized script to a file
def write_summarized_script(summary, output_path):
    """Write the summarized script to ``output_path``, replacing any existing file.

    Args:
        summary: Text to write.
        output_path: Destination file path.
    """
    # The text is model output and may hold any Unicode character; an explicit
    # UTF-8 encoding avoids UnicodeEncodeError on non-UTF-8 default locales.
    with open(output_path, "w", encoding="utf-8") as file:
        file.write(summary)

def main():
    """Read ``speech_script.txt``, have it smoothed by Azure OpenAI and save the result.

    The rewritten script is written to ``summarized_presentation.txt`` and
    printed. Nothing is written when the response lacks the expected content.
    """
    # Parameters
    input_file = "speech_script.txt"  # Input file containing the speech script
    output_file = "summarized_presentation.txt"  # Output file for the summarized presentation
    # Credentials come from the environment so they never have to be saved in
    # the notebook; the placeholders are only used when the variables are unset.
    api_key = os.environ.get("AZURE_OPENAI_API_KEY", "YOUR_API_KEY")
    # The request is POSTed to this URL unchanged, so it must be the complete
    # request URL of your Azure OpenAI deployment.
    endpoint = os.environ.get("AZURE_OPENAI_CHAT_COMPLETIONS_URL", "YOUR_ENDPOINT_URL")

    # Read the speech script from the file
    script = read_speech_script(input_file)

    # Get the summarized and smoothed version
    response = summarize_script(script, api_key, endpoint)
    try:
        summarized_script = response['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError) as e:
        # TypeError covers a missing (None) or otherwise non-subscriptable response.
        print(f"Error processing the response: {e}")
        return

    # Write the summarized script to a file
    write_summarized_script(summarized_script, output_file)

    print("\nSummarized Presentation Script:\n")
    print(summarized_script)

if __name__ == "__main__":
    main()


Summarized Presentation Script:

---

**Greeting:**

Good [morning/afternoon/evening], everyone!

Thank you for joining us today as we unveil the latest updates for the Azure OpenAI Service. We're excited to share the new features and enhancements that promise to significantly elevate your experience with Azure OpenAI.

---

**Page 1:**

As of May 2024, we've introduced several new features and improvements to Azure OpenAI. Our commitment to innovation is unwavering, and this update showcases our continuous efforts to integrate the latest advancements in AI. These features are designed to enhance the flexibility, scalability, and performance of the Azure OpenAI Service.

One of the key highlights is the improved integration capabilities, allowing seamless connectivity with other Azure services. This enables you to leverage a more cohesive ecosystem, simplifying the process of building, deploying, and managing AI solutions.

We've also focused on improving the user experience by introd