In [35]:
import os
import requests
import json
import pptx  # Library to process PowerPoint files
from docx import Document  # Library to create Word documents
from docx.shared import Pt  # For font size
from docx.oxml.ns import qn  # For font name
from docx.shared import RGBColor  # For font color
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT  # For center alignment


# Settings for the chat-completions request (deployment name and API version
# are interpolated into the endpoint URL).
apiversion = '2024-06-01'
deployment = 'gpt-4'
# Subscription key comes from the environment; this is None when it is unset.
api_key = os.getenv("KO_GPT_API")

# Folder scanned for .pptx files; the summary document is written here as well.
folder_path = r'C:\Users\O52834\OneDrive - The Coca-Cola Company\Documents\BEACH\W+ Modeling\Iteration Outputs'

# Function to split text into smaller chunks
def chunk_text(text, max_tokens=3000):
    """Split ``text`` into whitespace-normalized chunks of bounded size.

    Words (as produced by ``str.split()``) are packed greedily and joined
    with single spaces.

    Args:
        text: The text to split.
        max_tokens: Maximum chunk size. Despite the name, the size is
            measured in characters of the joined chunk, not model tokens.

    Returns:
        A list of chunk strings. No chunk is longer than ``max_tokens``
        characters unless it consists of a single word that is itself
        longer than the limit. Empty or whitespace-only text yields ``[]``.
    """
    chunks = []
    current_chunk = []
    current_length = 0  # len(" ".join(current_chunk))

    for word in text.split():
        # Cost of adding this word: the word plus a separating space when
        # the chunk already holds something.
        added = len(word) + (1 if current_chunk else 0)

        # Check BEFORE appending so a chunk never overshoots the limit.
        if current_chunk and current_length + added > max_tokens:
            chunks.append(" ".join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length += added

    if current_chunk:  # Append the remaining chunk
        chunks.append(" ".join(current_chunk))

    return chunks

# Function to summarize text chunks
def summarize_presentation(content, chunk_size=3000, timeout=120):
    """Summarize ``content`` chunk by chunk through the chat-completions API.

    The text is split with ``chunk_text``; every chunk is sent as its own
    request and the per-chunk summaries are joined with single spaces.

    Args:
        content: Full text to summarize.
        chunk_size: Size limit forwarded to ``chunk_text``.
        timeout: Seconds to wait on each HTTP request before giving up, so
            a stalled connection cannot hang the run indefinitely.

    Returns:
        The combined summary string, or ``None`` if any request fails or a
        response body does not have the expected structure. Empty
        ``content`` yields no chunks and therefore an empty string.
    """
    # Split content into manageable chunks
    text_chunks = chunk_text(content, chunk_size)

    # Endpoint and headers are identical for every chunk; build them once.
    url = f"https://apim-emt-aip-prod-01.azure-api.net/openai/deployments/{deployment}/chat/completions?api-version={apiversion}"
    headers = {
        'Content-Type': 'application/json',
        'Ocp-Apim-Subscription-Key': api_key
    }

    summaries = []

    for chunk in text_chunks:
        prompt = f"""
        You have been provided a set of PowerPoint presentations across major brands and markets for The Coca-Cola Company. 
        Write a concise summary of the key insights from the following content:
        {chunk}
        """

        # Prepare the request payload
        data = {
            "temperature": 1,
            "top_p": 1,
            "max_tokens": 1500,
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ]
        }

        try:
            response = requests.post(url, headers=headers, json=data, timeout=timeout)
            response.raise_for_status()
            summary = response.json()['choices'][0]['message']['content']
        except requests.RequestException as e:
            # Connection errors, timeouts and non-2xx statuses land here.
            print(f"An error occurred: {e}")
            return None
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # The body was not JSON or lacked choices[0].message.content.
            print(f"Unexpected response format: {e!r}")
            return None

        if not isinstance(summary, str):
            # A null/non-text content field would break the join below.
            print("Unexpected response format: message content is not text")
            return None

        summaries.append(summary)

    # Combine the individual summaries
    return " ".join(summaries)

# Main function to process PowerPoints and summarize them
def process_powerpoints(folder_path):
    """Summarize every PowerPoint in ``folder_path`` into one Word document.

    Text is pulled from each ``.pptx`` file with ``extract_text_from_ppt``,
    combined, summarized with ``summarize_presentation`` and, when a
    summary comes back, written to ``Weekly+_Drivers_Final_Summary.docx``
    in the same folder via ``save_combined_summary_to_word``.

    NOTE(review): ``extract_text_from_ppt`` and
    ``save_combined_summary_to_word`` are defined outside this section;
    this code assumes the former returns a string - confirm.

    Args:
        folder_path: Directory containing the presentations; also the
            output location.
    """
    # Pick real presentations only:
    #  - match the extension case-insensitively (".PPTX" is valid too);
    #  - skip "~$name.pptx" lock files that Office creates next to a
    #    presentation that is currently open;
    #  - sort, because os.listdir returns entries in arbitrary order.
    pptx_files = sorted(
        name
        for name in os.listdir(folder_path)
        if name.lower().endswith(".pptx") and not name.startswith("~$")
    )

    # Extract text from all PowerPoints
    ppt_texts = []
    for filename in pptx_files:
        file_path = os.path.join(folder_path, filename)
        print(f"Processing: {filename}")
        ppt_texts.append(extract_text_from_ppt(file_path))

    # Join with a newline so the last word of one presentation cannot fuse
    # with the first word of the next.
    all_text = "\n".join(ppt_texts)

    # Summarize the combined content
    combined_summary = summarize_presentation(all_text)

    if combined_summary:
        # Save the final summary to a Word document
        output_file = os.path.join(folder_path, "Weekly+_Drivers_Final_Summary.docx")
        save_combined_summary_to_word({"Final Summary": combined_summary}, output_file)
        print(f"Final summary saved to {output_file}\n")
    else:
        print("Failed to generate final summary.")

# Entry point: run the summarization pipeline on the configured folder, but
# only when this code is executed directly rather than imported as a module.
if __name__ == "__main__":
    process_powerpoints(folder_path)


Processing: BRA_br_0198.pptx
Processing: BRA_br_0201.pptx
Processing: BRA_br_0219.pptx
Processing: BRA_br_0309.pptx
Processing: BRA_br_ip_312.pptx
Processing: CHN_br_0201.pptx
Processing: CHN_br_0436.pptx
Processing: CHN_br_0875.pptx
Processing: CHN_br_1836.pptx
Processing: IND_br_0201.pptx
Processing: IND_br_0494.pptx
Processing: IND_br_0567.pptx
Processing: IND_br_0875.pptx
Processing: IND_br_0921.pptx
Processing: JPN_br_0058.pptx
Processing: JPN_br_0201.pptx
Processing: JPN_br_0369.pptx
Processing: JPN_br_ip_219.pptx
Processing: JPN_br_ip_398.pptx
Processing: MEX_br_0179.pptx
Processing: MEX_br_0201.pptx
Processing: MEX_br_ip_294.pptx
Processing: MEX_br_ip_312.pptx
Processing: MEX_br_ip_617.pptx
Processing: USA_br_0198.pptx
Processing: USA_br_0201.pptx
Processing: USA_br_0248.pptx
Processing: USA_br_0309.pptx
Processing: USA_br_0875.pptx
Final summary saved to C:\Users\O52834\OneDrive - The Coca-Cola Company\Documents\BEACH\W+ Modeling\Iteration Outputs\Weekly+_Drivers_Final_Summary.docx