# Chapter 1 - generating the prompt engineering report

In [1]:
from docx import Document
from docx.shared import RGBColor
from langchain import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import PromptTemplate
import openai
import json
import re

In [2]:
import os

# SECURITY: an API key must never be committed to a notebook or source file.
# The key is read from the OPENAI_API_KEY environment variable instead; the
# value is None when that variable is not set. Any key that was previously
# pasted into this cell should be treated as leaked and revoked.
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [3]:
# Location of the Word document that holds the prompts and AI responses.
# Written as a raw string so the backslashes of the Windows path are taken
# literally instead of being parsed as (invalid) escape sequences.
file_path = r'H:\Documents\Software Development\QUB Software Development\Data_analysis_module\Chapter1prompts.docx'

# Font colours, as (R, G, B), used in the document to mark who is speaking:
# either green is treated as prompt text and purple as AI response text.
GREEN1 = (56, 118, 29)
GREEN2 = (39, 78, 19)
PURPLE = (103, 78, 167)

## Functions to read in prompts file and split it up based on 'New chat'

In [4]:
def read_docx(file_path):
    """
    Load a .docx file and flatten it into a list of (text, colour) pairs.

    One pair is produced per run, in document order. The colour is the run's
    font ``color.rgb`` value, which is None when no RGB value is available.
    """
    document = Document(file_path)
    return [
        (run.text, run.font.color.rgb)
        for paragraph in document.paragraphs
        for run in paragraph.runs
    ]

In [5]:
def split_doc_into_chats_single_string(file_path, max_words=2000):
    """
    Group the coloured runs of a .docx document into chats.

    A run containing "<New chat>" closes the chat in progress (if it has any
    text). Runs coloured GREEN1 or GREEN2 switch the current speaker to the
    prompt, runs coloured PURPLE switch it to the response, and runs of any
    other colour are attributed to whichever speaker was last seen. Text seen
    before any speaker is known is discarded.

    Returns a list of dicts with the keys "prompt" and "response". Each value
    is a single string, unless it holds more than max_words words, in which
    case it is a list of strings of at most max_words words each.
    """

    def _empty_chat():
        return {"prompt": "", "response": ""}

    def _has_text(chat):
        return bool(chat["prompt"] or chat["response"])

    def _sectioned(text):
        # Short texts are returned untouched; long ones become a list of
        # whitespace-normalised sections of max_words words.
        tokens = text.split()
        if len(tokens) <= max_words:
            return text
        return [
            ' '.join(tokens[start:start + max_words])
            for start in range(0, len(tokens), max_words)
        ]

    # Pass 1: walk the runs and assemble one dict per conversation
    conversations = []
    active = _empty_chat()
    speaker = None
    for fragment, colour in read_docx(file_path):
        # The marker only closes a chat that already has content
        if "<New chat>" in fragment and _has_text(active):
            conversations.append(active)
            active = _empty_chat()
            speaker = None

        # The run colour decides who is speaking
        if colour in (GREEN1, GREEN2):
            speaker = "prompt"
        elif colour == PURPLE:
            speaker = "response"

        # NOTE(review): a space is inserted before every run, so a word that
        # is split across two runs would come out with a gap - TODO confirm.
        if speaker:
            active[speaker] += " " + fragment

    # The final chat has no closing marker, so flush it explicitly
    if _has_text(active):
        conversations.append(active)

    # Pass 2: cut over-long prompts/responses into sections
    return [
        {role: _sectioned(chat[role]) for role in ("prompt", "response")}
        for chat in conversations
    ]

In [6]:
# Run the splitter on the prompts document; the result is a list with one
# {"prompt": ..., "response": ...} dict per chat and is reused by later cells.
chats_single_string = split_doc_into_chats_single_string(file_path)
chats_single_string[:5]  # Display the first 5 chats as an example

[{'prompt': ' <New chat> You are playing the role of an industry expert.\xa0 You are a luthier and own a company that refurbishes vintage tenor banjos to sell on for a profit.\xa0 Your target market are players of Irish traditional music.\xa0 Your goal is to tell me in detail about a recent project and provide me with insights that would not be apparent to an outsider.\xa0 Later, this information will be used to propose novel ways that the company could increase revenue after analysis of dat a obtained a bout the wider industry.\xa0 Details that impact the success or failure of a project are useful to me. \xa0By studying the low level details of how work is performed, we may identify underlying problems or unexploited opportunities.',
  'response': " <ChatGPT3.5-chat.openai.com> As a luthier specializing in refurbishing vintage tenor banjos for players of Irish traditional music, I recently undertook an interesting project that involved a rare 1920s Gibson TB-3 tenor banjo. This partic

## Using ChatGPT to assess each prompt and response to write pros and cons

In [7]:
# Industry name that is interpolated into the review instruction below.
industry = "vintage banjo refurbishment"
# Instruction sent to the model as the final user message, asking for a list
# of pros and cons for each prompt/response pair.
# NOTE(review): the text contains the typo "interating" (presumably
# "interacting"), and the indentation of the continuation lines is part of the
# string that is sent - confirm whether either should be cleaned up.
instruction = f"""Review the prompt and response.
                The 'prompt' is from a human interating with an AI large language model.
                The 'response' has been generated by the AI in response to the prompt.
                The human is using the AI to research the {industry} industry
                for a data analysis project.
                This section of the project is focused on understanding the business,
                how projects progress, successes and challenges to businesses,
                and values of businesses in the industry.
                Give a list of pros and cons based on how effective the prompt is 
                at obtaining useful information from the AI large language model
                based on the aim of this section."""

In [8]:
def query_completion(prompt, response, instruction):
    """
    Ask gpt-3.5-turbo to assess a single prompt/response pair.

    The request consists of a fixed system message followed by three user
    messages: the prompt, the AI response and the instruction. The API key is
    taken from the module-level ``openai_api_key``.

    Returns the text content of the first choice in the completion.
    """
    openai.api_key = openai_api_key

    system_text = "You are an objective, analytical, and fair-minded assistant. You are able to approach discussions or arguments with an open mind, striving to understand the various perspectives involved. "
    user_texts = (
        f"Prompt: {prompt}",
        f"AI Response: {response}",
        instruction,
    )

    conversation = [{"role": "system", "content": system_text}]
    conversation.extend({"role": "user", "content": text} for text in user_texts)

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )

    first_choice = completion.choices[0]
    return first_choice.message['content']


In [9]:
# New, empty Word document that the following cells append the report to.
doc = Document()
# Font colour applied to the prompt text written into the report.
green = RGBColor(39,78,19)

In [38]:
# Build the pros-and-cons section of the report: every prompt/response
# section of every chat is sent to the model and the answer is written to doc.
from itertools import zip_longest

# chats_single_string is a list with one {"prompt", "response"} dict per chat
for single_chat in chats_single_string:
    # Get the prompt and response
    prompts = single_chat["prompt"]
    responses = single_chat["response"]

    # Long texts arrive as a list of sections and short ones as a plain
    # string; wrap strings so both cases can be handled the same way
    if isinstance(prompts, str):
        prompts = [prompts]
    if isinstance(responses, str):
        responses = [responses]

    # Nothing to review if either side has no sections at all
    if not prompts or not responses:
        continue

    # A prompt and its response can be split into different numbers of
    # sections. Plain zip() would silently drop the surplus sections, so pair
    # them with zip_longest and reuse the last section of the shorter side.
    for prompt, response in zip_longest(prompts, responses):
        if prompt is None:
            prompt = prompts[-1]
        if response is None:
            response = responses[-1]

        # Call the query_completion() function
        pros_cons_list = query_completion(prompt, response, instruction)

        # Print the pros and cons list
        print(pros_cons_list)

        # Add the prompt to the document in green
        para = doc.add_paragraph()
        run = para.add_run(f"Prompt: {prompt}")
        run.font.color.rgb = green

        # Add the pros and cons list to the document
        doc.add_paragraph(f"Pros and Cons: {pros_cons_list}")

# Save the document (raw string keeps the backslashes of the path literal)
doc.save(r"H:\Documents\Software Development\QUB Software Development\Data_analysis_module\Chapter1PromptEngReport.docx")

Pros:
1. The prompt clearly defines the role and objective of the AI conversation, which is to obtain insights and details about a recent project in the vintage banjo refurbishment industry.
2. The prompt provides specific details about the target market (players of Irish traditional music) and the goal of the company (to refurbish banjos for profit).
3. The prompt requests insights that may not be apparent to an outsider, indicating a desire for in-depth information and analysis.
4. The prompt mentions the importance of studying low-level details, which encourages the AI to provide specific and granular information.
5. The prompt suggests analyzing data obtained about the wider industry, indicating a data-driven approach to identifying problems and opportunities.

Cons:
1. The prompt does not explicitly mention the specific type of insights or information the human is seeking about the recent project. It would be beneficial to provide specific questions or areas of interest to guide t

## Finally, write the original prompts and responses pasted from AI to the bottom of the document

*I had difficulty writing the list of prompts and responses to the bottom of the prompt engineering report, but eventually got it working with Option 3 below, which keeps the original colours and formatting intact. The output was difficult to read when all the text was in black or grouped together in one long paragraph.*

In [42]:
# Colours used when writing prompts (green) and AI responses (purple) into
# the report. This rebinds PURPLE, which was defined earlier as a plain
# (R, G, B) tuple, to an RGBColor holding the same three components.
PURPLE = RGBColor(103,78,167)
GREEN = RGBColor(39,78,19)

In [46]:
# Option 1 - adds prompts and responses in black
# Add prompts and responses after the pros and cons
# Load the existing document (raw string keeps the path's backslashes literal)
source_doc = Document(r'H:\Documents\Software Development\QUB Software Development\Data_analysis_module\Chapter1prompts.docx')

# Append the text of every source paragraph to the report. Only the text is
# copied, so run-level formatting such as the font colour is not carried over.
for paragraph in source_doc.paragraphs:
    doc.add_paragraph(paragraph.text)

# Save the new document
doc.save(r"H:\Documents\Software Development\QUB Software Development\Data_analysis_module\Chapter1PromptEngReport.docx")

In [45]:
# Option 2 - maintains the colours but all the text response is without correct spacing
# chats_single_string is a list with one {"prompt", "response"} dict per chat
for single_chat in chats_single_string:
    # Get the prompt and response
    prompt = single_chat["prompt"]
    response = single_chat["response"]

    # Texts over the word limit are stored as a list of sections; join them
    # so the report shows the text rather than the repr of a Python list
    if not isinstance(prompt, str):
        prompt = " ".join(prompt)
    if not isinstance(response, str):
        response = " ".join(response)

    # Add the prompt to the document in green
    para = doc.add_paragraph()
    run = para.add_run(f"Prompt: {prompt}")
    run.font.color.rgb = GREEN  # or GREEN2 depending on your requirement

    # Add the response to the document in purple
    para = doc.add_paragraph()
    run = para.add_run(f"AI Response: {response}")
    run.font.color.rgb = PURPLE

# Save the document (raw string keeps the backslashes of the path literal)
doc.save(r"H:\Documents\Software Development\QUB Software Development\Data_analysis_module\Chapter1PromptEngReport.docx")

In [8]:
# Option 3 WORKING - This version maintains both the colours and spacing of the original prompt and response document
from docx.shared import RGBColor
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

# Raw string keeps the backslashes of the Windows path literal
source_doc = Document(r'H:\Documents\Software Development\QUB Software Development\Data_analysis_module\Chapter1prompts.docx')

# Rebuild every source paragraph run by run so that the run boundaries and
# the per-run formatting of the original document are kept in the report
for paragraph in source_doc.paragraphs:
    # Create a new paragraph in the target document
    new_paragraph = doc.add_paragraph()

    # Copy the paragraph alignment (the only paragraph-level property copied)
    new_paragraph.alignment = paragraph.alignment

    for run in paragraph.runs:
        # Create a new run in the new paragraph
        new_run = new_paragraph.add_run(run.text)

        # Copy the font style, including color
        source_font = run.font
        target_font = new_run.font
        target_font.name = source_font.name
        target_font.size = source_font.size
        target_font.bold = source_font.bold
        target_font.italic = source_font.italic
        target_font.underline = source_font.underline
        # NOTE(review): only the RGB value is copied; a colour that is not
        # stored as RGB would presumably be lost here - TODO confirm
        target_font.color.rgb = source_font.color.rgb

# Save the new document
doc.save(r"H:\Documents\Software Development\QUB Software Development\Data_analysis_module\Chapter1PromptEngReport.docx")