In [3]:
import openai
import os
from dotenv import load_dotenv

from langchain.document_loaders import PyMuPDFLoader

#### OpenAI Settings (GPT-4)

In [22]:
# load_dotenv() runs first so that a key kept in a .env file is visible in the
# environment lookup below (presumed python-dotenv behaviour -- confirm).
# The lookup yields None when OPENAI_API_KEY is not set.
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [20]:
def get_completion(
        prompt: str,
        system_prompt='',
        prev_messages=None,
        max_retries=5,
        retry_delay=1.0
    ):
    """Send a chat request to GPT-4 and return the text of the first choice.

    The message list is built as: one system message (``system_prompt``),
    then ``prev_messages`` in order, then ``prompt`` (stripped) as the user
    message.

    Args:
        prompt: User message; leading/trailing whitespace is stripped.
        system_prompt: Content of the system message (sent even when empty).
        prev_messages: Optional iterable of earlier chat messages
            (``{"role": ..., "content": ...}`` dicts) placed before the prompt.
        max_retries: How many times a failed request is retried before the
            error is re-raised.
        retry_delay: Seconds to wait before the first retry; the wait doubles
            after every failed attempt.

    Returns:
        The ``content`` string of ``response.choices[0].message``.

    Raises:
        openai.error.APIError, openai.error.RateLimitError,
        openai.error.APIConnectionError: when the request still fails after
            ``max_retries`` retries. Any other exception propagates at once.
    """
    import time  # local import keeps this cell independent of the import cell

    messages = [{"role": "system", "content": system_prompt}]
    # `None` default instead of a shared mutable list.
    messages.extend(prev_messages or [])
    messages.append({"role": "user", "content": prompt.strip()})

    retryable_errors = (
        openai.error.APIError,
        openai.error.RateLimitError,
        openai.error.APIConnectionError,
    )

    for attempt in range(max_retries + 1):
        # For CLI info
        print("--Sending request to OpenAI endpoint--")

        try:
            response = openai.ChatCompletion.create(
                model='gpt-4',
                messages=messages,
                temperature=0,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=2,
                # Caps the reply length; longer answers come back cut off.
                max_tokens = 400
            )
        except retryable_errors as e:
            print("API Error:", e)
            if attempt == max_retries:
                # Out of retries: surface the real error instead of looping
                # forever (the old recursive retry never gave up).
                raise
            print("Retrying the request...")
            # Exponential backoff so a rate-limited endpoint gets time to recover.
            time.sleep(retry_delay * (2 ** attempt))
        else:
            return response.choices[0].message["content"]

#### Loading PDF

In [5]:
# `loader` holds the loaded pages; their text is glued together with commas
# into a single string.
loader = PyMuPDFLoader('doc.pdf').load()
all_text = ','.join(page.page_content for page in loader)

## Method 1: Passing all Context in Prompt

In [6]:
# The twelve marketing channels the budget is split across.
categories = [
    'Social Advertising',
    'Search Advertising',
    'Digital Display Advertising',
    'Digital Video Advertising',
    'Partner/Affiliate/Co-OP on Digital Channels',
    'Digital Audio Advertising',
    'Digital Out of Home',
    'SEO',
    'Email Marketing',
    'Content and Messaging',
    'Organic Social/Influencer',
    'SMS/In App Messaging',
]

In [7]:
# Instruction block placed at the end of the prompt: asks for a per-category
# budget split in percent, based on the supplied context.
# NOTE(review): "ONLY JSON" and "Also mention the reasons" pull in opposite
# directions; the recorded replies are JSON followed by free text.
query = """
    Read the information and trends from the context and assign a marketing budget to the given categories.
    OUTPUT FORMAT: ONLY JSON with categories as keys and assigned budget in percentages as values.
    Also mention the reasons for assigning percentages to methods.
"""

In [8]:
# Method 1 prompt: the whole PDF text as context, then the category list
# (interpolated as a Python list), then the instruction block.
prompt = f"""
Context: "{all_text}"
Categories: "{categories}"
{query}
"""

In [None]:
# Expected to fail: the prompt carries the whole document, which exceeds the
# 8k context size of GPT-4. Kept to show why the later methods are needed.

result = get_completion(prompt)

## Method 2: Without Context/Document

In [16]:
# Channels to allocate budget to (no document context is used in this method).
categories = [
    'Social Advertising',
    'Search Advertising',
    'Digital Display Advertising',
    'Digital Video Advertising',
    'Partner/Affiliate/Co-OP on Digital Channels',
    'Digital Audio Advertising',
    'Digital Out of Home',
    'SEO',
    'Email Marketing',
    'Content and Messaging',
    'Organic Social/Influencer',
    'SMS/In App Messaging',
]

In [17]:
# Context-free instruction block: the model is asked to play a marketing
# manager and split the budget across the categories from its own knowledge.
query = """
    Act as a Marketing Manager and devise a marketing campaign strategy. Assign a marketing budget to the given categories.
    OUTPUT FORMAT: ONLY JSON with categories as keys and assigned budget in percentages as values.
    Also mention the reasons for assigning percentages to methods.
"""

In [18]:
# Method 2 prompt: only the category list and the instruction block.
prompt = f"""
Categories: "{categories}"
{query}
"""

In [19]:
# Send the context-free prompt and keep the reply text.
result = get_completion(prompt)

--Sending request to OpenAI endpoint--


In [20]:
# Show the raw reply text (JSON allocation followed by the stated reasons).
print(result)

{
    "Social Advertising": 20,
    "Search Advertising": 15,
    "Digital Display Advertising": 10,
    "Digital Video Advertising": 15,
    "Partner/Affiliate/Co-OP on Digital Channels": 5,
    "Digital Audio Advertising": 5,
    "Digital Out of Home": 5,
    "SEO": 10,
    "Email Marketing": 5,
    "Content and Messaging": 5,
    "Organic Social/Influencer": 3,
    "SMS/In App Messaging": 2
}

Reasons for assigning percentages:

1. "Social Advertising" - This is given the highest percentage because social media platforms have a large user base, making it an effective way to reach a wide audience.

2. "Search Advertising" & "Digital Video Advertising" - These are assigned significant portions as they are highly effective in driving traffic and conversions. Search advertising helps in reaching people who are actively looking for products or services similar to yours, while video advertising can be more engaging and informative.

3. "Digital Display Advertising" & "SEO" - They are also

## Method 3: Getting context from Similarity Search

In [21]:
from langchain.document_loaders import PyMuPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Pipeline: read the PDF, cut it into chunks (size 500, overlap 50),
# embed every chunk with OpenAI embeddings and index the result in Chroma.
raw_documents = PyMuPDFLoader('doc.pdf').load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
documents = text_splitter.split_documents(raw_documents)
embedding_model = OpenAIEmbeddings()
db = Chroma.from_documents(documents, embedding_model)

In [27]:
# Channels used both as retrieval topics and as the keys of the requested budget.
categories = [
    'Social Advertising',
    'Search Advertising',
    'Digital Display Advertising',
    'Digital Video Advertising',
    'Partner/Affiliate/Co-OP on Digital Channels',
    'Digital Audio Advertising',
    'Digital Out of Home',
    'SEO',
    'Email Marketing',
    'Content and Messaging',
    'Organic Social/Influencer',
    'SMS/In App Messaging',
]

In [23]:
# Collect supporting passages from the vector store: for every category, run a
# similarity search and keep the text of the best hits.
RESULTS_PER_CATEGORY = 2
context = []

for category in categories:
    # Separate name so the notebook-level `query` variable is not overwritten.
    search_query = f"How much should be spent on {category} for marketing campaign?"
    # Request exactly the number of hits that are kept, and take whatever comes
    # back: indexing docs[1] would raise IndexError if fewer than two chunks match.
    docs = db.similarity_search(search_query, k=RESULTS_PER_CATEGORY)
    context.extend(doc.page_content for doc in docs)


In [24]:
# Sanity check: how many retrieved passages were collected across all categories.
print(len(context))

24


In [25]:
# Flatten the retrieved passages into one comma-separated string for the prompt.
text_context = ','.join(context)

In [27]:
# Instruction block for the retrieval-based prompt: budget split in percent
# per category, derived from the supplied context.
# NOTE(review): "ONLY JSON" and "Also mention the reasons" pull in opposite
# directions; the recorded reply is JSON followed by free text.
query = """
    Read the information and trends from the context and assign a marketing budget to the given categories.
    OUTPUT FORMAT: ONLY JSON with categories as keys and assigned budget in percentages as values.
    Also mention the reasons for assigning percentages to methods.
"""

In [28]:
# Method 3 prompt: retrieved passages as context, then the category list,
# then the instruction block.
prompt = f"""
Context: "{text_context}"
Categories: "{categories}"
{query}
"""

In [29]:
# Send the retrieval-augmented prompt and keep the reply text.
result = get_completion(prompt)

--Sending request to OpenAI endpoint--


In [151]:
# Show the raw reply text (JSON allocation followed by the stated reasons).
print(result)

{
  "Social Advertising": 15,
  "Search Advertising": 10,
  "Digital Display Advertising": 8,
  "Digital Video Advertising": 12,
  "Partner/Affiliate/Co-OP on Digital Channels": 5,
  "Digital Audio Advertising": 3,
  "Digital Out of Home": 2,
  "SEO": 20,
  "Email Marketing": 10,
  "Content and Messaging": 7,
  "Organic Social/Influencer": 8,
  "SMS/In App Messaging": 0
}

Reasons for assigning percentages:
1. Social Advertising: Given the popularity and effectiveness of paid social media content, allocating a higher budget percentage can help reach a wider audience and generate strong returns.
2. Search Advertising: With 29% of marketers using search-optimized websites and blogs to attract leads, investing in search advertising can complement SEO efforts and drive targeted traffic.
3. Digital Display Advertising: Allocating a moderate budget percentage to digital display advertising allows for brand visibility and reaching audiences across various online platforms.
4. Digital Video Ad

## Method 4: Passing the Summarized Report as Context

In [5]:
# Reload the report and concatenate the text of all pages, comma-separated,
# so it can be cut into chunks for summarisation.
loader = PyMuPDFLoader('doc.pdf').load()
page_texts = [page.page_content for page in loader]
all_text = ','.join(page_texts)

In [32]:
# Split the report text into fixed-size pieces so that each piece can be sent
# to GPT-4 on its own. Sizes are counted in characters, not tokens.

def chunk_string(input_string: str, chunk_size: int) -> list:
    """Cut ``input_string`` into consecutive pieces of ``chunk_size`` characters.

    Args:
        input_string: Text to split.
        chunk_size: Maximum number of characters per piece; must be positive.

    Returns:
        The pieces in order. Every piece has exactly ``chunk_size`` characters
        except possibly the last one; an empty input gives an empty list.

    Raises:
        ValueError: if ``chunk_size`` is zero or negative (a negative size used
            to return an empty list silently, dropping all of the text).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    return [
        input_string[start:start + chunk_size]
        for start in range(0, len(input_string), chunk_size)
    ]

In [24]:
# Cut the full report text into pieces of at most 25000 characters and print
# how many pieces resulted.
chunks = chunk_string(all_text, 25000)
print(len(chunks))

3


In [40]:
# Generate a summary of each chunk with the help of GPT-4.
# One get_completion call is made per chunk; replies are collected in chunk order.
# NOTE(review): the recorded run hit the tokens-per-minute rate limit in this
# loop -- the chunks are large (25000 characters each).
summaries = []
for chunk in chunks:
    summary_prompt = f"""
        Summarize the given text for assigning marketing budgets to these categories: {categories}
        text: {chunk}
        """
    summaries.append(get_completion(summary_prompt))
    

--Sending request to OpenAI endpoint--
--Sending request to OpenAI endpoint--
API Error: Rate limit reached for 10KTPM-200RPM in organization org-mVT5G0ykVqlNzVawBd0W4h14 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues.
Retrying the request...
--Sending request to OpenAI endpoint--
API Error: Rate limit reached for 10KTPM-200RPM in organization org-mVT5G0ykVqlNzVawBd0W4h14 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues.
Retrying the request...
--Sending request to OpenAI endpoint--
API Error: Rate limit reached for 10KTPM-200RPM in organization org-mVT5G0ykVqlNzVawBd0W4h14 on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues.
Retrying the request...
--Sending request to OpenAI endpoint--
API 

In [41]:
# Display the list of per-chunk summaries.
summaries

["The 2023 marketing trends report highlights the increasing importance of AI and automation in marketing, with tools such as HubSpot's new AI Tools being introduced. The tension between short-term growth and long-term brand investment is also noted, with a balanced approach recommended. Other key trends include the use of AI for content creation and video production, data privacy concerns, and the continued success of video content for engagement. Marketers are experimenting with new media formats and optimizing existing assets to meet their goals. In terms of budget allocation, marketers expect to continue investing in channels that generate strong returns even during economic downturns. These include content marketing (blogs and SEO), virtual events, SMS marketing, email marketing, and organic social media.",
 'The text discusses the current state of marketing trends and how to allocate budgets across various categories. It suggests that despite a decline in media costs, ad spend ha

In [42]:
# Merge the per-chunk summaries into one comma-separated string, then print the
# merged text and its length in characters.
total_summary = ','.join(summaries)
print(total_summary)
print(len(total_summary))

The 2023 marketing trends report highlights the increasing importance of AI and automation in marketing, with tools such as HubSpot's new AI Tools being introduced. The tension between short-term growth and long-term brand investment is also noted, with a balanced approach recommended. Other key trends include the use of AI for content creation and video production, data privacy concerns, and the continued success of video content for engagement. Marketers are experimenting with new media formats and optimizing existing assets to meet their goals. In terms of budget allocation, marketers expect to continue investing in channels that generate strong returns even during economic downturns. These include content marketing (blogs and SEO), virtual events, SMS marketing, email marketing, and organic social media.,The text discusses the current state of marketing trends and how to allocate budgets across various categories. It suggests that despite a decline in media costs, ad spend has fall

In [45]:
# Channels that receive a share of the budget in the summary-based prompt.
categories = [
    'Social Advertising',
    'Search Advertising',
    'Digital Display Advertising',
    'Digital Video Advertising',
    'Partner/Affiliate/Co-OP on Digital Channels',
    'Digital Audio Advertising',
    'Digital Out of Home',
    'SEO',
    'Email Marketing',
    'Content and Messaging',
    'Organic Social/Influencer',
    'SMS/In App Messaging',
]

In [44]:
# Instruction block for the summary-based prompt: budget split in percent per
# category, derived from the supplied context.
# NOTE(review): "ONLY JSON" and "Also mention the reasons" pull in opposite
# directions; the recorded reply is JSON followed by free text.
query = """
    Read the information and trends from the context and assign a marketing budget to the given categories.
    OUTPUT FORMAT: ONLY JSON with categories as keys and assigned budget in percentages as values.
    Also mention the reasons for assigning percentages to methods.
"""

In [46]:
# Method 4 prompt: the merged chunk summaries as context, then the category
# list, then the instruction block.
prompt = f"""
Context: "{total_summary}"
Categories: "{categories}"
{query}
"""

In [47]:
# Send the summary-based prompt and keep the reply text.
result = get_completion(prompt)

--Sending request to OpenAI endpoint--


In [48]:
# Show the raw reply text (JSON allocation followed by the stated reasons).
print(result)

{
    "Social Advertising": 10,
    "Search Advertising": 15,
    "Digital Display Advertising": 5,
    "Digital Video Advertising": 20,
    "Partner/Affiliate/Co-OP on Digital Channels": 5,
    "Digital Audio Advertising": 3,
    "Digital Out of Home": 2,
    "SEO": 15,
    "Email Marketing": 10,
    "Content and Messaging": 8,
    "Organic Social/Influencer": 5,
    "SMS/In App Messaging": 2
}

Reasons:

"Social Advertising": Despite budget cuts in paid social media content, it is still important to maintain a presence. Brands should focus on organic growth but also allocate some budget for paid promotions.

"Search Advertising": SEO tactics are recommended for brands cutting marketing investment. This includes both optimizing existing assets and creating new ones.

"Digital Display Advertising": With the decline in ad spend across all channels, this category receives a lower percentage.

"Digital Video Advertising": The continued success of video content for engagement and the rise 