## 1. Connect to a Language Model

In [1]:
import os
import openai
import langchain
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv())
openai.api_key = os.environ['OPENAI_API_KEY']

llm = "gpt-3.5-turbo"
chat = ChatOpenAI(temperature=0.0, model=llm)

# Completion function
def get_completion(prompt, model=llm, client=openai):
    """Send `prompt` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the only message of the conversation.
        model: Model name forwarded to the chat-completions call.
        client: Object exposing `chat.completions.create`; defaults to the
            `openai` module.

    Returns:
        The `message.content` of the first choice in the response.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

## 2. Document Loaders

#### Initialize document loaders

In [2]:
# UnstructuredURLLoader
from langchain.document_loaders import UnstructuredURLLoader
def load_url_unstructured(url):
    """Load one web page through `UnstructuredURLLoader`.

    Args:
        url: Address of the page; it is wrapped in a one-element list for the loader.

    Returns:
        Whatever the loader's `load()` call returns.
    """
    return UnstructuredURLLoader(urls=[url]).load()

# WebBaseLoader
from langchain.document_loaders import WebBaseLoader
def load_url_webbase(url):
    """Load one web page through `WebBaseLoader`.

    Args:
        url: Address of the page, passed to the loader as-is.

    Returns:
        Whatever the loader's `load()` call returns.
    """
    return WebBaseLoader(url).load()

# PDF Loader
from langchain.document_loaders import PyPDFLoader
def load_pdf(pdf_path):
    """Load a PDF file through `PyPDFLoader`.

    Args:
        pdf_path: Path of the PDF, passed to the loader as `file_path`.

    Returns:
        Whatever the loader's `load()` call returns.
    """
    return PyPDFLoader(file_path=pdf_path).load()

# HTML Loader Function
from langchain.document_loaders import UnstructuredHTMLLoader
def load_html(file_path):
    """Load an HTML file through `UnstructuredHTMLLoader`.

    Args:
        file_path: Path of the HTML file.

    Returns:
        Whatever the loader's `load()` call returns.
    """
    return UnstructuredHTMLLoader(file_path=file_path).load()

# HTML with BeautifulSoup4 Loader Function
from langchain.document_loaders import BSHTMLLoader
def load_bshtml(file_path):
    """Load an HTML file through `BSHTMLLoader` (the BeautifulSoup4-based loader).

    Args:
        file_path: Path of the HTML file.

    Returns:
        Whatever the loader's `load()` call returns.
    """
    return BSHTMLLoader(file_path=file_path).load()

# Text Loader Function
from langchain.document_loaders.text import TextLoader
def load_text(file_path):
    """Load a plain-text file through `TextLoader`.

    The previous body instantiated `UnstructuredURLLoader` with a `file_path`
    keyword; that loader is built from `urls`, so the text helper could not
    work. `TextLoader` is the class imported directly above this function.

    Args:
        file_path: Path of the text file.

    Returns:
        Whatever the loader's `load()` call returns.
    """
    text_loader = TextLoader(file_path=file_path)
    return text_loader.load()

#### Our helper document in different formats

In [3]:
url = "https://scaleinsights.com/learn/how-to-analyse-amazon-ppc-data"
pdf = "amazon_ppc_guide.pdf"
html = "amazon_ppc_guide.html"

#### Loading the documents

In [4]:
# UnstructuredURLLoader
url_result1 = load_url_unstructured(url)
print("Loaded Content:\n", url_result1)

# WebBaseLoader
url_result2 = load_url_webbase(url)
print("Loaded Content:\n", url_result2)

# PDF Loader
pdf_result = load_pdf(pdf)
print("Loaded Content:\n", pdf_result)

# HTML Loader
html_result1 = load_html(html)
print("Loaded Content:\n", html_result1)

# BS HTML Loader
html_result2 = load_bshtml(html)
print("Loaded Content:\n", html_result2)

Loaded Content:
 [Document(page_content="Blog\n\nAbout\n\nFeatures\n\nRoadmap\n\nPricing\n\nContact\n\nSign Up For Free\n\nHow To Analyse Amazon PPC Data: 8 Must-Read Secrets\n\nStart 30 Days Free Trial\n\nNo Credit-Card Required\n\nDiscuss Amazon PPC strategies with other experts\n\nJoin our FB group\n\nFind article\n\nRecent Articles\n\nWhat Does Amazon's Choice Mean And How To Get A Badge?\n\nHow To Sell On Amazon FBA For Beginners: 12 Powerful Tactics\n\nAmazon Digital Display Advertising For Sellers\n\nTop tags\n\nAmazon PPC\n\nComparison Guides\n\nBest Amazon Tools\n\nDiscuss Amazon PPC strategies with other experts\n\nJoin our FB group\n\nScale Insights\n\nSmart PPC Solution for Smart Amazon Sellers\n\nStreamline your Amazon PPC workflows and scale profits with automation\n\nSign Up For Free\n\nHow To Analyse Amazon PPC Data: 8 Must-Read Secrets\n\nScale Insights Team\n\nShare:\n\n\n\n\n\n\n\namazon ppc data analysis\n\nanalysing amazon ppc data\n\nhow to analyse amazon ppc data

  rows = body.findall("tr") if body else []


Loaded Content:
 [Document(page_content='<!DOCTYPE html> <html  lang =" en "> <head>      <meta  charset =" utf-8 " />      <meta  name =" viewport "  content =" width=device-width, initial-scale=1.0 " />      <meta  name =" description "  content =" Learn to analyse your Amazon PPC data and make informed decisions to boost your PPC campaigns. This ultimate guide offers tips and insights for your campaigns. " />      <meta  name =" google-site-verification "  content =" CZknm9WnsZXpgecY8QAXSCAThTK7lvPzKp6ARxo6Ipg " />      <title> How To Analyse Amazon PPC Data: 8 Must-Read Secrets </title>      <link  rel =" canonical "  href =" https://scaleinsights.com/learn/how-to-analyse-amazon-ppc-data " />      <script  src =" https://kit.fontawesome.com/5e226041e5.js "  crossorigin =" anonymous "> </script>      <link  rel =" stylesheet "  href =" /css/learn.css?v=O1d47y045RZVCddNQqDXTBd6htSyCkb_AYGQFHKDYLA " />   \t      <!-- Google Tag Manager -->      <script>         (function (w, d, s, l, 

## 3. Split the documents to also have a chunked version

In [5]:
# Step 1: Initializing Splitters
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, TokenTextSplitter, HTMLHeaderTextSplitter
# Candidate separators, coarsest first. The raw string yields the same
# characters as before without the invalid "\." escape sequence.
# NOTE: this list is not passed to any of the splitters below.
separators = ["\n\n", "\n", r"\. ", " ", ""]
# (HTML tag, metadata key) pairs the header splitter groups content under.
headers_to_split_on = [("h1", "Header 1"), ("h2", "Header 2"), ("h3", "Header 3"), ("h4", "Header 4")]

c_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
r_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
t_splitter = TokenTextSplitter(chunk_size=10, chunk_overlap=0)
h_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

# Step 2: Split
# Be aware that these "splits" documents are created directly from `url`; the
# documents loaded in the previous section are not used. For a local file use
# h_splitter.split_text_from_file(<path_to_file>) instead.
html_header_splits = h_splitter.split_text_from_url(url)
splits = r_splitter.split_documents(html_header_splits)
splits[15:25]

[Document(page_content="Keyword Data: This data provides information about the performance of individual keywords in your PPC campaigns. It includes metrics such as clicks, impressions, conversion rates, and cost per click (CPC). Keyword data is crucial in identifying top-performing keywords that are driving conversions and optimising bids accordingly.  \nCampaign Data: Campaign data provides an overview of the performance of your PPC campaigns as a whole. It includes metrics such as total spending, total sales, and ACoS. Analysing campaign data helps you understand the overall impact of your campaigns on your business and identify areas for improvement.  \nSearch Term Data: This data provides insight into customers' search terms to find and click on your ads. It includes metrics such as clicks, impressions, and conversions by search term. Analysing search term data helps you identify new keywords to add to your campaigns and negative keywords to exclude, as well as improve your target

## 4. LLM 1 - Generate a structured format that contains information about the specifics of the problem statement
**Input:** 
- Problem statement (str)
- Background info (str)
- Desired output format

**Output:**
- Finetuned problem statement in a JSON Format

### Input Variables

In [6]:
problem_statement = """I want to analyze our Berne advertising performance in Germany in April 2023. I want to learn about the situation of important metrics and about immediate actions to take."""

In [7]:
# Free-text company context that is interpolated into the LLM1 prompt.
# NOTE(review): the text says "5 marketplaces" under Business Operations but
# lists four operational regions (DE, FR, IT, ES) — confirm which is right.
background_information = """
Company Overview:
Brand name: Gozos
Industry: E-commerce
Product Focus: Home and living products
Marketplace: Amazon
Operational Regions: Germany (DE), France (FR), Italy (IT), Spain (ES)

Business Operations:
Product Range: A large catalog of home and living products.
Operations: The company is trying to grow fast, focusing on increasing sales volume and market share. They are trying to manage about 200 SKU's and 5 marketplaces with a small team. They need to have a very smart tool to analyse their advertising performance and come up with actionable insights.

Amazon Advertising Management:
The company invests significantly in Amazon advertising to boost product visibility and sales.
Advertising campaigns are diverse, targeting various product lines and marketplaces.
The company lacks a full-time employee with the expertise to export and analyze monthly Amazon advertising reports effectively.

General Problem Space:
The primary challenge is automating the analysis of Amazon advertising data.
This automation aims to help in understanding advertising performance, identifying key metrics, and suggesting actionable insights for improvement.
The scope involves creating a workflow that can process and analyze advertising data across different product lines and marketplaces efficiently.

Specific Use Case for LLM1:
LLM1 will receive problem statements related to Amazon advertising performance analysis.
It needs to refine these statements to provide clear, detailed, and context-specific prompts for further AI processing.
The focus is on turning general inquiries into well-structured, actionable queries that can guide subsequent analysis stages
"""

In [8]:
llm1_output_format = """
JSON format with the following keys:
"finetuned_problem_statement": "Your rephrased problem_statement that is about the same length as the original statement.",  
"domain": "One of the following: [Sales, Inventory, Advertising, Product]",
"collection_name": "if applicable, one from the following: [Berne tables, Scots Pine tables, Home office, Oviedo, Huesca, Baumkante coffee tables, Malaga, Baumkante consoles, Asymmetric mirrors, Terra mirrors, Mira mirrors, Mia, Roa, Mammo, Luna, Salamanca, Palencia, Bilbao, Murcia, Palamos]", 
"location": "if applicable, one of the following: [DE, FR, IT, ES]", 
"timeframe": "If applicable give details about timeframe mentioned",
"""

### LLM1 Prompt

In [9]:
# Prompt for LLM1: asks the model to rewrite `problem_statement` using
# `background_information` as context and to answer in `llm1_output_format`.
# A trailing backslash joins the next line onto the current one, so each
# continued sentence ends with a space before the backslash. The third sentence
# no longer ends in a backslash followed by spaces, which was an invalid escape
# that left a literal backslash in the prompt.
llm1_prompt = f"""
Your task is to refine a business owner's problem statement, provided below enclosed within triple backticks. \
Use the problem statement and Business Background Information, also enclosed within triple backticks, to understand the context. \
Your goal is to enhance the grammar, sentence structure, and clarity for better suitability for an AI model. Include specific details like KPIs or relevant metrics if applicable.

Original Problem Statement:
```{problem_statement}```

Business Background Information (for context when rephrasing problem_statement, not direct use):
```{background_information}```

Please follow this format for your output, focusing on clarity and detail. Use only problem_statement when filling domain, collection_name, location, and timeframe:
```{llm1_output_format}```

Your reformulated statement will be integrated with additional documents to create a detailed prompt for another AI model. Ensure that your output is precise and thorough.
"""

### Output

In [10]:
problem_statement_structured = get_completion(llm1_prompt)

In [11]:
print(problem_statement_structured)

{
"finetuned_problem_statement": "I need to analyze the advertising performance of our Berne product line in Germany during April 2023. I want to gain insights into important metrics and determine immediate actions to take for improvement.",
"domain": "Advertising",
"collection_name": "Berne tables",
"location": "DE",
"timeframe": "April 2023"
}


In [11]:
import json 
def read_string_to_list(input_string):
    """Parse a JSON document held in a string.

    The text is parsed as-is first. Only when that fails is the legacy
    fallback tried, in which every single quote is swapped for a double quote
    so that Python-style text such as {'a': 1} still loads. Doing the swap up
    front corrupted valid JSON whose values contain an apostrophe
    (for example a value like: SKU's).

    Args:
        input_string: JSON text, or None.

    Returns:
        The decoded object (despite the function name this is whatever the
        JSON encodes, e.g. a dict), or None when the input is None or cannot
        be parsed by either attempt.
    """
    if input_string is None:
        return None
    try:
        return json.loads(input_string)
    except json.JSONDecodeError:
        pass  # not strict JSON; try the single-quote fallback below
    try:
        return json.loads(input_string.replace("'", "\""))
    except json.JSONDecodeError:
        print("Error: Invalid JSON string")
        return None
    
problem_statement_list = read_string_to_list(problem_statement_structured)
print(problem_statement_list)

{'finetuned_problem_statement': 'I need to analyze the advertising performance of our brand, Gozos, in the German market during April 2023. I want to gain insights into the key metrics and determine immediate actions to improve our advertising strategy.', 'domain': 'Advertising', 'collection_name': 'Gozos', 'location': 'DE', 'timeframe': 'April 2023'}


## 5. LLM 2
**Input:** 
- Informative documents on Amazon advertising, analysis template.
- Desired output format

**Task:**
- Develop a clear, step-by-step analysis method resembling a pseudo-code algorithm.

**Output:**
- A detailed instruction set for Amazon advertising report analysis.

In [12]:
# Pseudo-code handed to the LLM as the analysis recipe for one collection.
# ACoS and Total ACoS are cost ratios (spend divided by sales); the earlier
# text had both inverted, which made ACoS just ROAS expressed as a percentage.
amazon_ads_analysis_flow = """
BEGIN Analysis Algorithm

  // User specifies a collection name
  SET CollectionName = input("Enter collection name")

  // Initialize variables for collection-specific analysis
  SET CollectionTotalSales = 0
  SET CollectionAdSales = 0
  SET CollectionSpend = 0
  SET CollectionClicks = 0
  SET CollectionOrders = 0

  FOR EACH record in Amazon Ads Report
    IF record.collection_id MATCHES CollectionName
      // Sum up sales, spend, clicks, and orders for the specified collection
      ADD record.sales to CollectionTotalSales
      ADD record.spend to CollectionSpend
      ADD record.clicks to CollectionClicks
      ADD record.orders to CollectionOrders

      IF record is an ad campaign
        ADD record.sales to CollectionAdSales
      END IF
    END IF
  END FOR

  // Calculate organic sales percentage, ACoS, Total ACoS, ROAS, Conversion Rate for the collection
  SET OrganicSalesPercentage = ((CollectionTotalSales - CollectionAdSales) / CollectionTotalSales) * 100
  SET ACoS = (CollectionSpend / CollectionAdSales) * 100
  SET TotalACoS = (CollectionSpend / CollectionTotalSales) * 100
  SET ROAS = CollectionAdSales / CollectionSpend
  SET ConversionRate = (CollectionOrders / CollectionClicks) * 100
  SET AverageCPC = CollectionSpend / CollectionClicks

  // Display Results for the specified collection
  PRINT "Results for Collection: ", CollectionName
  PRINT "Total Sales: ", CollectionTotalSales
  PRINT "Total Ad Sales: ", CollectionAdSales
  PRINT "Organic Sales Percentage: ", OrganicSalesPercentage
  PRINT "ACoS: ", ACoS
  PRINT "Total ACoS: ", TotalACoS
  PRINT "ROAS: ", ROAS
  PRINT "Conversion Rate: ", ConversionRate
  PRINT "Average CPC: ", AverageCPC

END Analysis Algorithm
"""

In [15]:
ads_report_path = "../data/advertising.csv"
import pandas as pd
ads_report = pd.read_csv(ads_report_path)

In [17]:
# Prompt that pairs the analysis recipe with the advertising data.
# - Both delimiters are now opened AND closed so the model can tell where the
#   instructions end and the data begins.
# - The frame is embedded as CSV text. Formatting a DataFrame directly into an
#   f-string uses its display repr, which pandas abbreviates for larger frames,
#   so the model would only see part of the data.
# NOTE(review): the full CSV can be long; check it fits the model's context window.
analyze_prompt = f"""
Please use the instructions delimited with triple quotes to analyze the spreadsheet delimited with triple angle brackets.
instructions: '''{amazon_ads_analysis_flow}'''
advertising data: <<<{ads_report.to_csv(index=False)}>>>"""

In [18]:
get_completion(analyze_prompt)

'To analyze the given spreadsheet, we will use the provided analysis algorithm. Here is the modified algorithm:\n\n\'\'\'\nBEGIN Analysis Algorithm\n\n  // User specifies a collection name\n  SET CollectionName = input("Enter collection name")\n\n  // Initialize variables for collection-specific analysis\n  SET CollectionTotalSales = 0\n  SET CollectionAdSales = 0\n  SET CollectionSpend = 0\n  SET CollectionClicks = 0\n  SET CollectionOrders = 0\n\n  FOR EACH record in Amazon Ads Report\n    IF record.collection_id MATCHES CollectionName\n      // Sum up sales, spend, clicks, and orders for the specified collection\n      ADD record.sales to CollectionTotalSales\n      ADD record.spend to CollectionSpend\n      ADD record.clicks to CollectionClicks\n      ADD record.orders to CollectionOrders\n\n      IF record is an ad campaign\n        ADD record.sales to CollectionAdSales\n      END IF\n    END IF\n  END FOR\n\n  // Calculate organic sales percentage, ACoS, Total ACoS, ROAS, Convers

In [19]:
import pandas as pd
from langchain.llms import OpenAI
from langchain.agents.agent_types import AgentType
from langchain.document_loaders import DataFrameLoader
from langchain_experimental.agents.agent_toolkits import create_csv_agent
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

In [21]:
# Build a pandas-dataframe agent over `ads_report`, driven by an OpenAI
# completion model at temperature 0 and using the zero-shot ReAct agent type.
# verbose=True is presumably what produces the step-by-step chain trace in the
# output of the run cell — confirm against the library docs.
agent = create_pandas_dataframe_agent(
    OpenAI(temperature=0),
    ads_report,
    verbose=True,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
)

In [26]:
# Instruction text for the dataframe agent: lists the analyses to perform,
# restricts the work to the 'Berne tables' collection and fixes the output layout.
# NOTE(review): "Total Sales" and "Sales from Ads" are both described as coming
# from the 'sales' column, so the organic-sales formula would evaluate to 0
# unless total sales come from another source (the Total ACoS line mentions an
# order report) — confirm the intended data source.
prompt_z = """
You are provided with a spreadsheet containing Amazon advertising data. Your task is to analyze the data and provide actionable insights. Below there are some mainstream analysis that you can perform. Please use the instructions given below to complete the task.
Sales Analysis:

Total Sales: Sum the 'sales' column for all collections.
Sales from Ads: Already given in the 'sales' column for each campaign.
Budget Analysis:

Spend per Collection: Sum the 'spend' column for each collection.
Organic Sales Percentage:

Calculate as: (Total Sales - Sales from Ads) / Total Sales * 100.
ACoS and Total ACoS Analysis:

ACoS: Already provided in the 'ACOS' column.
Total ACoS: Calculate using Total Sales (from the order report) and total spend for each collection.
ROAS Analysis:

ROAS: Already provided in the 'ROAS' column.
Conversion Rate Analysis:

Conversion Rate: Calculate for each collection using (Number of Orders / Number of Clicks) * 100.
CPC (Cost Per Click) Analysis:

CPC: Already given in the 'cpc' column

Your constraints are:
- You will only analyze the data for the collection 'Berne tables'.
- You will format your output as follows:
1- Title for each analysis
2- Numbers for each analysis
3- Interpretation for each analysis
"""

In [27]:
result = agent.run(prompt_z)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Thought: I need to analyze the data for the collection 'Berne tables'.
Action: python_repl_ast
Action Input: df[df['campaign_name'] == 'SP - Berne + Scots sehpalar - Manual'].sum()[0m
Observation: [36;1m[1;3mState                                         ENABLED
campaign_name    SP - Berne + Scots sehpalar - Manual
collection_id                                       7
category_id                                         3
satatus                       CAMPAIGN_STATUS_ENABLED
targeting                                      MANUAL
budget                                             20
cost_type                                         CPC
impressions                                    619108
clicks                                           1720
CTR                                              28.0
spend                                           585.0
cpc                                              0.34
orders                   