Starting point and first notebook

In [1]:
import json
import os
import re
from pathlib import Path
from typing import *

import requests
from devtools import debug
from google import genai
from google.genai import types
from IPython.display import Markdown
from pydantic import BaseModel, Field

### Config

#### Product to Research
This is the most important config setting! Change `PRODUCT` to the product category you wish to study.
Also specify the number of metrics and competitors to consider.

All subsequent notebooks will use PRODUCT to determine which analysis session folder
to use!

In [None]:
# Product category under study; it also names the session folder created below.
PRODUCT = "wireless over-ear headphones"
# Number of design metrics and of competitor products requested from the model.
NUM_METRICS = 5
NUM_COMPETITORS = 5

# Lower-priority knob: cap on the number of technical specifications requested.
# Consumed by 3_extract_specs.ipynb, where direct extraction proved inaccurate.
MAX_SPECS = 10

# Per-product session folder, created up front (parents included, reruns are harmless).
DATA_DIR = Path("session", PRODUCT)
DATA_DIR.mkdir(exist_ok=True, parents=True)

#### API Keys and Model
See <https://ai.google.dev/gemini-api/docs/api-key>

In [None]:
# The key (create one in Google AI Studio) is read from the GOOGLE_API_KEY environment
# variable so that it never ends up in the notebook or in version control.
# NOTE(review): a key used to be hardcoded on this line; treat it as leaked and revoke it.
GOOGLE_AI_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_AI_KEY:
    # Presumably genai.Client raises on its own when no key is available (TODO confirm);
    # this message just makes the cause obvious at the point of configuration.
    print("GOOGLE_API_KEY is not set; export it before creating the client.")
# 2.0 Flash: the free tier allows 1500 requests per day, and structured output is
# disabled for Gemma.
GOOGLE_AI_MODEL = "gemini-2.0-flash"

### Operations

#### Setup

In [4]:
# One Gemini API client, reused by every request cell below.
client = genai.Client(
    api_key=GOOGLE_AI_KEY,
)

#### Determining Key Metrics
As part of the automated process, we prompt a model to brainstorm key metrics for the product category. This serves as a starting point by which to analyze competitor products.

In [5]:
# Structured-output schema for the metrics brainstorming request.
# BTW, putting rationale before metrics is a way to force the model to think first.
# NOTE(review): documented with `#` comments on purpose; a class docstring would
# presumably surface in the JSON schema that is embedded in the prompt — confirm
# before adding one.
class MetricsResult(BaseModel):
    # Pinned to the configured PRODUCT (rendered as a `const` in the JSON schema).
    category: Literal[PRODUCT] = Field(description="Category that was analyzed.")
    # Free-text reasoning, emitted before the metrics themselves.
    rationale: str = Field(
        description="Detailed rationale for picking the below metrics."
    )
    # min_length == max_length, so exactly NUM_METRICS entries are required.
    metrics: List[str] = Field(
        min_length=NUM_METRICS,
        max_length=NUM_METRICS,
        description="Concise metrics to focus on.",
    )

In [6]:
# Prompt for the metrics brainstorm. A trailing backslash inside the literal joins that
# source line with the next, so each paragraph reaches the model as a single line.
# The response schema is interpolated as the str() of the dict returned by
# MetricsResult.model_json_schema(); NUM_METRICS and PRODUCT come from the config cell.
prompt_metrics = f"""\
### Job Description
You are a market research analyst. You will be given a product category, and from \
that should determine the most relevant design metrics for it. These design \
metrics will subsequently be used by the engineering team to evaluate existing products \
and generate key design requirements for our new product to beat the competition.

For example, given the product category "laptop", the top 5 design metrics \
could be:
1. battery life
2. performance
3. portability
4. durability
5. keyboard quality

Another example, given the product category "accounting app", the top 3 design metrics \
could be:
1. ease of use
2. data security
3. integration with other tools

Note that we are focused solely on design metrics, hence other metrics like "customer support" \
and "price" are irrelevant as they cannot be met through design engineering efforts.

Also, try to avoid overlapping metrics. For example, "battery life" and "screen time" \
which are both related to battery performance should not be included together.

### Response Schema
{MetricsResult.model_json_schema()}

### Task
Use the JSON schema specified above to provide the top **{NUM_METRICS}** design metrics \
for product category "{PRODUCT}".\
"""

# Render the final prompt so it can be reviewed before it is sent.
display(Markdown(prompt_metrics))

### Job Description
You are a market research analyst. You will be given a product category, and from that should determine the most relevant design metrics for it. These design metrics will subsequently be used by the engineering team to evaluate existing products and generate key design requirements for our new product to beat the competition.

For example, given the product category "laptop", the top 5 design metrics could be:
1. battery life
2. performance
3. portability
4. durability
5. keyboard quality

Another example, given the product category "accounting app", the top 3 design metrics could be:
1. ease of use
2. data security
3. integration with other tools

Note that we are focused solely on design metrics, hence other metrics like "customer support" and "price" are irrelevant as they cannot be met through design engineering efforts.

Also, try to avoid overlapping metrics. For example, "battery life" and "screen time" which are both related to battery performance should not be included together.

### Response Schema
{'properties': {'category': {'const': 'wireless over-ear headphones', 'description': 'Category that was analyzed.', 'title': 'Category', 'type': 'string'}, 'rationale': {'description': 'Detailed rationale for picking the below metrics.', 'title': 'Rationale', 'type': 'string'}, 'metrics': {'description': 'Concise metrics to focus on.', 'items': {'type': 'string'}, 'maxItems': 5, 'minItems': 5, 'title': 'Metrics', 'type': 'array'}}, 'required': ['category', 'rationale', 'metrics'], 'title': 'MetricsResult', 'type': 'object'}

### Task
Use the JSON schema specified above to provide the top **5** design metrics for product category "wireless over-ear headphones".

In [None]:
# Ask the model for the key design metrics. Passing the pydantic class as the response
# schema requests JSON output, which the SDK exposes as `resp.parsed`.
# Config fields use snake_case, matching `response_modalities` used elsewhere in this notebook.
resp = client.models.generate_content(
    model=GOOGLE_AI_MODEL,
    contents=prompt_metrics,
    config=types.GenerateContentConfig(
        response_mime_type="application/json",
        response_schema=MetricsResult,
    ),
)
result_metrics = resp.parsed
# `parsed` can be None when the reply does not fit the schema; fail here with the raw
# text rather than with an AttributeError in a later cell.
if result_metrics is None:
    raise ValueError(f"Could not parse the metrics response: {resp.text!r}")
# debug() pretty-prints the object; the trailing `;` suppresses the duplicate cell output.
debug(result_metrics);

/tmp/ipykernel_10913/269700729.py:10 <module>
    result_metrics: MetricsResult(
        category='wireless over-ear headphones',
        rationale=(
            'When designing wireless over-ear headphones, several key factors contribute to a superior user experience'
            '. Sound quality is paramount, encompassing clarity, balance, and bass response. Comfort is crucial for ex'
            'tended listening sessions, influenced by headband and ear cup design. Noise cancellation effectiveness si'
            'gnificantly impacts the listening experience in various environments. Battery life determines the conveni'
            'ence and usability of the headphones. Build quality and durability ensure longevity and resistance to wea'
            'r and tear.'
        ),
        metrics=[
            'Sound Quality',
            'Comfort',
            'Noise Cancellation',
            'Battery Life',
            'Durability',
        ],
    ) (MetricsResult)


MetricsResult(category='wireless over-ear headphones', rationale='When designing wireless over-ear headphones, several key factors contribute to a superior user experience. Sound quality is paramount, encompassing clarity, balance, and bass response. Comfort is crucial for extended listening sessions, influenced by headband and ear cup design. Noise cancellation effectiveness significantly impacts the listening experience in various environments. Battery life determines the convenience and usability of the headphones. Build quality and durability ensure longevity and resistance to wear and tear.', metrics=['Sound Quality', 'Comfort', 'Noise Cancellation', 'Battery Life', 'Durability'])

#### Finding Competitors
The goal is to prompt the LLM to find interesting, distinct competitors in different
niches. For example, after choosing Apple iPhone 16 as a competitor to study, if the LLM has
to choose between Samsung S23 (similar to Apple) and a more niche brand like Nothing
Phone (3a), it should pick the latter to get a more diverse set of competitors.

In [None]:
# One competitor product together with the web page it was sourced from.
# All four fields are required strings.
class CompetitorProduct(BaseModel):
    name: str = Field(description="Full name of the product.")
    # URL of the source page; the retry loop later accepts a response only if every
    # reference starts with http:// or https://.
    reference: str = Field(description="Website URL referenced for the product.")
    reference_title: str = Field(description="Title of the reference website.")
    reference_summary: str = Field(
        description="Summary of the reference website's contents."
    )


# Structured-output schema for the competitor search.
# As in MetricsResult, rationale is declared before the list it justifies.
class CompetitionResult(BaseModel):
    # Pinned to the configured PRODUCT (rendered as a `const` in the JSON schema).
    category: Literal[PRODUCT] = Field(
        description="Product category that was analyzed."
    )
    rationale: str = Field(
        description="Detailed rationale for picking the below competitors' products."
    )
    # min_length == max_length, so exactly NUM_COMPETITORS products are required.
    products: List[CompetitorProduct] = Field(
        min_length=NUM_COMPETITORS,
        max_length=NUM_COMPETITORS,
        description="Each selected product.",
    )

In [9]:
# Prompt for the competitor search. A trailing backslash inside the literal joins that
# source line with the next one; lines without it keep their newline in the prompt.
# The response schema is interpolated as the str() of the dict returned by
# CompetitionResult.model_json_schema(); NUM_COMPETITORS and PRODUCT come from the config cell.
prompt_competition = f"""\
### Job Description
You are a market research analyst. You will be given a product category, and from \
that should determine the most relevant competitors' products to analyze. \
These competitors' products will subsequently be used by the engineering team to \
generate key design requirements for our new product to beat the competition.

For example, given the product category "laptop", the top 5 competitors' products \
could be:
1. Dell XPS 13 (2023)
2. MacBook Air (M3, 2024)
3. Lenovo ThinkPad X1 Carbon Gen 13
4. HP Spectre x360 (2017)
5. ASUS ZenBook 13 (2024)

Another example, given the product category "accounting app", the top 3 competitors' products \
could be:
1. QuickBooks
2. Xero
3. FreshBooks

Note that diversity of products is key here. It is especially good if the products \
fill different niches in the product category.

Also, try to avoid overlapping products. For example, "Dell XPS 13 (2013)" and "Dell XPS 15 (2013)" \
which are both from the same product line should not be included together. The only \
exception to this rule is if the product line is very different from each other, \
for example, "Lenovo Yoga 9i Gen 8" and "Lenovo Legion 7 Gen 8" are both laptops, but \
one is an ultraportable and the other is a gaming laptop, so they can be included together.

Where possible, include the full product name like (Vendor), (Product Name), (Generation or Year).

To ensure currency, please do a web search to find recent products on review or comparison sites. \
For example, you can search for "best laptops this year" or "best accounting apps this year",
though try and come up with better product-specific search terms.

### Response Schema
{CompetitionResult.model_json_schema()}

### Task
Use the JSON schema specified above to provide **{NUM_COMPETITORS}** competitor products \
that are in product category "{PRODUCT}".\
"""

# Render the final prompt so it can be reviewed before it is sent.
display(Markdown(prompt_competition))

### Job Description
You are a market research analyst. You will be given a product category, and from that should determine the most relevant competitors' products to analyze. These competitors' products will subsequently be used by the engineering team to generate key design requirements for our new product to beat the competition.

For example, given the product category "laptop", the top 5 competitors' products could be:
1. Dell XPS 13 (2023)
2. MacBook Air (M3, 2024)
3. Lenovo ThinkPad X1 Carbon Gen 13
4. HP Spectre x360 (2017)
5. ASUS ZenBook 13 (2024)

Another example, given the product category "accounting app", the top 3 competitors' products could be:
1. QuickBooks
2. Xero
3. FreshBooks

Note that diversity of products is key here. It is especially good if the products fill different niches in the product category.

Also, try to avoid overlapping products. For example, "Dell XPS 13 (2013)" and "Dell XPS 15 (2013)" which are both from the same product line should not be included together. The only exception to this rule is if the product line is very different from each other, for example, "Lenovo Yoga 9i Gen 8" and "Lenovo Legion 7 Gen 8" are both laptops, but one is an ultraportable and the other is a gaming laptop, so they can be included together.

Where possible, include the full product name like (Vendor), (Product Name), (Generation or Year).

To ensure currency, please do a web search to find recent products on review or comparison sites. For example, you can search for "best laptops this year" or "best accounting apps this year",
though try and come up with better product-specific search terms.

### Response Schema
{'$defs': {'CompetitorProduct': {'properties': {'name': {'description': 'Full name of the product.', 'title': 'Name', 'type': 'string'}, 'reference': {'description': 'Website URL referenced for the product.', 'title': 'Reference', 'type': 'string'}, 'reference_title': {'description': 'Title of the reference website.', 'title': 'Reference Title', 'type': 'string'}, 'reference_summary': {'description': "Summary of the reference website's contents.", 'title': 'Reference Summary', 'type': 'string'}}, 'required': ['name', 'reference', 'reference_title', 'reference_summary'], 'title': 'CompetitorProduct', 'type': 'object'}}, 'properties': {'category': {'const': 'wireless over-ear headphones', 'description': 'Product category that was analyzed.', 'title': 'Category', 'type': 'string'}, 'rationale': {'description': "Detailed rationale for picking the below competitors' products.", 'title': 'Rationale', 'type': 'string'}, 'products': {'description': 'Each selected product.', 'items': {'$ref': '#/$defs/CompetitorProduct'}, 'maxItems': 5, 'minItems': 5, 'title': 'Products', 'type': 'array'}}, 'required': ['category', 'rationale', 'products'], 'title': 'CompetitionResult', 'type': 'object'}

### Task
Use the JSON schema specified above to provide **5** competitor products that are in product category "wireless over-ear headphones".

In [10]:
# Gemini's built-in Google Search tool, used to ground the competitor list in recent pages.
google_search_tool = types.Tool(
    google_search=types.GoogleSearch(),
)

# Upper bound on search + extract rounds, so a persistently malformed reply cannot
# loop (and spend API quota) forever.
MAX_COMPETITION_ATTEMPTS = 5

# The API rejects controlled generation (a response schema) combined with the
# google_search tool, so every attempt takes two requests: a grounded free-text answer,
# then a schema-constrained pass that extracts the JSON from that text.
# Sometimes the model echoes the JSON schema instead of real data; a response is
# accepted only if it parsed and every product carries an http(s) reference URL.
for attempt in range(1, MAX_COMPETITION_ATTEMPTS + 1):
    resp1 = client.models.generate_content(
        model=GOOGLE_AI_MODEL,
        contents=prompt_competition,
        config=types.GenerateContentConfig(
            tools=[google_search_tool],
            response_modalities=["TEXT"],
        ),
    )
    if not resp1.text:
        # Nothing to extract from; count it as a failed attempt.
        print("Empty search response, retrying...")
        continue
    display(Markdown(resp1.text))

    # Now we extract the JSON from the text response.
    resp2 = client.models.generate_content(
        model=GOOGLE_AI_MODEL,
        contents=f"Extract JSON from this text without modifying the contents: {resp1.text}",
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=CompetitionResult,
        ),
    )

    result_competition = resp2.parsed
    debug(result_competition)

    # `parsed` can be None when the reply does not fit the schema; treat that like any
    # other invalid response instead of failing on `.products`.
    if result_competition is not None and all(
        re.match(r"^https?://", product.reference)
        for product in result_competition.products
    ):
        break
    if attempt < MAX_COMPETITION_ATTEMPTS:
        print("Invalid response, retrying...")
else:
    # Reached only when no attempt ended in `break`.
    raise RuntimeError(
        f"No valid competitor list after {MAX_COMPETITION_ATTEMPTS} attempts."
    )

<IPython.core.display.Markdown object>

/tmp/ipykernel_10913/2169418664.py:31 <module>
    result_competition: CompetitionResult(
        category='wireless over-ear headphones',
        rationale='string',
        products=[
            CompetitorProduct(
                name='string',
                reference='string',
                reference_title='string',
                reference_summary='string',
            ),
            CompetitorProduct(
                name='string',
                reference='string',
                reference_title='string',
                reference_summary='string',
            ),
            CompetitorProduct(
                name='string',
                reference='string',
                reference_title='string',
                reference_summary='string',
            ),
            CompetitorProduct(
                name='string',
                reference='string',
                reference_title='string',
                reference_summary='string',
            ),
            Compet

```json
{'category': 'wireless over-ear headphones', 'rationale': 'The selected headphones represent a range of top-rated wireless over-ear headphones available in the market today. They vary in price, features, and target audience, offering a comprehensive view of the competitive landscape. I selected these based on reviews from multiple sources to represent different needs of customers. The list includes options known for excellent noise cancellation (Bose, Sony), premium sound quality (Focal), value (Soundcore), and integration within the Apple ecosystem (AirPods Max).', 'products': [{'name': 'Bose QuietComfort Ultra Headphones', 'reference': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AWQVqAJz7xQmbsviPUH16B3jGD9657TtWjg-uPVKFOedGbbQx_-0M_UcJtmsNsho1bRNfiqy1sojTuLZPd_LRacICnrLfqFdUuTHTWuWD4TQgRoHUTQoAL8T7NYy3DXRJ5Nc1bqzS2A0oX_hS5Gfnf5Fnlj9-6sLmEKQanS6T5Ouwj_GGPo1BdOh1dVKQifT-SsPFazRDjscSQ==', 'reference_title': 'The <b>best wireless headphones</b> for <b>2025</b>: <b>Bluetooth</b> options for every budget - Engadget', 'reference_summary': 'Whether you\'re listening to playlists on your daily commute or zoning out with a podcast at home, wireless headphones can make your audio experience much more comfortable. With no cords to untangle or get caught on your bag, they\'re a great pick for anyone who wants convenience without compromising on sound quality. If comfort and immersive audio are high on your priority list, over-ear wireless headphones are often the way to go — they wrap around your ears to help block out the world and deliver rich detailed sound that earbuds can sometimes miss. 
There\'s a wide range of over-ear wireless headphones to choose from, whether you want active noise cancellation, long battery life or a comfy fit for marathon listening sessions.'}, {'name': 'Sony WH-1000XM5', 'reference': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AWQVqALj_JiWObcxrBl5zg2YNC2m89-W457PqnX12HKeUU83nihS8ZpKWN-O6_JiZrz84Wx7c1iec5t2e_fmG0HZkiohLj2VUIAglYzTDf-R2EX_QaodCZGP52JD0KVk_3QxIXZbVQUI3BYxtQzxfO32suj1VCbvkYmGRe50S9j64rqKBA==', 'reference_title': '<b>Best noise</b>-<b>canceling headphones</b> for <b>2025</b> - Crutchfield', 'reference_summary': 'Best noise-canceling headphones for 2025\n- Best budget wireless noise-canceling headphones — Cleer Enduro ANC.\n- Best for noise cancellation — Bose QuietComfort Ultra Earbuds.\n- Best wireless noise-canceling earbuds for running and workouts — JBL Live Free 2.\n- Best-looking wireless noise-canceling headphones — Bowers &amp; Wilkins PX8.\n- Best wireless noise-canceling headphones for Apple Users — Apple AirPods Pro 2.\n- Best-sounding wireless noise-canceling over-ears — Focal Bathys.\n- Best-sounding wireless noise-canceling earbuds — Sennheiser Momentum True Wireless 4.\n- Best all-around wireless noise-canceling headphones — Sony WH-1000XM5.'}, {'name': 'Focal Bathys', 'reference': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AWQVqAI33shFACGYg7qTI_oDHIzy10phg9ansD5uaDmxyavuXO_C_st0K9pu1dkyNlJtgHpiZiIPhyvdwr_3k1Omqcn2F5PMtzFP9URs6cZoEfcKKyiuA-2QU_npIdYET7U13PWrK3tMeb-Z2Yp94qwM0-I7g4miV63C29tbb4qlZNskbnObGkZR7k4EC-8IWHnbEQ==', 'reference_title': 'The 8 <b>Best Wireless Headphones</b> We Tested <b>On</b> Subways, Flights And More - Forbes', 'reference_summary': "After rigorously testing 22 pairs, our team's top pick for the best wireless headphones is the Bose QuietComfort Ultra: They feature gorgeous, rich audio that's easy to adjust, and they feel comfortable to wear for hours after multiple transatlantic flights. 
For casual listeners who don't want to spend a fortune, the Soundcore Space Ones ring in at around $100 and bring surprisingly good audio quality."}, {'name': 'Anker Soundcore Space One', 'reference': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AWQVqAI33shFACGYg7qTI_oDHIzy10phg9ansD5uaDmxyavuXO_C_st0K9pu1dkyNlJtgHpiZiIPhyvdwr_3k1Omqcn2F5PMtzFP9URs6cZoEfcKKyiuA-2QU_npIdYET7U13PWrK3tMeb-Z2Yp94qwM0-I7g4miV63C29tbb4qlZNskbnObGkZR7k4EC-8IWHnbEQ==', 'reference_title': 'The 8 <b>Best Wireless Headphones</b> We Tested <b>On</b> Subways, Flights And More - Forbes', 'reference_summary': "After rigorously testing 22 pairs, our team's top pick for the best wireless headphones is the Bose QuietComfort Ultra: They feature gorgeous, rich audio that's easy to adjust, and they feel comfortable to wear for hours after multiple transatlantic flights. For casual listeners who don't want to spend a fortune, the Soundcore Space Ones ring in at around $100 and bring surprisingly good audio quality."}, {'name': 'Apple AirPods Max', 'reference': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AWQVqAJz7xQmbsviPUH16B3jGD9657TtWjg-uPVKFOedGbbQx_-0M_UcJtmsNsho1bRNfiqy1sojTuLZPd_LRacICnrLfqFdUuTHTWuWD4TQgRoHUTQoAL8T7NYy3DXRJ5Nc1bqzS2A0oX_hS5Gfnf5Fnlj9-6sLmEKQanS6T5Ouwj_GGPo1BdOh1dVKQifT-SsPFazRDjscSQ==', 'reference_title': 'The <b>best wireless headphones</b> for <b>2025</b>: <b>Bluetooth</b> options for every budget - Engadget', 'reference_summary': 'Whether you\'re listening to playlists on your daily commute or zoning out with a podcast at home, wireless headphones can make your audio experience much more comfortable. With no cords to untangle or get caught on your bag, they\'re a great pick for anyone who wants convenience without compromising on sound quality. 
If comfort and immersive audio are high on your priority list, over-ear wireless headphones are often the way to go — they wrap around your ears to help block out the world and deliver rich detailed sound that earbuds can sometimes miss. There\'s a wide range of over-ear wireless headphones to choose from, whether you want active noise cancellation, long battery life or a comfy fit for marathon listening sessions.'}]}
```

/tmp/ipykernel_10913/2169418664.py:31 <module>
    result_competition: CompetitionResult(
        category='wireless over-ear headphones',
        rationale=(
            'The selected headphones represent a range of top-rated wireless over-ear headphones available in the mark'
            'et today. They vary in price, features, and target audience, offering a comprehensive view of the competi'
            'tive landscape. I selected these based on reviews from multiple sources to represent different needs of c'
            'ustomers. The list includes options known for excellent noise cancellation (Bose, Sony), premium sound qu'
            'ality (Focal), value (Soundcore), and integration within the Apple ecosystem (AirPods Max).'
        ),
        products=[
            CompetitorProduct(
                name='Bose QuietComfort Ultra Headphones',
                reference=(
                    'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AWQVqAJz7xQmbsviPUH16B3

#### Get Common Technical Specifications to Look Out For
These are later used as examples to help the extraction model find important technical
specifications.

In [11]:
# Structured-output schema for the "frequently compared technical specifications" request.
class SpecsResult(BaseModel):
    # Pinned to the configured PRODUCT (rendered as a `const` in the JSON schema).
    category: Literal[PRODUCT] = Field(description="Category that was analyzed")
    # Declared before `specs`, mirroring the rationale-first layout of the other schemas.
    rationale: str = Field(
        description="Detailed rationale for naming the below technical specifications"
    )
    # Only an upper bound (MAX_SPECS) is set; fewer entries are accepted.
    specs: List[str] = Field(
        max_length=MAX_SPECS,
        description="Frequently compared technical specifications",
    )

In [None]:
# One "- <name>" bullet per selected competitor. Built outside the f-string because a
# backslash inside an f-string replacement field is a SyntaxError before Python 3.12.
product_bullets = "\n".join(f"- {o.name}" for o in result_competition.products)

# Prompt for the technical-specification brainstorm. A trailing backslash inside the
# literal joins that source line with the next one. The response schema is interpolated
# as the str() of the dict returned by SpecsResult.model_json_schema().
prompt_tech_specs = f"""\
### Job Description
You are a market research analyst. You will be given a product category and a list of \
competitors' products within the category. From that, you shall determine a list \
of frequently compared technical specifications, which will be used by your junior \
analyst to compare different products. Carefully think about how "{PRODUCT}" is used \
(and if applicable, transported), to determine what consumers care about.

### Response Schema
{SpecsResult.model_json_schema()}

### Task
The products are:
{product_bullets}

Use the JSON schema to provide at most **{MAX_SPECS}** technical specifications \
that are frequently compared for product category "{PRODUCT}".\
"""

# Render the final prompt so it can be reviewed before it is sent.
display(Markdown(prompt_tech_specs))

### Job Description
You are a market research analyst. You will be given a product category and a list of competitors' products within the category. From that, you shall determine a list of frequently compared technical specifications, which will be used by your junior analyst to compare different products. Carefully think about how "wireless over-ear headphones" is used (and if applicable, transported), to determine what consumers care about.

### Response Schema
{'properties': {'category': {'const': 'wireless over-ear headphones', 'description': 'Category that was analyzed', 'title': 'Category', 'type': 'string'}, 'rationale': {'description': 'Detailed rationale for naming the below technical specifications', 'title': 'Rationale', 'type': 'string'}, 'specs': {'description': 'Frequently compared technical specifications', 'items': {'type': 'string'}, 'maxItems': 10, 'title': 'Specs', 'type': 'array'}}, 'required': ['category', 'rationale', 'specs'], 'title': 'SpecsResult', 'type': 'object'}

### Task
The products are:
- Bose QuietComfort Ultra Headphones
- Sony WH-1000XM5
- Focal Bathys
- Anker Soundcore Space One
- Apple AirPods Max

Use the JSON schema to provide at most **10** technical specifications that are frequently compared for product category "wireless over-ear headphones".

In [13]:
# Ask the model for the commonly compared technical specifications. Passing the pydantic
# class as the response schema requests JSON output, which the SDK exposes as `resp.parsed`.
# Config fields use snake_case, matching `response_modalities` used elsewhere in this notebook.
resp = client.models.generate_content(
    model=GOOGLE_AI_MODEL,
    contents=prompt_tech_specs,
    config=types.GenerateContentConfig(
        response_mime_type="application/json",
        response_schema=SpecsResult,
    ),
)
result_specs = resp.parsed
# `parsed` can be None when the reply does not fit the schema; fail here with the raw
# text rather than with an AttributeError in a later cell.
if result_specs is None:
    raise ValueError(f"Could not parse the specs response: {resp.text!r}")
debug(result_specs)

/tmp/ipykernel_10913/339006114.py:10 <module>
    result_specs: SpecsResult(
        category='wireless over-ear headphones',
        rationale=(
            'When evaluating wireless over-ear headphones, consumers frequently compare specifications related to audi'
            'o quality, noise cancellation effectiveness, comfort and fit, battery life, connectivity options, and por'
            'tability. Durability and the inclusion of features like a microphone for calls are also important conside'
            'rations.'
        ),
        specs=[
            'Noise Cancellation Technology',
            'Audio Codec Support (e.g., AAC, SBC, aptX, LDAC)',
            'Battery Life',
            'Driver Size',
            'Impedance',
            'Bluetooth Version',
            'Weight',
            'Comfort and Fit (e.g., headband padding, ear cup material)',
            'Microphone Quality',
            'Water Resistance Rating',
        ],
    ) (SpecsResult)


SpecsResult(category='wireless over-ear headphones', rationale='When evaluating wireless over-ear headphones, consumers frequently compare specifications related to audio quality, noise cancellation effectiveness, comfort and fit, battery life, connectivity options, and portability. Durability and the inclusion of features like a microphone for calls are also important considerations.', specs=['Noise Cancellation Technology', 'Audio Codec Support (e.g., AAC, SBC, aptX, LDAC)', 'Battery Life', 'Driver Size', 'Impedance', 'Bluetooth Version', 'Weight', 'Comfort and Fit (e.g., headband padding, ear cup material)', 'Microphone Quality', 'Water Resistance Rating'])

#### Post Processing
Google hides the URL from the model, so we need to extract it by resolving the redirect.
Finally, save everything to a JSON file for use in the later stages of the pipeline.

In [14]:
def resolve_redirect(url: str, timeout: float = 10.0) -> str:
    """Return the real target behind a Google grounding redirect URL.

    URLs that do not start with the grounding-redirect prefix are returned unchanged
    and no request is made. For grounding URLs a single GET is issued without
    following redirects, and the ``Location`` header of a 3xx answer is returned.

    Args:
        url: URL reported by the model for a source page.
        timeout: Seconds to wait for the redirect service before giving up, so a
            stalled connection cannot hang the notebook.

    Returns:
        The redirect target, or ``url`` itself when it is not a grounding redirect,
        the answer is not a 3xx, or the 3xx answer carries no ``Location`` header.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    grounding_prefix = "https://vertexaisearch.cloud.google.com/grounding-api-redirect"
    if not url.startswith(grounding_prefix):
        return url

    resp = requests.get(url, allow_redirects=False, timeout=timeout)
    if 300 <= resp.status_code < 400:
        # Fall back to the original URL rather than returning None for a bare 3xx.
        return resp.headers.get("Location") or url
    return url

In [None]:
# Sanity check: the metrics and competition answers must both be about the configured product.
assert result_metrics.category == result_competition.category == PRODUCT

# Everything the later pipeline stages need, gathered into one JSON-serialisable dict.
# Each product reference goes through resolve_redirect() before it is stored.
output = {
    "category": PRODUCT,
    "metrics_rationale": result_metrics.rationale,
    "metrics": result_metrics.metrics,
    "competition_rationale": result_competition.rationale,
    "competition_products": [
        {
            "name": competitor.name,
            "reference": resolve_redirect(competitor.reference),
            "reference_title": competitor.reference_title,
            "reference_summary": competitor.reference_summary,
        }
        for competitor in result_competition.products
    ],
    "specs_rationale": result_specs.rationale,
    "specs": result_specs.specs,
}

display(output)

{'category': 'wireless over-ear headphones',
 'metrics_rationale': 'When designing wireless over-ear headphones, several key factors contribute to a superior user experience. Sound quality is paramount, encompassing clarity, balance, and bass response. Comfort is crucial for extended listening sessions, influenced by headband and ear cup design. Noise cancellation effectiveness significantly impacts the listening experience in various environments. Battery life determines the convenience and usability of the headphones. Build quality and durability ensure longevity and resistance to wear and tear.',
 'metrics': ['Sound Quality',
  'Comfort',
  'Noise Cancellation',
  'Battery Life',
  'Durability'],
 'competition_rationale': 'The selected headphones represent a range of top-rated wireless over-ear headphones available in the market today. They vary in price, features, and target audience, offering a comprehensive view of the competitive landscape. I selected these based on reviews from

In [16]:
# Persist the stage-1 results in the session folder.
stage_1_path = DATA_DIR / "stage_1.json"
with stage_1_path.open("w") as stage_1_file:
    stage_1_file.write(json.dumps(output, indent=2))