In [1]:
from google import genai
from google.genai import types
from IPython.display import Markdown
from pydantic import BaseModel, Field, ConfigDict
from devtools import debug
from pathlib import Path
from typing import *
import requests

### Config

#### Product to Research
Most important config setting! Change to the product category you wish to study.
Also specify the number of metrics to consider.

In [2]:
PRODUCT = "earbuds"
NUM_METRICS = 5
NUM_COMPETITORS = 5
DATA_DIR = Path("session") / PRODUCT

DATA_DIR.mkdir(parents=True, exist_ok=True)

#### API Keys and Model
See <https://ai.google.dev/gemini-api/docs/api-key>

In [3]:
GOOGLE_AI_KEY = "AIzaSyDAlPx7St5BUXqlwiqFKvlT-Sc2dnTT4Jc"
GOOGLE_AI_MODEL = "gemini-2.0-flash"

### Operations

#### Setup

**Init API Connection**

In [4]:
client = genai.Client(api_key=GOOGLE_AI_KEY)

#### Determining Key Metrics
As part of the automated process, we prompt a model to brainstorm key metrics for the product category. This serves as a starting point by which to analyze competitor products.

In [5]:
# BTW, putting rationale before metrics is a way to force the model to think first.
class MetricsResult(BaseModel):
    category: Literal[PRODUCT] = Field(description="Category that was analyzed.")
    rationale: str = Field(description="Detailed rationale for picking the below metrics.")
    metrics: List[str] = Field(min_length=NUM_METRICS, max_length=NUM_METRICS, description="Concise metrics to focus on.")

In [6]:
prompt_metrics = f"""\
### Job Description
You are a market research analyst. You will be given a product category, and from \
that should determine the most relevant design metrics for it. These design \
metrics will subsequently be used by the engineering team to evaluate existing products \
and generate key design requirements for our new product to beat the competition.

For example, given the product category "laptop", the top 5 design metrics \
could be:
1. battery life
2. performance
3. portability
4. durability
5. keyboard quality

Another example, given the product category "accounting app", the top 3 design metrics \
could be:
1. ease of use
2. data security
3. integration with other tools

Note that we are focused solely on design metrics, hence other metrics like "customer support" \
and "price" are irrelevant as they cannot be met through design engineering efforts.

Also, try to avoid overlapping metrics. For example, "battery life" and "screen time" \
which are both related to battery performance should not be included together.

### Task
Given the product category "{PRODUCT}", please provide the top {NUM_METRICS} design metrics \
that are most relevant to it in JSON. Follow the JSON schema below:

{MetricsResult.model_json_schema()}\
"""

display(Markdown(prompt_metrics))

### Job Description
You are a market research analyst. You will be given a product category, and from that should determine the most relevant design metrics for it. These design metrics will subsequently be used by the engineering team to evaluate existing products and generate key design requirements for our new product to beat the competition.

For example, given the product category "laptop", the top 5 design metrics could be:
1. battery life
2. performance
3. portability
4. durability
5. keyboard quality

Another example, given the product category "accounting app", the top 3 design metrics could be:
1. ease of use
2. data security
3. integration with other tools

Note that we are focused solely on design metrics, hence other metrics like "customer support" and "price" are irrelevant as they cannot be met through design engineering efforts.

Also, try to avoid overlapping metrics. For example, "battery life" and "screen time" which are both related to battery performance should not be included together.

### Task
Given the product category "earbuds", please provide the top 5 design metrics that are most relevant to it in JSON. Follow the JSON schema below:

{'properties': {'category': {'const': 'earbuds', 'description': 'Category that was analyzed.', 'title': 'Category', 'type': 'string'}, 'rationale': {'description': 'Detailed rationale for picking the below metrics.', 'title': 'Rationale', 'type': 'string'}, 'metrics': {'description': 'Concise metrics to focus on.', 'items': {'type': 'string'}, 'maxItems': 5, 'minItems': 5, 'title': 'Metrics', 'type': 'array'}}, 'required': ['category', 'rationale', 'metrics'], 'title': 'MetricsResult', 'type': 'object'}

In [7]:
resp = client.models.generate_content(
    model=GOOGLE_AI_MODEL,
    contents=prompt_metrics,
    config=types.GenerateContentConfig(
        responseMimeType="application/json",
        responseSchema=MetricsResult,
    ),
)
result_metrics = resp.parsed
debug(result_metrics)

/tmp/ipykernel_2378016/269700729.py:10 <module>
    result_metrics: MetricsResult(
        category='earbuds',
        rationale=(
            'These metrics are crucial for evaluating and designing high-quality earbuds. Sound quality is paramount f'
            'or user satisfaction. Comfort and fit ensure prolonged use without discomfort. Battery life determines th'
            'e duration of uninterrupted listening. Durability ensures the longevity of the product. Noise cancellatio'
            'n enhances the listening experience in various environments.'
        ),
        metrics=[
            'sound quality',
            'comfort and fit',
            'battery life',
            'durability',
            'noise cancellation',
        ],
    ) (MetricsResult)


MetricsResult(category='earbuds', rationale='These metrics are crucial for evaluating and designing high-quality earbuds. Sound quality is paramount for user satisfaction. Comfort and fit ensure prolonged use without discomfort. Battery life determines the duration of uninterrupted listening. Durability ensures the longevity of the product. Noise cancellation enhances the listening experience in various environments.', metrics=['sound quality', 'comfort and fit', 'battery life', 'durability', 'noise cancellation'])

#### Finding Competitors
The goal is to prompt the LLM to find interesting, distinct competitors in different
niches. For example, after choosing Apple iPhone 16 as a competitor to study, if the LLM has
to choose between Samsung S23 (similar to Apple) and a more niche brand like Nothing
Phone (3a), it should pick the latter to get a more diverse set of competitors.

In [8]:
class CompetitorProduct(BaseModel):
    name: str = Field(description="Full name of the product.")
    reference: str = Field(description="Website URL referenced for the product.")
    reference_title: str = Field(description="Title of the reference website.")
    reference_summary: str = Field(description="Summary of the reference website's contents.")

class CompetitionResult(BaseModel):
    category: Literal[PRODUCT] = Field(description="Product category that was analyzed.")
    rationale: str = Field(description="Detailed rationale for picking the below competitors' products.")
    products: List[CompetitorProduct] = Field(min_length=NUM_COMPETITORS, max_length=NUM_COMPETITORS, description="Each selected product.")

In [9]:
prompt_competition = f"""\
### Job Description
You are a market research analyst. You will be given a product category, and from \
that should determine the most relevant competitors' products to analyze. \
These competitors' products will subsequently be used by the engineering team to \
generate key design requirements for our new product to beat the competition.

For example, given the product category "laptop", the top 5 competitors' products \
could be:
1. Dell XPS 13 (2023)
2. MacBook Air (M3, 2024)
3. Lenovo ThinkPad X1 Carbon Gen 13
4. HP Spectre x360 (2017)
5. ASUS ZenBook 13 (2024)

Another example, given the product category "accounting app", the top 3 competitors' products \
could be:
1. QuickBooks
2. Xero
3. FreshBooks

Note that diversity of products is key here. It is especially good if the products \
fill different niches in the product category.

Also, try to avoid overlapping products. For example, "Dell XPS 13 (2013)" and "Dell XPS 15 (2013)" \
which are both from the same product line should not be included together. The only \
exception to this rule is if the product line is very different from each other, \
for example, "Lenovo Yoga 9i Gen 8" and "Lenovo Legion 7 Gen 8" are both laptops, but \
one is an ultraportable and the other is a gaming laptop, so they can be included together.

Where possible, include the full product name like (Vendor), (Product Name), (Generation or Year).

To ensure currency, please do a web search to find recent products on review or comparison sites. \
For example, you can search for "best laptops this year" or "best accounting apps this year",
though try and come up with better product-specific search terms.

### Task
Given the product category "{PRODUCT}", please provide the top {NUM_COMPETITORS} competitors' products \
that are most relevant to it in JSON. Follow the JSON schema below:

{CompetitionResult.model_json_schema()}\
"""

display(Markdown(prompt_competition))

### Job Description
You are a market research analyst. You will be given a product category, and from that should determine the most relevant competitors' products to analyze. These competitors' products will subsequently be used by the engineering team to generate key design requirements for our new product to beat the competition.

For example, given the product category "laptop", the top 5 competitors' products could be:
1. Dell XPS 13 (2023)
2. MacBook Air (M3, 2024)
3. Lenovo ThinkPad X1 Carbon Gen 13
4. HP Spectre x360 (2017)
5. ASUS ZenBook 13 (2024)

Another example, given the product category "accounting app", the top 3 competitors' products could be:
1. QuickBooks
2. Xero
3. FreshBooks

Note that diversity of products is key here. It is especially good if the products fill different niches in the product category.

Also, try to avoid overlapping products. For example, "Dell XPS 13 (2013)" and "Dell XPS 15 (2013)" which are both from the same product line should not be included together. The only exception to this rule is if the product line is very different from each other, for example, "Lenovo Yoga 9i Gen 8" and "Lenovo Legion 7 Gen 8" are both laptops, but one is an ultraportable and the other is a gaming laptop, so they can be included together.

Where possible, include the full product name like (Vendor), (Product Name), (Generation or Year).

To ensure currency, please do a web search to find recent products on review or comparison sites. For example, you can search for "best laptops this year" or "best accounting apps this year",
though try and come up with better product-specific search terms.

### Task
Given the product category "earbuds", please provide the top 5 competitors' products that are most relevant to it in JSON. Follow the JSON schema below:

{'$defs': {'CompetitorProduct': {'properties': {'name': {'description': 'Full name of the product.', 'title': 'Name', 'type': 'string'}, 'reference': {'description': 'Website URL referenced for the product.', 'title': 'Reference', 'type': 'string'}, 'reference_title': {'description': 'Title of the reference website.', 'title': 'Reference Title', 'type': 'string'}, 'reference_summary': {'description': "Summary of the reference website's contents.", 'title': 'Reference Summary', 'type': 'string'}}, 'required': ['name', 'reference', 'reference_title', 'reference_summary'], 'title': 'CompetitorProduct', 'type': 'object'}}, 'properties': {'category': {'const': 'earbuds', 'description': 'Product category that was analyzed.', 'title': 'Category', 'type': 'string'}, 'rationale': {'description': "Detailed rationale for picking the below competitors' products.", 'title': 'Rationale', 'type': 'string'}, 'products': {'description': 'Each selected product.', 'items': {'$ref': '#/$defs/CompetitorProduct'}, 'maxItems': 5, 'minItems': 5, 'title': 'Products', 'type': 'array'}}, 'required': ['category', 'rationale', 'products'], 'title': 'CompetitionResult', 'type': 'object'}

In [11]:
google_search_tool = types.Tool(
    google_search = types.GoogleSearch(),
)

# Unable to submit request because controlled generation is not supported with google_search tool. Learn more: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini
# So we send it again to the model to extract the json just in case it messes up the schema.
resp1 = client.models.generate_content(
    model=GOOGLE_AI_MODEL,
    contents=prompt_competition,
    config=types.GenerateContentConfig(
        tools=[google_search_tool],
        response_modalities=["TEXT"],
    )
)
display(Markdown(resp1.text))

# Now we extract the JSON from the text response.
resp2 = client.models.generate_content(
    model=GOOGLE_AI_MODEL,
    contents=f"Extract JSON from this text without modifying the contents: {resp1.text}",
    config=types.GenerateContentConfig(
        responseMimeType="application/json",
        responseSchema=CompetitionResult,
    ),
)

result_competition = resp2.parsed
debug(result_competition)

```json
{'category': 'earbuds', 'rationale': 'To identify top competitors in the earbud market, I focused on recent reviews and comparisons from reputable sources. I aimed for a mix of brands and models that excel in different areas such as noise cancellation, sound quality, comfort, and special features like workout-friendly designs. I have focused on the models available in 2024 and 2025 to ensure currency. This approach provides a comprehensive view of the competitive landscape, useful for informing design requirements for a new product.', 'products': [{'name': 'Bose QuietComfort Ultra Earbuds', 'reference': 'https://www.pcmag.com/reviews/bose-quietcomfort-ultra-earbuds', 'reference_title': 'Bose QuietComfort Ultra Earbuds Review', 'reference_summary': 'The Bose QuietComfort Ultra Earbuds are highlighted as offering the best noise cancellation, making them a top pick for users prioritizing ANC. They are also recognized as best for iPhone users.'}, {'name': 'Technics EAH-AZ100S', 'reference': 'https://www.forbes.com/sites/rebeccaissaacs/2025/04/07/best-wireless-earbuds/?sh=6f34c2954699', 'reference_title': 'The Best Wireless Earbuds, Tested By Our Tech Editor - Forbes', 'reference_summary': 'The Technics EAH-AZ100S are named the best overall wireless earbuds for 2025, combining incredible audio quality, decent noise cancellation, and battery life. They are also noted for their rounded design with aluminum elements.'}, {'name': 'Apple AirPods Pro 2 (USB-C)', 'reference': 'https://www.mashable.com/roundup/best-earbuds', 'reference_title': 'The 8 best earbuds to buy in 2025 from Bose, Sony, and more - Mashable', 'reference_summary': 'The Apple AirPods Pro (USB-C) are recommended as the best earbuds for Apple users, balancing audio quality and affordability. They offer excellent active noise cancellation and Adaptive Transparency Mode.'}, {'name': 'Sony WF-1000XM5', 'reference': 'https://www.whathifi.com/best-buys/headphones/best-wireless-earbuds', 'reference_title': 'Best wireless earbuds 2025: top pairs tested by our reviewers - What Hi-Fi?', 'reference_summary': 'The Sony WF-1000XM5 are recognized as the best overall wireless earbuds, offering a fantastic blend of performance and features. They are praised for their comfortable design, noise-cancelling capabilities, and call quality.'}, {'name': 'Jabra Elite 8 Active Gen 2', 'reference': 'https://www.rtings.com/headphones/reviews/best/running-workout', 'reference_title': 'The 7 Best Wireless Earbuds For Running And Working Out of 2025 - RTINGS.com', 'reference_summary': 'The Jabra Elite 8 Active Gen 2 are highlighted as the best wireless Bluetooth earbuds for running, featuring a stable design and sweat-resistant coating. They also have an intuitive control scheme and almost eight hours of continuous battery life.'}]}
```

/tmp/ipykernel_2378016/2132460809.py:28 <module>
    result_competition: CompetitionResult(
        category='earbuds',
        rationale=(
            'To identify top competitors in the earbud market, I focused on recent reviews and comparisons from reputa'
            'ble sources. I aimed for a mix of brands and models that excel in different areas such as noise cancellat'
            'ion, sound quality, comfort, and special features like workout-friendly designs. I have focused on the mo'
            'dels available in 2024 and 2025 to ensure currency. This approach provides a comprehensive view of the co'
            'mpetitive landscape, useful for informing design requirements for a new product.'
        ),
        products=[
            CompetitorProduct(
                name='Bose QuietComfort Ultra Earbuds',
                reference='https://www.pcmag.com/reviews/bose-quietcomfort-ultra-earbuds',
                reference_title='Bose QuietComfort Ultra Earbuds Review',
   

CompetitionResult(category='earbuds', rationale='To identify top competitors in the earbud market, I focused on recent reviews and comparisons from reputable sources. I aimed for a mix of brands and models that excel in different areas such as noise cancellation, sound quality, comfort, and special features like workout-friendly designs. I have focused on the models available in 2024 and 2025 to ensure currency. This approach provides a comprehensive view of the competitive landscape, useful for informing design requirements for a new product.', products=[CompetitorProduct(name='Bose QuietComfort Ultra Earbuds', reference='https://www.pcmag.com/reviews/bose-quietcomfort-ultra-earbuds', reference_title='Bose QuietComfort Ultra Earbuds Review', reference_summary='The Bose QuietComfort Ultra Earbuds are highlighted as offering the best noise cancellation, making them a top pick for users prioritizing ANC. They are also recognized as best for iPhone users.'), CompetitorProduct(name='Techni

#### Post Processing
Google hides the URL from the model, so we need to extract it by resolving the redirect.
Finally, save everything to a JSON file for use in the later stages of the pipeline.

In [12]:
def resolve_redirect(url):
    resp = requests.get(url, allow_redirects=False)
    if 300 <= resp.status_code < 400:
        return resp.headers.get("Location")
    return url

In [13]:
output = {}

assert result_competition.category == result_metrics.category and result_competition.category == PRODUCT
output["category"] = PRODUCT
output["metrics_rationale"] = result_metrics.rationale
output["metrics"] = result_metrics.metrics
output["competition_rationale"] = result_competition.rationale
output["competition_products"] = []

for product in result_competition.products:
    product_dict = {
        "name": product.name,
        "reference": resolve_redirect(product.reference),
        "reference_title": product.reference_title,
        "reference_summary": product.reference_summary,
    }
    output["competition_products"].append(product_dict)

output

{'category': 'earbuds',
 'metrics_rationale': 'These metrics are crucial for evaluating and designing high-quality earbuds. Sound quality is paramount for user satisfaction. Comfort and fit ensure prolonged use without discomfort. Battery life determines the duration of uninterrupted listening. Durability ensures the longevity of the product. Noise cancellation enhances the listening experience in various environments.',
 'metrics': ['sound quality',
  'comfort and fit',
  'battery life',
  'durability',
  'noise cancellation'],
 'competition_rationale': 'To identify top competitors in the earbud market, I focused on recent reviews and comparisons from reputable sources. I aimed for a mix of brands and models that excel in different areas such as noise cancellation, sound quality, comfort, and special features like workout-friendly designs. I have focused on the models available in 2024 and 2025 to ensure currency. This approach provides a comprehensive view of the competitive landscap

In [14]:
with open(DATA_DIR / "stage_1.json", "w") as f:
    import json
    json.dump(output, f, indent=2)