Continues from 2_find_datasheet.ipynb

In [1]:
from google import genai
from google.genai import types
from IPython.display import Markdown
from pydantic import BaseModel, RootModel, Field, create_model
from devtools import debug
from pathlib import Path
from typing import *
from tqdm.auto import tqdm
import json
import time

### Config

In [2]:
PRODUCT = "earbuds"
MAX_SPECS = 20

In [3]:
DATA_DIR = Path("session") / PRODUCT

STAGE1_PATH = DATA_DIR / "stage_1.json"
assert STAGE1_PATH.exists(), "Run 1_describe_product.ipynb first!"

DATASHEET_PATH = DATA_DIR / "datasheets.json"
assert DATASHEET_PATH.exists(), "Run 2_find_datasheet.ipynb first!"

SPECS_PATH = DATA_DIR / "specs.json"

In [4]:
# Should be from Google AI Studio.
GOOGLE_AI_KEY = "AIzaSyDAlPx7St5BUXqlwiqFKvlT-Sc2dnTT4Jc"
# 2.0 Flash since free 1500 RPD and Gemma's structured output is disabled.
GOOGLE_AI_MODEL = "gemini-2.0-flash"
# GOOGLE_AI_MODEL = "gemini-2.5-flash-preview-04-17"

### Operations

#### Setup

In [5]:
client = genai.Client(api_key=GOOGLE_AI_KEY)

In [6]:
with open(DATASHEET_PATH) as f:
    datasheets = json.load(f)

competitors = list(datasheets.keys())
print(competitors)

['Sony WF-1000XM5', 'Bose QuietComfort Ultra Earbuds', 'OnePlus Buds 3']


In [7]:
with open(STAGE1_PATH) as f:
    stage1_meta = json.load(f)

print(stage1_meta["specs"])

['Noise Cancellation', 'Battery Life', 'Water Resistance', 'Bluetooth Version', 'Charging Case Features', 'Microphone Quality', 'Comfort and Fit', 'Audio Codecs', 'Driver Size', 'Impedance']


#### Extracting Tabulated Specs from Datasheet HTML
One advantage over using Gemini Grounded Search is more control over how the
sources are found. The other is using only one request instead of two, since
Grounded Search Mode disables Structured Output, but feeding it HTML in context
doesn't.

In [8]:
class Spec(BaseModel):
    """A single spec for a product."""

    name: str = Field(description="Name of the technical specification")
    description: str = Field(
        description="Detailed explanation of what the specification means"
    )
    value: str = Field(description="Exact/concise value of the specification")
    unit: str = Field(
        description="Units for the specification, leave empty if not applicable"
    )


class ProductSpecNames(BaseModel):
    """A list of specs for a product."""

    product: str = Field(description="Name of the product")
    names: List[str] = Field(
        max_length=MAX_SPECS * 2,
        description="List of technical specification names for the product",
    )
    ordered: List[str] = Field(
        max_length=MAX_SPECS,
        description="Same as names, but ordered by importance from most common to least common",
    )

In [9]:
def prompt_specs(product_name, html):
    return f"""\
### Webpage
```html
{html}
```

### Job Description
Your job is to extract the names of the technical specifications of {product_name} {PRODUCT} \
from its HTML webpage above.

First, extract the names of all technical specifications. \
Some may be grouped under the same heading but should be listed separately.

Second, order the names by importance from most common to least common, keeping \
only the top **{MAX_SPECS}** technical specifications frequently compared for {PRODUCT}.

Some examples of technical specifications are:
{"\n".join(f"- {s}" for s in stage1_meta["specs"])}

However, do note that you should write the names of the specifications found as-is \
from the webpage, and not the examples above.

Use the following JSON schema below:

### Response Schema
{ProductSpecNames.model_json_schema()}\
"""


def prompt_specs_specific(product_name, html, specs):
    inner = create_model(
        "Specs",
        **{spec: (Spec, ...) for spec in specs},
    )
    schema = create_model(
        "ProductSpecs",
        product=(Literal[product_name], Field(description="Name of the product")),
        specs=(inner, Field(description="List of specs for the product")),
    )

    return (
        f"""\
### Webpage
```html
{html}
```

### Job Description
Your job is to extract the values of the technical specifications of {product_name} {PRODUCT} \
from its HTML webpage. Use the context of the webpage to explain what each specification \
means as it will be used later. The specifications to extract are:
{"\n".join(f"- {s}" for s in specs)}

Finally, use the following JSON schema below:

### Response Schema
{schema.model_json_schema()}\
""",
        schema,
    )


def extract_specs(product_name, html):
    prompt = prompt_specs(product_name, html)
    names = None
    while names is None:
        try:
            resp = client.models.generate_content(
                model=GOOGLE_AI_MODEL,
                contents=prompt,
                config=types.GenerateContentConfig(
                    responseMimeType="application/json",
                    responseSchema=ProductSpecNames,
                ),
            )
            names = resp.parsed
        except Exception as e:
            print(f"Error: {e}")

        if names is None:
            print("Retrying...")
            time.sleep(3)  # Avoid rate limit
            continue

    names = names.ordered
    print(f"Extracted {len(names)} specs: {names}")
    print("Extracting values...")
    time.sleep(3)  # Avoid rate limit

    prompt2, schema = prompt_specs_specific(product_name, html, names)
    # display(Markdown(prompt2))

    data = None
    while data is None:
        try:
            resp2 = client.models.generate_content(
                model=GOOGLE_AI_MODEL,
                contents=prompt2,
                config=types.GenerateContentConfig(
                    responseMimeType="application/json",
                    responseSchema=schema,
                ),
            )
            data = resp2.parsed
        except Exception as e:
            print(f"Error: {e}")

        if data is None:
            print("Retrying...")
            time.sleep(3)  # Avoid rate limit
            continue

    return data


In [10]:
all_specs_raw = {}

for competitor, data in tqdm(datasheets.items()):
    print(f"Extracting: {competitor}")
    html = data["html"]
    specs = extract_specs(competitor, html)
    # debug(specs)
    all_specs_raw[competitor] = specs.model_dump()
    print(f"Extracted: {competitor}")
    time.sleep(3)  # Avoid rate limit

  0%|          | 0/3 [00:00<?, ?it/s]

Extracting: Sony WF-1000XM5
Extracted 20 specs: ['Noise Canceling', 'Battery Life', 'Waterproof', 'Bluetooth® Specification', 'Supported Audio Format(S)', 'WEIGHT', 'DRIVER UNIT', 'HEADPHONE TYPE', 'FREQUENCY RESPONSE (BLUETOOTH® COMMUNICATION)', 'BATTERY CHARGE TIME', 'BATTERY CHARGE METHOD', 'BATTERY LIFE (CONTINUOUS MUSIC PLAYBACK TIME)', 'BATTERY LIFE (CONTINUOUS COMMUNICATION TIME)', 'FREQUENCY RANGE', 'BLUETOOTH® VERSION', 'PROFILE', 'Charging Case', 'DIMENSION', 'DSEE Extreme', 'AMBIENT SOUND MODE']
Extracting values...
Extracted: Sony WF-1000XM5
Extracting: Bose QuietComfort Ultra Earbuds
Error: 499 CANCELLED. {'error': {'code': 499, 'message': 'The operation was cancelled.', 'status': 'CANCELLED'}}
Retrying...
Extracted 15 specs: ['Noise Cancelling', 'Microphones', 'Water Resistant', 'Headphone Fit', 'Battery Charge Time', 'Rechargeable', 'Wireless Connectivity', 'Charging Interface(s)', 'Case', 'Product Material', 'Product Case Material', 'Noise Control Type', 'Charging Acces

In [None]:
_ = debug(all_specs_raw)

/tmp/ipykernel_24769/1955527673.py:1 <module>
    all_specs_raw: {
        'Sony WF-1000XM5': {
            'product': 'Sony WF-1000XM5',
            'specs': {
                'Noise Canceling': {
                    'name': 'Noise Canceling',
                    'description': 'Indicates whether the headphones have noise canceling feature to reduce external noise.',
                    'value': 'Yes',
                    'unit': '',
                },
                'Battery Life': {
                    'name': 'Battery Life',
                    'description': 'General information about the battery life of the product.',
                    'value': 'Max. 8 hrs (NC ON) | Max. 12 hrs (NC OFF)',
                    'unit': '',
                },
                'Waterproof': {
                    'name': 'Waterproof',
                    'description': 'Indicates whether the headphones are waterproof and to what degree.',
                    'value': 'Yes (IPX4 equivalent)',
        

#### Standardize Specs
So some webpages (looking at you OnePlus) have spelling mistakes. Otherwise,
different brands use slightly different names for the same thing. And of course,
they don't all include the same things. So now we get the agent to standardize
the names of the specs, putting NIL for specs that are missing for specific brands.

In [12]:
# Doing it this way is to force it to consider mapping for every product.
SpecsMapping = create_model(
    "SpecsMapping",
    mapped_name=(str, Field(description="Standardized name of the specification")),
    **{
        competitor: (
            str,
            Field(
                description="Product's original specification name mapped to this standardized name, else N/A"
            ),
        )
        for competitor in competitors
    },
)

# I also force it to consider which specs its keeping and which its dropping first
# before generating the mapping.
SpecsDropped = create_model(
    "SpecsDropped",
    **{
        competitor: (
            List[str],
            Field(description="Specifications that were dropped from this product"),
        )
        for competitor in competitors
    },
)
SpecsKept = create_model(
    "SpecsKept",
    **{
        competitor: (
            List[str],
            Field(description="Specifications that were kept from this product"),
        )
        for competitor in competitors
    },
)


class SpecsMappingResult(BaseModel):
    dropped: SpecsDropped = Field(description="Specifications that were dropped")
    kept: SpecsKept = Field(description="Specifications that were kept")
    results: List[SpecsMapping] = Field(description="Standardized specification names")


In [13]:
prompt_standardize = f"""\
### Job Description
Your job is to standardize the naming of technical specifications across different \
{PRODUCT}. They are:
{"\n".join([f"- {prod}" for prod in datasheets])}

Unfortunately, different vendors use different names to refer to the same thing. \
They may also make spelling mistakes. Some vendors may include specs that others \
do not, while others may omit specs that are present in the others.

Include specifications that are present in most products, and ignore those that \
are only present for one or two products. For products which are missing a specification, \
put N/A in the mapping.

You are to output a JSON object that maps the original specification name for a product \
to the standardized name. You are also to consider which specs will be kept or dropped \
for each product before answering. Use the following JSON schema:

### Response Schema
{SpecsMappingResult.model_json_schema()}

### Original Names
"""

for competitor, data in all_specs_raw.items():
    prompt_standardize += f"""\
#### {competitor}
```json
{json.dumps(data, indent=2)}
```
"""

display(Markdown(prompt_standardize))

### Job Description
Your job is to standardize the naming of technical specifications across different earbuds. They are:
- Sony WF-1000XM5
- Bose QuietComfort Ultra Earbuds
- OnePlus Buds 3

Unfortunately, different vendors use different names to refer to the same thing. They may also make spelling mistakes. Some vendors may include specs that others do not, while others may omit specs that are present in the others.

Include specifications that are present in most products, and ignore those that are only present for one or two products. For products which are missing a specification, put N/A in the mapping.

You are to output a JSON object that maps the original specification name for a product to the standardized name. You are also to consider which specs will be kept or dropped for each product before answering. Use the following JSON schema:

### Response Schema
{'$defs': {'SpecsDropped': {'properties': {'Sony WF-1000XM5': {'description': 'Specifications that were dropped from this product', 'items': {'type': 'string'}, 'title': 'Sony Wf-1000Xm5', 'type': 'array'}, 'Bose QuietComfort Ultra Earbuds': {'description': 'Specifications that were dropped from this product', 'items': {'type': 'string'}, 'title': 'Bose Quietcomfort Ultra Earbuds', 'type': 'array'}, 'OnePlus Buds 3': {'description': 'Specifications that were dropped from this product', 'items': {'type': 'string'}, 'title': 'Oneplus Buds 3', 'type': 'array'}}, 'required': ['Sony WF-1000XM5', 'Bose QuietComfort Ultra Earbuds', 'OnePlus Buds 3'], 'title': 'SpecsDropped', 'type': 'object'}, 'SpecsKept': {'properties': {'Sony WF-1000XM5': {'description': 'Specifications that were kept from this product', 'items': {'type': 'string'}, 'title': 'Sony Wf-1000Xm5', 'type': 'array'}, 'Bose QuietComfort Ultra Earbuds': {'description': 'Specifications that were kept from this product', 'items': {'type': 'string'}, 'title': 'Bose Quietcomfort Ultra Earbuds', 'type': 'array'}, 'OnePlus Buds 3': {'description': 'Specifications that were kept from this product', 'items': {'type': 'string'}, 'title': 'Oneplus Buds 3', 'type': 'array'}}, 'required': ['Sony WF-1000XM5', 'Bose QuietComfort Ultra Earbuds', 'OnePlus Buds 3'], 'title': 'SpecsKept', 'type': 'object'}, 'SpecsMapping': {'properties': {'mapped_name': {'description': 'Standardized name of the specification', 'title': 'Mapped Name', 'type': 'string'}, 'Sony WF-1000XM5': {'description': "Product's original specification name mapped to this standardized name, else N/A", 'title': 'Sony Wf-1000Xm5', 'type': 'string'}, 'Bose QuietComfort Ultra Earbuds': {'description': "Product's original specification name mapped to this standardized name, else N/A", 'title': 'Bose Quietcomfort Ultra Earbuds', 'type': 'string'}, 'OnePlus Buds 3': {'description': "Product's original specification name mapped to this standardized name, else N/A", 'title': 'Oneplus Buds 3', 'type': 'string'}}, 'required': ['mapped_name', 'Sony WF-1000XM5', 'Bose QuietComfort Ultra Earbuds', 'OnePlus Buds 3'], 'title': 'SpecsMapping', 'type': 'object'}}, 'properties': {'dropped': {'$ref': '#/$defs/SpecsDropped', 'description': 'Specifications that were dropped'}, 'kept': {'$ref': '#/$defs/SpecsKept', 'description': 'Specifications that were kept'}, 'results': {'description': 'Standardized specification names', 'items': {'$ref': '#/$defs/SpecsMapping'}, 'title': 'Results', 'type': 'array'}}, 'required': ['dropped', 'kept', 'results'], 'title': 'SpecsMappingResult', 'type': 'object'}

### Original Names
#### Sony WF-1000XM5
```json
{
  "product": "Sony WF-1000XM5",
  "specs": {
    "Noise Canceling": {
      "name": "Noise Canceling",
      "description": "Indicates whether the headphones have noise canceling feature to reduce external noise.",
      "value": "Yes",
      "unit": ""
    },
    "Battery Life": {
      "name": "Battery Life",
      "description": "General information about the battery life of the product.",
      "value": "Max. 8 hrs (NC ON) | Max. 12 hrs (NC OFF)",
      "unit": ""
    },
    "Waterproof": {
      "name": "Waterproof",
      "description": "Indicates whether the headphones are waterproof and to what degree.",
      "value": "Yes (IPX4 equivalent)",
      "unit": ""
    },
    "Bluetooth\u00ae Specification": {
      "name": "Bluetooth\u00ae Specification",
      "description": "General information about the bluetooth specification of the product.",
      "value": "Bluetooth Specification Version 5.3",
      "unit": ""
    },
    "Supported Audio Format(S)": {
      "name": "Supported Audio Format(S)",
      "description": "The audio formats supported by the Bluetooth connection.",
      "value": "SBC, AAC, LDAC, LC3",
      "unit": ""
    },
    "WEIGHT": {
      "name": "WEIGHT",
      "description": "The weight of the earbuds.",
      "value": "Approx. 0.21",
      "unit": "\" x2 (including earbud tips (M))"
    },
    "DRIVER UNIT": {
      "name": "DRIVER UNIT",
      "description": "The size of the driver unit in the headphones.",
      "value": "0.33",
      "unit": "\""
    },
    "HEADPHONE TYPE": {
      "name": "HEADPHONE TYPE",
      "description": "The type of headphones, either closed or open.",
      "value": "Closed, dynamic",
      "unit": ""
    },
    "FREQUENCY RESPONSE (BLUETOOTH\u00ae COMMUNICATION)": {
      "name": "FREQUENCY RESPONSE (BLUETOOTH\u00ae COMMUNICATION)",
      "description": "The frequency response range of the headphones when connected via Bluetooth.",
      "value": "20\u201320,000 Hz (44.1 kHz sampling) | 20\u201340,000 Hz (LDAC 96 kHz sampling 990 kbps)",
      "unit": ""
    },
    "BATTERY CHARGE TIME": {
      "name": "BATTERY CHARGE TIME",
      "description": "The time it takes to fully charge the headphones' battery.",
      "value": "Approx. 1.5 hrs",
      "unit": ""
    },
    "BATTERY CHARGE METHOD": {
      "name": "BATTERY CHARGE METHOD",
      "description": "The method used to charge the headphones' battery.",
      "value": "USB charger | Wireless charger (with case)",
      "unit": ""
    },
    "BATTERY LIFE (CONTINUOUS MUSIC PLAYBACK TIME)": {
      "name": "BATTERY LIFE (CONTINUOUS MUSIC PLAYBACK TIME)",
      "description": "The continuous music playback time of the headphones on a full charge.",
      "value": "Max. 8 hrs (NC ON) | Max. 12 hrs (NC OFF)",
      "unit": ""
    },
    "BATTERY LIFE (CONTINUOUS COMMUNICATION TIME)": {
      "name": "BATTERY LIFE (CONTINUOUS COMMUNICATION TIME)",
      "description": "The continuous communication time of the headphones on a full charge.",
      "value": "Max. 6 hrs (NC ON) | Max. 7 hrs (NC OFF)",
      "unit": ""
    },
    "FREQUENCY RANGE": {
      "name": "FREQUENCY RANGE",
      "description": "The frequency range of the Bluetooth connection.",
      "value": "2.4 GHz band (2.4000\u20132.4835 GHz)",
      "unit": ""
    },
    "BLUETOOTH\u00ae VERSION": {
      "name": "BLUETOOTH\u00ae VERSION",
      "description": "The version of Bluetooth supported by the headphones.",
      "value": "Bluetooth Specification Version 5.3",
      "unit": ""
    },
    "PROFILE": {
      "name": "PROFILE",
      "description": "The Bluetooth profiles supported by the headphones, which determine the types of devices and functions they can connect with.",
      "value": "A2DP, AVRCP, HFP, HSP, TMAP, CSIP, MCP, VCP, CCP",
      "unit": ""
    },
    "Charging Case": {
      "name": "Charging Case",
      "description": "General information about the charging case.",
      "value": "Approx. 1.38 oz",
      "unit": ""
    },
    "DIMENSION": {
      "name": "DIMENSION",
      "description": "The dimensions of the charging case.",
      "value": "Approx. 2.54\" x 1.57\" x 1.04\"",
      "unit": ""
    },
    "DSEE Extreme": {
      "name": "DSEE Extreme",
      "description": "Indicates whether the headphones have DSEE Extreme feature.",
      "value": "Yes",
      "unit": ""
    },
    "AMBIENT SOUND MODE": {
      "name": "AMBIENT SOUND MODE",
      "description": "Indicates whether the headphones have ambient sound mode feature.",
      "value": "Yes",
      "unit": ""
    }
  }
}
```
#### Bose QuietComfort Ultra Earbuds
```json
{
  "product": "Bose QuietComfort Ultra Earbuds",
  "specs": {
    "Noise Cancelling": {
      "name": "Noise Cancelling",
      "description": "Indicates whether the headphones have active noise-cancelling technology to reduce unwanted ambient sounds.",
      "value": "Yes",
      "unit": ""
    },
    "Microphones": {
      "name": "Microphones",
      "description": "Specifies if the headphones have built-in microphones for calls and voice commands.",
      "value": "Built-in Microphone",
      "unit": ""
    },
    "Water Resistant": {
      "name": "Water Resistant",
      "description": "Indicates the level of protection against water or sweat, usually measured by an IP rating.",
      "value": "IPX4",
      "unit": ""
    },
    "Headphone Fit": {
      "name": "Headphone Fit",
      "description": "Describes how the headphones are designed to fit in or around the ear.",
      "value": "In Ear",
      "unit": ""
    },
    "Battery Charge Time": {
      "name": "Battery Charge Time",
      "description": "The amount of time it takes to fully charge the headphone's battery.",
      "value": "2",
      "unit": "hours"
    },
    "Rechargeable": {
      "name": "Rechargeable",
      "description": "Indicates whether the headphones can be recharged, usually via USB.",
      "value": "Yes",
      "unit": ""
    },
    "Wireless Connectivity": {
      "name": "Wireless Connectivity",
      "description": "Specifies the type of wireless technology used for connecting to devices.",
      "value": "A2DP Bluetooth Audio Streaming, HFP Bluetooth",
      "unit": ""
    },
    "Charging Interface(s)": {
      "name": "Charging Interface(s)",
      "description": "The type of connector used to charge the headphones.",
      "value": "USB C PORT",
      "unit": ""
    },
    "Case": {
      "name": "Case",
      "description": "Describes the type of case included with the headphones.",
      "value": "Charging",
      "unit": ""
    },
    "Product Material": {
      "name": "Product Material",
      "description": "The materials used in the construction of the headphones.",
      "value": "Plastic (PC-ABS), Silicone, Metal",
      "unit": ""
    },
    "Product Case Material": {
      "name": "Product Case Material",
      "description": "The material used to construct the charging case of the headphones.",
      "value": "Plastic (Hard)",
      "unit": ""
    },
    "Noise Control Type": {
      "name": "Noise Control Type",
      "description": "Specifies the type of noise control offered by the headphones, such as active or adjustable noise cancellation.",
      "value": "Adjustable Noise Cancelling",
      "unit": ""
    },
    "Charging Accessory included": {
      "name": "Charging Accessory included",
      "description": "Indicates whether a charging cable or adapter is included with the headphones.",
      "value": "Yes",
      "unit": ""
    },
    "Bose App": {
      "name": "Bose App",
      "description": "Specifies if the headphones are compatible with the Bose app for added control and customization.",
      "value": "Bose App",
      "unit": ""
    },
    "Bud Single": {
      "name": "Bud Single",
      "description": "Dimensions of a single earbud",
      "value": "1.23\" H x 0.79\" W x 0.96\" D (0.017 lb)",
      "unit": ""
    }
  }
}
```
#### OnePlus Buds 3
```json
{
  "product": "OnePlus Buds 3",
  "specs": {
    "Noise Cancellation": {
      "name": "Noise Cancellation",
      "description": "The noise cancellation level of the earbuds. It indicates the maximum amount of noise that the earbuds can block out, which helps to create a more immersive listening experience by reducing distractions from the surrounding environment.",
      "value": "Up to 49dB Smart Adaptive Noise Cancellation",
      "unit": "dB"
    },
    "Battery Life": {
      "name": "Battery Life",
      "description": "The duration for which the earbuds can be used on a single charge.",
      "value": "Earbuds: 10 hours (ANC off, 50% volume, AAC); Earbuds + Case: 44 hours (ANC off, 50% volume, AAC); Earbuds: 6.5 hours (ANC on/Transparency mode, 50% volume, AAC); Earbuds + Case: 28 hours (ANC on/Transparency mode, 50% volume, AAC); Earbuds: 7 hours (ANC off, 50% volume, LHDC); Earbuds + Case: 30 hours (ANC off, 50% volume, LHDC); Earbuds: 5.5 hours (ANC on/Transparency mode, 50% volume, LHDC); Earbuds + Case: 24 hours (ANC on/Transparency mode, 50% volume, LHDC)",
      "unit": ""
    },
    "Bluetooth\u00c2\u00ae Version": {
      "name": "Bluetooth\u00c2\u00ae Version",
      "description": "The version of Bluetooth supported by the earbuds. A higher version typically offers faster data transfer rates, improved connection stability, and lower power consumption.",
      "value": "Bluetooth\u00c2\u00ae 5.3",
      "unit": ""
    },
    "Audio Codecs": {
      "name": "Audio Codecs",
      "description": "Methods for encoding and decoding digital audio data. They affect the quality and bandwidth of audio transmitted wirelessly.",
      "value": "LHDC/AAC/SBC",
      "unit": ""
    },
    "Water and Sweat Resistance": {
      "name": "Water and Sweat Resistance",
      "description": "The level of protection the earbuds have against water and sweat. It is usually measured by an IP rating.",
      "value": "Earbuds: IP55",
      "unit": ""
    },
    "Drivers": {
      "name": "Drivers",
      "description": "The component in the earbuds that produces sound. The size and type of driver can affect the audio quality and frequency response.",
      "value": "10.4mm woofer + 6mm tweeter dual drivers",
      "unit": "mm"
    },
    "Dimentions": {
      "name": "Dimentions",
      "description": "The physical size of the earbuds and charging case.",
      "value": "Earbuds: 31.68*20.22*24.4mm; Charging case: 58.72*50.15*25.81mm",
      "unit": "mm"
    },
    "Weight": {
      "name": "Weight",
      "description": "The physical weight of the earbuds and charging case.",
      "value": "Earbuds: 4.8g; Charging case: 40.8g",
      "unit": "g"
    },
    "Battery Capacity (earbuds)": {
      "name": "Battery Capacity (earbuds)",
      "description": "The amount of electrical energy that the earbuds can store, measured in milliampere-hours (mAh). A higher capacity generally translates to longer battery life.",
      "value": "58mAh",
      "unit": "mAh"
    },
    "Battery Capacity (charging case)": {
      "name": "Battery Capacity (charging case)",
      "description": "The amount of electrical energy that the charging case can store, measured in milliampere-hours (mAh).",
      "value": "520mAh",
      "unit": "mAh"
    },
    "Charging Interface": {
      "name": "Charging Interface",
      "description": "The type of connector used to charge the earbuds and the charging case.",
      "value": "Wired: USB Type-C",
      "unit": ""
    },
    "Microphones": {
      "name": "Microphones",
      "description": "The number of microphones on each earbud, used for voice calls and noise cancellation.",
      "value": "3 mics per side",
      "unit": ""
    },
    "Transparency Mode": {
      "name": "Transparency Mode",
      "description": "A feature that allows external sounds to be heard while wearing the earbuds, providing awareness of the surroundings.",
      "value": "",
      "unit": ""
    },
    "Frequency Response": {
      "name": "Frequency Response",
      "description": "The range of frequencies that the earbuds can reproduce, measured in Hertz (Hz). A wider range generally indicates better audio quality.",
      "value": "15Hz~40KHz",
      "unit": "Hz"
    },
    "Latency": {
      "name": "Latency",
      "description": "The delay between the audio source and when it is heard through the earbuds, measured in milliseconds (ms). Lower latency is important for gaming and video playback.",
      "value": "94ms",
      "unit": "ms"
    },
    "Touch Control": {
      "name": "Touch Control",
      "description": "The gestures that can be used to control the earbuds, such as play/pause, volume, and answering calls.",
      "value": "Tap twice: Next song/Answer incoming call/Hang up; Tap three times: Previous song; Tap and hold 3s: Reject call; Press and hold: Switch noise cancellation/Transparency mode",
      "unit": ""
    },
    "Bluetooth\u00c2\u00ae Codec": {
      "name": "Bluetooth\u00c2\u00ae Codec",
      "description": "Methods for encoding and decoding digital audio data over Bluetooth. Different codecs offer varying levels of audio quality and compression efficiency.",
      "value": "LHDC/AAC/SBC",
      "unit": ""
    },
    "Distance": {
      "name": "Distance",
      "description": "The maximum range at which the earbuds can maintain a stable Bluetooth connection with the audio source.",
      "value": "10m",
      "unit": "m"
    },
    "Playback (ANC off, 50% volume, AAC)": {
      "name": "Playback (ANC off, 50% volume, AAC)",
      "description": "Battery life with ANC turned off, volume at 50%, and AAC codec",
      "value": "Earbuds: 10 hours; Earbuds + Case: 44 hours",
      "unit": ""
    },
    "Playback (ANC on/Transparency mode, 50% volume, AAC)": {
      "name": "Playback (ANC on/Transparency mode, 50% volume, AAC)",
      "description": "Battery life with ANC on or Transparency mode, volume at 50%, and AAC codec",
      "value": "Earbuds: 6.5 hours; Earbuds + Case: 28 hours",
      "unit": ""
    }
  }
}
```


In [14]:
resp_standardize = client.models.generate_content(
    model=GOOGLE_AI_MODEL,
    contents=prompt_standardize,
    config=types.GenerateContentConfig(
        responseMimeType="application/json",
        responseSchema=SpecsMappingResult,
    ),
)

standardized = resp_standardize.parsed

#### Post-Process to Unify Specs

In [16]:
all_specs = {}

for mapping in standardized.results:
    name = mapping.mapped_name

    for competitor in competitors:
        old_name = getattr(mapping, competitor)
        if old_name == "N/A":
            print(f"Skipped {name} for {competitor}.")
            continue

        try:
            specs = all_specs_raw[competitor]["specs"][old_name]
        except KeyError:
            print(f"AI halluincated something, skipped {name} for {competitor}.")
            continue
        all_specs[name] = all_specs.get(name, {})
        all_specs[name][competitor] = specs

print(json.dumps(all_specs, indent=2))

Skipped Battery Life for Bose QuietComfort Ultra Earbuds.
AI halluincated something, skipped Bluetooth Version for Sony WF-1000XM5.
Skipped Bluetooth Version for Bose QuietComfort Ultra Earbuds.
AI halluincated something, skipped Bluetooth Version for OnePlus Buds 3.
Skipped Audio Codecs for Bose QuietComfort Ultra Earbuds.
Skipped Drivers for Sony WF-1000XM5.
Skipped Drivers for Bose QuietComfort Ultra Earbuds.
Skipped Weight for Sony WF-1000XM5.
Skipped Weight for Bose QuietComfort Ultra Earbuds.
Skipped Charging Interface for Sony WF-1000XM5.
Skipped Microphones for Sony WF-1000XM5.
Skipped Microphones for Bose QuietComfort Ultra Earbuds.
Skipped Frequency Response for Sony WF-1000XM5.
Skipped Frequency Response for Bose QuietComfort Ultra Earbuds.
Skipped Battery Charge Time for Sony WF-1000XM5.
Skipped Battery Charge Time for OnePlus Buds 3.
{
  "Noise Cancellation": {
    "Sony WF-1000XM5": {
      "name": "Noise Canceling",
      "description": "Indicates whether the headphones 

In [17]:
with open(SPECS_PATH, "w") as f:
    json.dump(all_specs, f, indent=2)