Continues from 2_find_datasheet.ipynb

In [1]:
import json
import os
import time
from pathlib import Path
from typing import *

from devtools import debug
from google import genai
from google.genai import types
from IPython.display import Markdown
from pydantic import BaseModel, RootModel, Field, create_model
from tqdm.auto import tqdm

### Config

In [2]:
# Product category being researched. It names the session sub-directory and is
# interpolated into every prompt below.
PRODUCT = "wireless over-ear headphones"
# Cap on how many spec names are kept per product (used by ProductSpecNames and
# by the name-extraction prompt).
MAX_SPECS = 20

In [3]:
# Every artefact for this product lives under session/<PRODUCT>/.
DATA_DIR = Path("session", PRODUCT)

# Output of notebook 1 - required, so fail early when it is missing.
STAGE1_PATH = DATA_DIR.joinpath("stage_1.json")
assert STAGE1_PATH.exists(), "Run 1_describe_product.ipynb first!"

# Output of notebook 2 - required as well.
DATASHEET_PATH = DATA_DIR.joinpath("datasheets.json")
assert DATASHEET_PATH.exists(), "Run 2_find_datasheet.ipynb first!"

# Not checked for existence here; presumably written later in this notebook.
SPECS_PATH = DATA_DIR.joinpath("specs.json")

In [4]:
# The key is issued by Google AI Studio. It is read from the GOOGLE_AI_KEY
# environment variable so that it never ends up in the notebook or in version
# control. A key that was committed in an earlier revision of this notebook must
# be treated as leaked: revoke it in AI Studio and create a new one.
GOOGLE_AI_KEY = os.environ.get("GOOGLE_AI_KEY")
if not GOOGLE_AI_KEY:
    # Warn right away instead of failing deep inside the first request.
    print("GOOGLE_AI_KEY is not set - export it before starting Jupyter.")
# 2.0 Flash since free 1500 RPD and Gemma's structured output is disabled.
GOOGLE_AI_MODEL = "gemini-2.0-flash"
# GOOGLE_AI_MODEL = "gemini-2.5-flash-preview-04-17"

### Operations

#### Setup

In [5]:
# Gemini API client used by the extraction requests below.
client = genai.Client(api_key=GOOGLE_AI_KEY)

In [6]:
# Datasheets gathered by the previous notebook, keyed by competitor product name.
datasheets = json.loads(DATASHEET_PATH.read_text())

competitors = [*datasheets]
print(competitors)

['Bose QuietComfort Ultra Headphones', 'Sony WH-1000XM5', 'Focal Bathys', 'Anker Soundcore Space One', 'Apple AirPods Max']


In [7]:
# Stage-1 metadata from the first notebook; its "specs" list is reused as the
# examples shown in the name-extraction prompt.
stage1_meta = json.loads(STAGE1_PATH.read_text())

print(stage1_meta["specs"])

['Noise Cancellation Technology', 'Audio Codec Support (e.g., AAC, SBC, aptX, LDAC)', 'Battery Life', 'Driver Size', 'Impedance', 'Bluetooth Version', 'Weight', 'Comfort and Fit (e.g., headband padding, ear cup material)', 'Microphone Quality', 'Water Resistance Rating']


#### Extracting Tabulated Specs from Datasheet HTML
Feeding the datasheet HTML to the model directly has two advantages over Gemini
Grounded Search. First, we keep more control over how the sources are found.
Second, it takes one request instead of two: Grounded Search mode disables
Structured Output, whereas passing the HTML in context does not.

In [15]:
# Schema for one extracted specification.
# NOTE: pydantic copies the class docstring and every Field description into
# model_json_schema(), which is embedded in the prompt - treat them as runtime
# text, not as plain documentation.
class Spec(BaseModel):
    """A single spec for a product."""

    name: str = Field(description="Name of the technical specification")
    description: str = Field(
        description="Detailed explanation of what the specification means"
    )
    # Union so numeric and boolean values can be returned typed, with str as the
    # catch-all for free-form values.
    value: Union[int, float, bool, str] = Field(description="Exact/concise value of the specification")
    unit: str = Field(
        description="Units for the specification, leave empty if not applicable"
    )


# Response schema of the first (name-extraction) request.
# NOTE: the docstring and Field descriptions are part of the JSON schema that is
# sent to the model, so they are runtime text.
class ProductSpecNames(BaseModel):
    """A list of specs for a product."""

    product: str = Field(description="Name of the product")
    # Everything found on the page; allowed to be up to twice as long as the
    # final selection.
    names: List[str] = Field(
        max_length=MAX_SPECS * 2,
        description="List of technical specification names for the product",
    )
    # The trimmed, ranked selection - this is the list extract_specs goes on to use.
    ordered: List[str] = Field(
        max_length=MAX_SPECS,
        description="Same as names, but ordered by importance from most common to least common",
    )

In [None]:
def prompt_specs(product_name, html):
    """Build the prompt for the first request: which specs does the page list?

    The prompt embeds the datasheet HTML, asks for every spec name plus a ranked
    top-``MAX_SPECS`` selection, shows the stage-1 spec names as examples and
    ends with the ``ProductSpecNames`` JSON schema.

    Args:
        product_name: Name of the product the page describes.
        html: HTML of the product's datasheet page.

    Returns:
        The prompt as a single string.
    """
    # Built up front so the template below contains plain placeholders only.
    example_bullets = "\n".join(f"- {example}" for example in stage1_meta["specs"])
    response_schema = ProductSpecNames.model_json_schema()

    return f"""\
### Webpage
```html
{html}
```

### Job Description
Your job is to extract the names of the technical specifications of {product_name} {PRODUCT} \
from its HTML webpage above.

First, extract the names of all technical specifications. \
Some may be grouped under the same heading but should be listed separately.

Second, order the names by importance from most common to least common, keeping \
only the top **{MAX_SPECS}** technical specifications frequently compared for {PRODUCT}.

Some examples of technical specifications are:
{example_bullets}

However, do note that you should write the names of the specifications found as-is \
from the webpage, and not the examples above.

Use the following JSON schema below:

### Response Schema
{response_schema}\
"""


def prompt_specs_specific(product_name, html, specs):
    """Build the prompt and response schema for the second (value) request.

    A pydantic model is created on the fly with one required ``Spec`` field per
    entry of ``specs``, wrapped in a ``ProductSpecs`` model whose ``product``
    field only accepts ``product_name``.

    Args:
        product_name: Name of the product the page describes.
        html: HTML of the product's datasheet page.
        specs: Spec names whose values should be extracted.

    Returns:
        A ``(prompt, schema)`` tuple: the prompt string and the generated
        ``ProductSpecs`` model class.
    """
    # One `Spec`-typed field per requested name.
    spec_fields = {}
    for spec in specs:
        spec_fields[spec] = (Spec, Field())
    inner = create_model("Specs", **spec_fields)

    schema = create_model(
        "ProductSpecs",
        product=(Literal[product_name], Field(description="Name of the product")),
        specs=(inner, Field(description="List of specs for the product")),
    )

    spec_bullets = "\n".join(f"- {s}" for s in specs)
    response_schema = schema.model_json_schema()

    prompt = f"""\
### Webpage
```html
{html}
```

### Job Description
Your job is to extract the values of the technical specifications of {product_name} {PRODUCT} \
from its HTML webpage. Use the context of the webpage to explain what each specification \
means as it will be used later as part of `description`. \
`value` should typically be a number or yes/no. The specifications to extract are:
{spec_bullets}

Finally, use the following JSON schema below:

### Response Schema
{response_schema}\
"""
    return prompt, schema


def extract_specs(product_name, html, max_attempts=10, retry_delay=3):
    """Extract the most relevant specs of one product from its datasheet HTML.

    Two structured-output requests are made: the first returns the spec names
    (``ProductSpecNames``), the second returns a value for every name in
    ``ordered`` using the schema built by ``prompt_specs_specific``. When the
    second request fails, the procedure restarts from the first request,
    because the failure is sometimes due to hallucinated spec names for which
    the model cannot find a value.

    Args:
        product_name: Name of the product the page describes.
        html: HTML of the product's datasheet page.
        max_attempts: Number of failed requests (both stages combined) after
            which the function gives up. ``None`` retries forever, which is
            how the function used to behave.
        retry_delay: Seconds to sleep between requests to avoid the rate limit.

    Returns:
        The parsed response of the second request, i.e. an instance of the
        dynamically created ``ProductSpecs`` model.

    Raises:
        RuntimeError: If ``max_attempts`` requests have failed. A permanent
            error (invalid key, exhausted daily quota, ...) would otherwise
            keep the loop spinning and burning requests indefinitely.
    """
    prompt = prompt_specs(product_name, html)
    failures = 0
    last_error = None

    def retry_or_give_up():
        """Record one failed request, then either back off or raise."""
        nonlocal failures
        failures += 1
        if max_attempts is not None and failures >= max_attempts:
            raise RuntimeError(
                f"Giving up on {product_name} after {failures} failed requests"
            ) from last_error
        print("Retrying...")
        time.sleep(retry_delay)  # Avoid rate limit

    # Sometimes the failure is due to hallucinated specs the model can't find.
    # So every failure of the second stage re-runs from here.
    while True:
        # Stage 1: which specs does the page list?
        names = None
        while names is None:
            try:
                resp = client.models.generate_content(
                    model=GOOGLE_AI_MODEL,
                    contents=prompt,
                    config=types.GenerateContentConfig(
                        responseMimeType="application/json",
                        responseSchema=ProductSpecNames,
                    ),
                )
                names = resp.parsed
            except Exception as e:
                last_error = e
                print(f"Error: {e}")

            if names is None:
                retry_or_give_up()

        ordered = names.ordered
        print(f"Extracted {len(ordered)} specs: {ordered}")
        print("Extracting values...")
        time.sleep(retry_delay)  # Avoid rate limit

        # Stage 2: the value of each of those specs.
        data = None
        try:
            # Kept inside the try: the schema is generated from model-produced
            # names, so building it may fail too and should trigger a restart
            # rather than abort the whole extraction.
            prompt2, schema = prompt_specs_specific(product_name, html, ordered)
            # display(Markdown(prompt2))

            resp2 = client.models.generate_content(
                model=GOOGLE_AI_MODEL,
                contents=prompt2,
                config=types.GenerateContentConfig(
                    responseMimeType="application/json",
                    responseSchema=schema,
                ),
            )
            data = resp2.parsed
            if data is None:
                print("Failed to parse:")
                debug(resp2)

        except Exception as e:
            last_error = e
            print(f"Error: {e}")

        if data is not None:
            return data

        retry_or_give_up()



In [None]:
# Raw extraction results are cached on disk. Starting from an empty dict on
# every run made the "Already extracted" check below unreachable and threw away
# all progress whenever the cell was interrupted or the kernel restarted.
# Delete the file to force a fresh extraction.
SPECS_RAW_PATH = DATA_DIR / "specs_raw.json"

if SPECS_RAW_PATH.exists():
    with open(SPECS_RAW_PATH) as f:
        cached_specs = json.load(f)
    # Ignore cached products that are no longer part of the datasheets.
    all_specs_raw = {
        name: specs for name, specs in cached_specs.items() if name in datasheets
    }
else:
    all_specs_raw = {}

for competitor, data in tqdm(datasheets.items()):
    if competitor in all_specs_raw:
        print(f"Already extracted: {competitor}")
        continue

    print(f"Extracting: {competitor}")
    html = data["html"]
    specs = extract_specs(competitor, html)
    all_specs_raw[competitor] = specs.model_dump()

    # Save after every product so an interrupted run can resume where it stopped.
    with open(SPECS_RAW_PATH, "w") as f:
        json.dump(all_specs_raw, f, indent=2)

    print(f"Extracted: {competitor}")
    time.sleep(3)  # Avoid rate limit

  0%|          | 0/5 [00:00<?, ?it/s]

Already extracted: Bose QuietComfort Ultra Headphones
Already extracted: Sony WH-1000XM5
Already extracted: Focal Bathys
Already extracted: Anker Soundcore Space One
Extracting: Apple AirPods Max
Extracted 12 specs: ['Battery', 'Audio Technology', 'Microphones', 'Chip', 'Connectivity', 'Size and\tWeight', 'Sensors', 'Controls', 'Accessibility', 'Color', 'System Requirements', 'In the Box']
Extracting values...
/tmp/ipykernel_14429/722889600.py:112 extract_specs
    resp2: GenerateContentResponse(
        candidates=[
            Candidate(
                content=Content(
                    parts=[
                        Part(
                            video_metadata=None,
                            thought=None,
                            code_execution_result=None,
                            executable_code=None,
                            file_data=None,
                            function_call=None,
                            function_response=None,
                      

In [22]:
# Pretty-print everything extracted so far. Assigning to `_` keeps the notebook
# from echoing the call's return value as a second cell output (presumably
# debug() returns its argument - confirm against devtools).
_ = debug(all_specs_raw)

/tmp/ipykernel_14429/1955527673.py:1 <module>
    all_specs_raw: {
        'Bose QuietComfort Ultra Headphones': {
            'product': 'Bose QuietComfort Ultra Headphones',
            'specs': {
                'Noise Cancelling': {
                    'name': 'Noise Cancelling',
                    'description': (
                        'Indicates whether the headphone has noise cancelling feature to reduce unwanted ambient sound'
                        's.'
                    ),
                    'value': True,
                    'unit': '',
                },
                'Battery Life': {
                    'name': 'Battery Life',
                    'description': 'The maximum duration of use on a full charge.',
                    'value': '24',
                    'unit': 'hours',
                },
                'Wireless Connectivity': {
                    'name': 'Wireless Connectivity',
                    'description': (
                        'Specifies

#### Standardize Specs
Some webpages (looking at you, OnePlus) contain spelling mistakes, and different
brands use slightly different names for the same thing. And of course, they
don't all include the same specs. So now we get the agent to standardize the
names of the specs, putting N/A for specs that are missing for specific brands.

In [23]:
# Doing it this way is to force it to consider mapping for every product.
SpecsMapping = create_model(
    "SpecsMapping",
    mapped_name=(str, Field(description="Standardized name of the specification")),
    **{
        competitor: (
            str,
            Field(
                description="Product's original specification name mapped to this standardized name, else N/A"
            ),
        )
        for competitor in competitors
    },
)

# I also force it to consider which specs its keeping and which its dropping first
# before generating the mapping.
SpecsDropped = create_model(
    "SpecsDropped",
    **{
        competitor: (
            List[str],
            Field(description="Specifications that were dropped from this product"),
        )
        for competitor in competitors
    },
)
SpecsKept = create_model(
    "SpecsKept",
    **{
        competitor: (
            List[str],
            Field(description="Specifications that were kept from this product"),
        )
        for competitor in competitors
    },
)


# Top-level response schema of the standardization request. `dropped` and `kept`
# are declared before `results` so the keep/drop decision precedes the mapping
# in the generated answer.
# A class docstring is deliberately not added here: pydantic would copy it into
# the JSON schema that is embedded in the prompt.
class SpecsMappingResult(BaseModel):
    dropped: SpecsDropped = Field(description="Specifications that were dropped")
    kept: SpecsKept = Field(description="Specifications that were kept")
    results: List[SpecsMapping] = Field(description="Standardized specification names")


In [24]:
# Pieces that are interpolated into the instruction part of the prompt.
product_bullets = "\n".join(f"- {prod}" for prod in datasheets)
mapping_schema = SpecsMappingResult.model_json_schema()

prompt_standardize = f"""\
### Job Description
Your job is to standardize the naming of technical specifications across different \
{PRODUCT}. They are:
{product_bullets}

Unfortunately, different vendors use different names to refer to the same thing. \
They may also make spelling mistakes. Some vendors may include specs that others \
do not, while others may omit specs that are present in the others.

Include specifications that are present in most products, and ignore those that \
are only present for one or two products. For products which are missing a specification, \
put N/A in the mapping.

You are to output a JSON object that maps the original specification name for a product \
to the standardized name. You are also to consider which specs will be kept or dropped \
for each product before answering. Use the following JSON schema:

### Response Schema
{mapping_schema}

### Original Names
"""

# Append the raw extraction result of every product as its own JSON section.
for competitor, data in all_specs_raw.items():
    raw_json = json.dumps(data, indent=2)
    prompt_standardize += f"#### {competitor}\n```json\n{raw_json}\n```\n"

display(Markdown(prompt_standardize))

### Job Description
Your job is to standardize the naming of technical specifications across different wireless over-ear headphones. They are:
- Bose QuietComfort Ultra Headphones
- Sony WH-1000XM5
- Focal Bathys
- Anker Soundcore Space One
- Apple AirPods Max

Unfortunately, different vendors use different names to refer to the same thing. They may also make spelling mistakes. Some vendors may include specs that others do not, while others may omit specs that are present in the others.

Include specifications that are present in most products, and ignore those that are only present for one or two products. For products which are missing a specification, put N/A in the mapping.

You are to output a JSON object that maps the original specification name for a product to the standardized name. You are also to consider which specs will be kept or dropped for each product before answering. Use the following JSON schema:

### Response Schema
{'$defs': {'SpecsDropped': {'properties': {'Bose QuietComfort Ultra Headphones': {'description': 'Specifications that were dropped from this product', 'items': {'type': 'string'}, 'title': 'Bose Quietcomfort Ultra Headphones', 'type': 'array'}, 'Sony WH-1000XM5': {'description': 'Specifications that were dropped from this product', 'items': {'type': 'string'}, 'title': 'Sony Wh-1000Xm5', 'type': 'array'}, 'Focal Bathys': {'description': 'Specifications that were dropped from this product', 'items': {'type': 'string'}, 'title': 'Focal Bathys', 'type': 'array'}, 'Anker Soundcore Space One': {'description': 'Specifications that were dropped from this product', 'items': {'type': 'string'}, 'title': 'Anker Soundcore Space One', 'type': 'array'}, 'Apple AirPods Max': {'description': 'Specifications that were dropped from this product', 'items': {'type': 'string'}, 'title': 'Apple Airpods Max', 'type': 'array'}}, 'required': ['Bose QuietComfort Ultra Headphones', 'Sony WH-1000XM5', 'Focal Bathys', 'Anker Soundcore Space One', 'Apple AirPods Max'], 'title': 'SpecsDropped', 'type': 'object'}, 'SpecsKept': {'properties': {'Bose QuietComfort Ultra Headphones': {'description': 'Specifications that were kept from this product', 'items': {'type': 'string'}, 'title': 'Bose Quietcomfort Ultra Headphones', 'type': 'array'}, 'Sony WH-1000XM5': {'description': 'Specifications that were kept from this product', 'items': {'type': 'string'}, 'title': 'Sony Wh-1000Xm5', 'type': 'array'}, 'Focal Bathys': {'description': 'Specifications that were kept from this product', 'items': {'type': 'string'}, 'title': 'Focal Bathys', 'type': 'array'}, 'Anker Soundcore Space One': {'description': 'Specifications that were kept from this product', 'items': {'type': 'string'}, 'title': 'Anker Soundcore Space One', 'type': 'array'}, 'Apple AirPods Max': {'description': 'Specifications that were kept from this product', 'items': {'type': 'string'}, 'title': 'Apple Airpods Max', 'type': 'array'}}, 
'required': ['Bose QuietComfort Ultra Headphones', 'Sony WH-1000XM5', 'Focal Bathys', 'Anker Soundcore Space One', 'Apple AirPods Max'], 'title': 'SpecsKept', 'type': 'object'}, 'SpecsMapping': {'properties': {'mapped_name': {'description': 'Standardized name of the specification', 'title': 'Mapped Name', 'type': 'string'}, 'Bose QuietComfort Ultra Headphones': {'description': "Product's original specification name mapped to this standardized name, else N/A", 'title': 'Bose Quietcomfort Ultra Headphones', 'type': 'string'}, 'Sony WH-1000XM5': {'description': "Product's original specification name mapped to this standardized name, else N/A", 'title': 'Sony Wh-1000Xm5', 'type': 'string'}, 'Focal Bathys': {'description': "Product's original specification name mapped to this standardized name, else N/A", 'title': 'Focal Bathys', 'type': 'string'}, 'Anker Soundcore Space One': {'description': "Product's original specification name mapped to this standardized name, else N/A", 'title': 'Anker Soundcore Space One', 'type': 'string'}, 'Apple AirPods Max': {'description': "Product's original specification name mapped to this standardized name, else N/A", 'title': 'Apple Airpods Max', 'type': 'string'}}, 'required': ['mapped_name', 'Bose QuietComfort Ultra Headphones', 'Sony WH-1000XM5', 'Focal Bathys', 'Anker Soundcore Space One', 'Apple AirPods Max'], 'title': 'SpecsMapping', 'type': 'object'}}, 'properties': {'dropped': {'$ref': '#/$defs/SpecsDropped', 'description': 'Specifications that were dropped'}, 'kept': {'$ref': '#/$defs/SpecsKept', 'description': 'Specifications that were kept'}, 'results': {'description': 'Standardized specification names', 'items': {'$ref': '#/$defs/SpecsMapping'}, 'title': 'Results', 'type': 'array'}}, 'required': ['dropped', 'kept', 'results'], 'title': 'SpecsMappingResult', 'type': 'object'}

### Original Names
#### Bose QuietComfort Ultra Headphones
```json
{
  "product": "Bose QuietComfort Ultra Headphones",
  "specs": {
    "Noise Cancelling": {
      "name": "Noise Cancelling",
      "description": "Indicates whether the headphone has noise cancelling feature to reduce unwanted ambient sounds.",
      "value": true,
      "unit": ""
    },
    "Battery Life": {
      "name": "Battery Life",
      "description": "The maximum duration of use on a full charge.",
      "value": "24",
      "unit": "hours"
    },
    "Wireless Connectivity": {
      "name": "Wireless Connectivity",
      "description": "Specifies the wireless technologies supported by the headphones for connectivity with devices.",
      "value": "Bluetooth, A2DP Bluetooth Audio Streaming, HFP Bluetooth, AVRCP Bluetooth, Bluetooth Low Energy",
      "unit": ""
    },
    "Microphones": {
      "name": "Microphones",
      "description": "Indicates whether the headphones have built-in microphones for calls and voice commands.",
      "value": "Built-in Microphone",
      "unit": ""
    },
    "Headphone Fit": {
      "name": "Headphone Fit",
      "description": "Describes how the headphones fit on or around the ears.",
      "value": "Around Ear Circumaural",
      "unit": ""
    },
    "Headband": {
      "name": "Headband",
      "description": "Describes the type of headband the headphones have.",
      "value": "On Head Adjustable",
      "unit": ""
    },
    "Product Material": {
      "name": "Product Material",
      "description": "Specifies the materials used in the construction of the headphones.",
      "value": "Plastic, Aluminum, Leather (Protein)",
      "unit": ""
    },
    "Ear Cushion Material": {
      "name": "Ear Cushion Material",
      "description": "Specifies the material used for the ear cushions of the headphones.",
      "value": "Protein Leather",
      "unit": ""
    },
    "Rechargeable": {
      "name": "Rechargeable",
      "description": "Indicates whether the headphones have rechargeable batteries.",
      "value": true,
      "unit": ""
    },
    "Charging Interface(s)": {
      "name": "Charging Interface(s)",
      "description": "Specifies the type of interface used to charge the headphones.",
      "value": "USB C PORT",
      "unit": ""
    },
    "Bose App": {
      "name": "Bose App",
      "description": "Indicates the availability of a companion Bose app for additional features and customization.",
      "value": "Bose App",
      "unit": ""
    },
    "Noise Control Type": {
      "name": "Noise Control Type",
      "description": "Specifies the type of noise control features available on the headphones.",
      "value": "Active Noise Cancelling, Echo Reduction, Adjustable Noise Cancelling",
      "unit": ""
    },
    "Cushions": {
      "name": "Cushions",
      "description": "Indicates whether the earcups cushions are removable.",
      "value": "Removable Cushion",
      "unit": ""
    },
    "Battery Charge Time": {
      "name": "Battery Charge Time",
      "description": "The time it takes to fully charge the headphones' battery.",
      "value": "3",
      "unit": "hours"
    },
    "Charging Accessory included": {
      "name": "Charging Accessory included",
      "description": "Indicates whether a charging accessory like a cable or adapter is included with the headphones.",
      "value": true,
      "unit": ""
    },
    "Case": {
      "name": "Case",
      "description": "Specifies the type of case included with the headphones.",
      "value": "Carry",
      "unit": ""
    },
    "Headphone": {
      "name": "Headphone",
      "description": "Dimensions of the headphone.",
      "value": "7.7\" H x 5.5\" W x 2.0\" D (0.560 lb)",
      "unit": ""
    },
    "Product Case Material": {
      "name": "Product Case Material",
      "description": "Specifies the material used for the case included with the headphones.",
      "value": "Plastic (Hard)",
      "unit": ""
    },
    "Audio cable included": {
      "name": "Audio cable included",
      "description": "Indicates whether an audio cable is included with the headphones for wired connectivity.",
      "value": false,
      "unit": ""
    }
  }
}
```
#### Sony WH-1000XM5
```json
{
  "product": "Sony WH-1000XM5",
  "specs": {
    "WEIGHT": {
      "name": "WEIGHT",
      "description": "The weight of the headphones, which affects portability and comfort.",
      "value": "Approx. 8.82 oz",
      "unit": ""
    },
    "HEADPHONE TYPE": {
      "name": "HEADPHONE TYPE",
      "description": "The type of headphone design, which affects sound isolation and sound leakage. Closed-back headphones generally offer better sound isolation.",
      "value": "Closed, dynamic",
      "unit": ""
    },
    "DRIVER UNIT": {
      "name": "DRIVER UNIT",
      "description": "The size of the headphone driver, which can influence sound quality and bass response.",
      "value": "11.82 in",
      "unit": ""
    },
    "IMPEDANCE (OHM)": {
      "name": "IMPEDANCE (OHM)",
      "description": "The impedance of the headphones, which affects how much power is needed to drive them. Lower impedance headphones are easier to drive with mobile devices.",
      "value": "48 ohm (1 kHz) (when connecting via the headphone cable with the unit turned on), 16 ohm (1 kHz) (when connecting via the headphone cable with the unit turned off)",
      "unit": ""
    },
    "FREQUENCY RESPONSE": {
      "name": "FREQUENCY RESPONSE",
      "description": "The range of frequencies the headphones can reproduce, which affects the overall sound quality.",
      "value": "4 Hz\u201340,000 Hz (JEITA)",
      "unit": ""
    },
    "BLUETOOTH\u00ae VERSION": {
      "name": "BLUETOOTH\u00ae VERSION",
      "description": "The version of Bluetooth supported, which affects connection stability, range, and power consumption. Higher versions generally offer improvements.",
      "value": "Bluetooth Specification Version 5.2",
      "unit": ""
    },
    "SUPPORTED AUDIO FORMAT(S)": {
      "name": "SUPPORTED AUDIO FORMAT(S)",
      "description": "The audio codecs supported over Bluetooth, which affects the quality of wireless audio. LDAC offers higher quality audio than SBC or AAC.",
      "value": "SBC, AAC, LDAC",
      "unit": ""
    },
    "BATTERY LIFE (CONTINUOUS MUSIC PLAYBACK TIME)": {
      "name": "BATTERY LIFE (CONTINUOUS MUSIC PLAYBACK TIME)",
      "description": "The amount of time the headphones can play music on a single charge.",
      "value": "Max. 30 hrs (NC ON), Max. 40 hrs (NC OFF)",
      "unit": ""
    },
    "BATTERY LIFE (CONTINUOUS COMMUNICATION TIME)": {
      "name": "BATTERY LIFE (CONTINUOUS COMMUNICATION TIME)",
      "description": "The amount of time the headphones can be used for calls on a single charge.",
      "value": "Max. 24 hrs (NC ON) , Max. 32 hrs (NC OFF)",
      "unit": ""
    },
    "BATTERY CHARGE TIME": {
      "name": "BATTERY CHARGE TIME",
      "description": "The time it takes to fully charge the headphones' battery.",
      "value": "Approx. 3.5 hrs",
      "unit": ""
    },
    "CORD LENGTH": {
      "name": "CORD LENGTH",
      "description": "The length of the headphone cable, if applicable.",
      "value": "approx. 3.94 ft",
      "unit": ""
    },
    "VOLUME CONTROL": {
      "name": "VOLUME CONTROL",
      "description": "The method used to control the volume on the headphones.",
      "value": "Touch Sensor",
      "unit": ""
    },
    "MICROPHONE": {
      "name": "MICROPHONE",
      "description": "Does the headphone have a microphone?",
      "value": "Yes",
      "unit": ""
    },
    "NOISE CANCELLING": {
      "name": "NOISE CANCELLING",
      "description": "Does the headphone have noise cancelling feature?",
      "value": "Yes",
      "unit": ""
    },
    "AMBIENT SOUND MODE": {
      "name": "AMBIENT SOUND MODE",
      "description": "Ambient Sound Mode lets you hear external sounds without removing the headphones.",
      "value": "Yes",
      "unit": ""
    },
    "WEARING STYLE": {
      "name": "WEARING STYLE",
      "description": "The way the headphones are worn.",
      "value": "Over Ear",
      "unit": ""
    },
    "INPUT(S)": {
      "name": "INPUT(S)",
      "description": "The type of audio input supported by the headphones.",
      "value": "Stereo Mini Jack",
      "unit": ""
    },
    "CORD TYPE": {
      "name": "CORD TYPE",
      "description": "The type of cord that comes with the headphones.",
      "value": "Single-sided (detachable)",
      "unit": ""
    },
    "SENSITIVITIES (DB/MW)": {
      "name": "SENSITIVITIES (DB/MW)",
      "description": "The sensitivity of the headphones, which affects how loud they can get with a given amount of power.",
      "value": "102 dB (1 kHz) / mW (when connecting via the headphone cable with the unit turned on), 100 dB (1 kHz) / mW (when connecting via the headphone cable with the unit turned off)",
      "unit": ""
    },
    "PROFILE": {
      "name": "PROFILE",
      "description": "The Bluetooth profiles supported, which determine the functions available over Bluetooth.",
      "value": "A2DP, AVRCP, HFP, HSP",
      "unit": ""
    }
  }
}
```
#### Focal Bathys
```json
{
  "product": "Focal Bathys",
  "specs": {
    "Noise Cancellation Technology": {
      "name": "Noise Cancellation Technology",
      "description": "Active Noise Cancellation (ANC) technology reduces unwanted background noise, offering listening without disturbances from surrounding sounds.",
      "value": "Yes",
      "unit": ""
    },
    "Bluetooth Version": {
      "name": "Bluetooth Version",
      "description": "Bluetooth version refers to the version of the Bluetooth wireless technology used by the headphones for connecting to devices, with newer versions typically offering improved performance, range, and efficiency.",
      "value": "5.1 Multipoint",
      "unit": ""
    },
    "Battery Life": {
      "name": "Battery Life",
      "description": "Battery life indicates the duration for which the headphones can operate on a single full charge.",
      "value": "30h in Bluetooth, 35h with mini Jack, 42h in USB-DAC mode",
      "unit": ""
    },
    "Audio Codec Support": {
      "name": "Audio Codec Support",
      "description": "Audio codecs are methods used to encode and decode digital audio data. Supported codecs determine the audio quality and compatibility of the headphones with different devices and streaming services.",
      "value": "AAC, aptX\u2122, aptX\u2122 Adaptive, SBC",
      "unit": ""
    },
    "Microphones": {
      "name": "Microphones",
      "description": "Number of microphones incorporated in the headphones, used for calls and active noise cancellation.",
      "value": 8,
      "unit": ""
    },
    "Weight": {
      "name": "Weight",
      "description": "Weight of the headphones.",
      "value": 350,
      "unit": "g"
    },
    "Frequency response (+/- 3dB) :": {
      "name": "Frequency response (+/- 3dB) :",
      "description": "Frequency response indicates the range of frequencies the headphones can reproduce, measured in Hertz (Hz).",
      "value": "15 Hz - 22 kHz",
      "unit": ""
    },
    "Charging time :": {
      "name": "Charging time :",
      "description": "Charging time refers to the duration required to fully charge the headphones' battery.",
      "value": 1.5,
      "unit": "h"
    },
    "Quick charge :": {
      "name": "Quick charge :",
      "description": "Quick charge indicates whether the headphones support fast charging, allowing for a significant amount of battery life to be replenished in a short amount of time.",
      "value": "Yes",
      "unit": ""
    },
    "Connector :": {
      "name": "Connector :",
      "description": "Connector type indicates the physical interface used for wired connections to the headphones.",
      "value": "Jack 3.5 mm, USB-C",
      "unit": ""
    },
    "Voice assistant :": {
      "name": "Voice assistant :",
      "description": "Voice assistant integration indicates compatibility with voice-controlled virtual assistants, allowing users to perform tasks hands-free.",
      "value": "Amazon Alexa, Google Assistant",
      "unit": ""
    },
    "Harmonic distortion rate :": {
      "name": "Harmonic distortion rate :",
      "description": "Harmonic distortion rate indicates the amount of harmonic distortion present in the audio signal, expressed as a percentage.",
      "value": "<0.2 % @1kHz",
      "unit": ""
    },
    "Product type :": {
      "name": "Product type :",
      "description": "Product type classifies the headphones based on their primary function and design.",
      "value": "Bluetooth closed headphones with active noise reduction",
      "unit": ""
    },
    "Loudspeakers :": {
      "name": "Loudspeakers :",
      "description": "Loudspeakers",
      "value": "15/8\" (40mm) Aluminium/Magnesium \u2018M\u2019-shaped dome",
      "unit": ""
    },
    "Battery type :": {
      "name": "Battery type :",
      "description": "Battery type indicates the chemical composition and characteristics of the battery used in the headphones.",
      "value": "Lithium-ion 1060 mAH",
      "unit": ""
    },
    "Carrying case included :": {
      "name": "Carrying case included :",
      "description": "Carrying case included indicates whether the headphones come with a carrying case for storage and portability.",
      "value": "Yes",
      "unit": ""
    },
    "Carrying case depth :": {
      "name": "Carrying case depth :",
      "description": "Carrying case depth",
      "value": 7,
      "unit": "cm"
    },
    "Carrying case height :": {
      "name": "Carrying case height :",
      "description": "Carrying case height",
      "value": 24,
      "unit": "cm"
    },
    "Carrying case width :": {
      "name": "Carrying case width :",
      "description": "Carrying case width",
      "value": 21,
      "unit": "cm"
    },
    "Cables provided :": {
      "name": "Cables provided :",
      "description": "Cables provided",
      "value": "\u2022 1 x 4ft (1.2m) Jack 1/8\" (3.5mm) cable \u2022 1 x 4ft (1.2m) USB-C\u00ae cable",
      "unit": ""
    }
  }
}
```
#### Anker Soundcore Space One
```json
{
  "product": "Anker Soundcore Space One",
  "specs": {
    "Active Noise Cancellation": {
      "name": "Active Noise Cancellation",
      "description": "Reduces unwanted ambient sounds, allowing you to focus on your audio. This is achieved through various techniques, including using microphones to capture external noise and then generating an inverted sound wave to cancel it out.",
      "value": "Adaptive ANC",
      "unit": ""
    },
    "Battery Life": {
      "name": "Battery Life",
      "description": "Indicates how long the headphones can operate wirelessly on a single charge. Longer battery life allows for extended use without needing to recharge frequently.",
      "value": "55H/40H",
      "unit": ""
    },
    "Sound": {
      "name": "Sound",
      "description": "Describes the audio quality produced by the headphones, including clarity, balance, and richness. This is affected by the drivers, audio codecs, and tuning of the headphones.",
      "value": "Detailed Sound, 40mm Drivers",
      "unit": ""
    },
    "Calls": {
      "name": "Calls",
      "description": "Refers to the quality and clarity of phone calls made using the headphones. This is influenced by the microphone quality and noise reduction technologies used.",
      "value": "3 mics with AI",
      "unit": ""
    },
    "Weight": {
      "name": "Weight",
      "description": "The physical weight of the headphones. Lighter headphones are generally more comfortable to wear for extended periods.",
      "value": 265,
      "unit": "g (9.35 oz)"
    },
    "Fast Charging": {
      "name": "Fast Charging",
      "description": "A feature that allows the headphones to quickly gain a certain amount of battery life with a short charging time.",
      "value": "5 Mins = 4H",
      "unit": ""
    },
    "Driver Size": {
      "name": "Driver Size",
      "description": "The diameter of the headphone driver, which is the component that produces sound. Larger drivers can often deliver more powerful and detailed audio.",
      "value": 40,
      "unit": "mm"
    },
    "Multipoint Connection": {
      "name": "Multipoint Connection",
      "description": "A feature that allows the headphones to be connected to two devices simultaneously, making it easy to switch between audio sources.",
      "value": "\u2714\ufe0f",
      "unit": ""
    },
    "Customized EQ": {
      "name": "Customized EQ",
      "description": "Allows users to adjust the sound profile of the headphones to their personal preferences using an equalizer in the companion app.",
      "value": "\u2714\ufe0fHear ID",
      "unit": ""
    },
    "Microphone Quality": {
      "name": "Microphone Quality",
      "description": "The clarity and noise reduction capabilities of the built-in microphone, affecting call quality and voice commands.",
      "value": "3 mics with AI",
      "unit": ""
    },
    "Bluetooth Version": {
      "name": "Bluetooth Version",
      "description": "The version of Bluetooth supported by the headphones, which affects connection speed, range, and power efficiency.  The higher the version number, the more modern the bluetooth technology.",
      "value": "unspecified",
      "unit": ""
    },
    "Comfort and Fit": {
      "name": "Comfort and Fit",
      "description": "Describes how comfortable the headphones are to wear for extended periods, including the earcups, headband, and adjustability.",
      "value": "8\u00b0 rotating ear cups and soft integrated headband for all-day comfort",
      "unit": ""
    },
    "Special Features": {
      "name": "Special Features",
      "description": "Unique and notable functionalities that set the headphones apart, such as travel pouch and AUX cable support.",
      "value": "Travel Pouch, AUX",
      "unit": ""
    },
    "Adaptive ANC": {
      "name": "Adaptive ANC",
      "description": "Automatically adjusts the level of noise cancellation based on the surrounding environment for optimal performance.",
      "value": "Adaptive ANC",
      "unit": ""
    },
    "Impedance": {
      "name": "Impedance",
      "description": "The measure of the headphones' resistance to an electrical signal, measured in ohms (\u03a9). This affects how well the headphones pair with different audio sources.",
      "value": "unspecified",
      "unit": ""
    },
    "Frequency Response": {
      "name": "Frequency Response",
      "description": "The range of frequencies that the headphones can reproduce, measured in Hertz (Hz).  A wider frequency response generally indicates more detailed sound reproduction.",
      "value": "unspecified",
      "unit": ""
    },
    "Audio Codec Support": {
      "name": "Audio Codec Support",
      "description": "The audio codecs that the headphones support, which affects the quality of wireless audio transmission. Common codecs include SBC, AAC, and aptX.",
      "value": "unspecified",
      "unit": ""
    },
    "Water Resistance": {
      "name": "Water Resistance",
      "description": "Indicates the degree to which the headphones are protected against water or sweat damage.",
      "value": "no",
      "unit": ""
    },
    "Sensitivity": {
      "name": "Sensitivity",
      "description": "The efficiency of the headphones in converting an electrical signal into sound, measured in decibels (dB). Higher sensitivity headphones require less power to produce the same volume.",
      "value": "unspecified",
      "unit": ""
    },
    "Wireless Range": {
      "name": "Wireless Range",
      "description": "The maximum distance the headphones can maintain a stable Bluetooth connection with the audio source.",
      "value": "unspecified",
      "unit": ""
    }
  }
}
```
#### Apple AirPods Max
```json
{
  "product": "Apple AirPods Max",
  "specs": {
    "Color": {
      "name": "Color",
      "description": "Available colors for the AirPods Max. This refers to the external finish of the headphones.",
      "value": "Midnight, Starlight, Blue, Purple, Orange",
      "unit": ""
    },
    "Audio Technology": {
      "name": "Audio Technology",
      "description": "Details the audio enhancements and features incorporated into the AirPods Max for sound reproduction and clarity.",
      "value": "Apple-designed dynamic driver, Pro-level Active Noise Cancellation, Transparency mode, Personalized Spatial Audio with dynamic head tracking, Adaptive EQ, Lossless Audio via USB\u2011C",
      "unit": ""
    },
    "Noise Cancellation Technology": {
      "name": "Noise Cancellation Technology",
      "description": "Describes the noise cancellation capabilities of the AirPods Max, which reduces unwanted ambient sounds.",
      "value": "Active Noise Cancellation",
      "unit": ""
    },
    "Battery": {
      "name": "Battery",
      "description": "Information on battery life and charging capabilities of the AirPods Max, including listening time, movie playback time, and charging methods.",
      "value": "Up to 20 hours listening time with ANC, Up to 20 hours movie playback with Spatial Audio, 5 minutes charge for 1.5 hours listening",
      "unit": ""
    },
    "Microphones": {
      "name": "Microphones",
      "description": "Details the number and type of microphones used in the AirPods Max, which are used for noise cancellation and voice pickup.",
      "value": "Nine microphones total",
      "unit": ""
    },
    "Chip": {
      "name": "Chip",
      "description": "Specifies the chip used in the AirPods Max. This chip enables various features like audio processing and connectivity.",
      "value": "Apple H1 headphone chip (each ear cup)",
      "unit": ""
    },
    "Connectivity": {
      "name": "Connectivity",
      "description": "Describes the wireless technology used for connecting the AirPods Max to devices.",
      "value": "Bluetooth 5.0",
      "unit": ""
    },
    "Sensors": {
      "name": "Sensors",
      "description": "Lists the different sensors incorporated into the AirPods Max. These sensors enable features like head detection and adaptive audio.",
      "value": "Optical sensor, Position sensor, Case-detect sensor, Accelerometer, Gyroscope",
      "unit": ""
    },
    "Size and Weight": {
      "name": "Size and Weight",
      "description": "Provides the physical dimensions and weight of the AirPods Max, including the smart case.",
      "value": "13.6 ounces (386.2 grams)",
      "unit": ""
    },
    "Controls": {
      "name": "Controls",
      "description": "Details the physical controls available on the AirPods Max, such as the Digital Crown and noise control button.",
      "value": "Digital Crown, Noise control button, Hey Siri",
      "unit": ""
    },
    "System Requirements": {
      "name": "System Requirements",
      "description": "Lists the minimum software and device requirements for using the AirPods Max with various Apple devices.",
      "value": "Latest iOS, iPadOS, watchOS, macOS, tvOS, visionOS",
      "unit": ""
    },
    "Accessibility": {
      "name": "Accessibility",
      "description": "Highlights the accessibility features available on the AirPods Max for users with disabilities.",
      "value": "Live Listen audio, Headphone levels, Headphone Accommodations",
      "unit": ""
    },
    "In the Box": {
      "name": "In the Box",
      "description": "Lists the items included in the AirPods Max packaging.",
      "value": "AirPods Max, Smart Case, USB\u2011C Charge Cable, Documentation",
      "unit": ""
    }
  }
}
```


In [25]:
# Send the standardization prompt to Gemini and request structured JSON output
# so the reply is parsed into a SpecsMappingResult rather than free-form text.
resp_standardize = client.models.generate_content(
    model=GOOGLE_AI_MODEL,
    contents=prompt_standardize,
    config=types.GenerateContentConfig(
        responseMimeType="application/json",
        responseSchema=SpecsMappingResult,
    ),
)

standardized = resp_standardize.parsed
# `.parsed` is None when the reply could not be parsed into the schema
# (e.g. empty, blocked or truncated output). Fail here with the raw text
# instead of an opaque AttributeError in the post-processing cell.
if standardized is None:
    raise ValueError(
        "Gemini did not return a parsable SpecsMappingResult; "
        f"raw response text: {resp_standardize.text!r}"
    )

#### Post-Process to Unify Specs

In [26]:
# Regroup the raw per-product specs under their standardized names:
#   all_specs[standardized name][competitor] -> raw spec entry
all_specs = {}

for mapping in standardized.results:
    name = mapping.mapped_name

    for competitor in competitors:
        # Each mapping exposes one attribute per competitor holding that
        # product's original spec name; "N/A" marks "no equivalent spec".
        old_name = getattr(mapping, competitor)
        if old_name == "N/A":
            print(f"Skipped {name} for {competitor}.")
            continue

        try:
            specs = all_specs_raw[competitor]["specs"][old_name]
        except KeyError:
            # The model returned a spec name that is absent from the raw data.
            print(f"AI hallucinated something, skipped {name} for {competitor}.")
            continue
        # Create the per-name bucket only once a real spec was found, so a
        # standardized name without any match never appears in the output.
        all_specs.setdefault(name, {})[competitor] = specs

print(json.dumps(all_specs, indent=2))

Skipped Weight for Bose QuietComfort Ultra Headphones.
Skipped Audio Codec Support for Apple AirPods Max.
Skipped Battery Charge Time for Anker Soundcore Space One.
Skipped Headphone Fit for Apple AirPods Max.
{
  "Noise Cancelling": {
    "Bose QuietComfort Ultra Headphones": {
      "name": "Noise Cancelling",
      "description": "Indicates whether the headphone has noise cancelling feature to reduce unwanted ambient sounds.",
      "value": true,
      "unit": ""
    },
    "Sony WH-1000XM5": {
      "name": "NOISE CANCELLING",
      "description": "Does the headphone have noise cancelling feature?",
      "value": "Yes",
      "unit": ""
    },
    "Focal Bathys": {
      "name": "Noise Cancellation Technology",
      "description": "Active Noise Cancellation (ANC) technology reduces unwanted background noise, offering listening without disturbances from surrounding sounds.",
      "value": "Yes",
      "unit": ""
    },
    "Anker Soundcore Space One": {
      "name": "Active Noi

In [27]:
# Persist the unified specs as pretty-printed JSON at SPECS_PATH.
with SPECS_PATH.open("w") as specs_file:
    json.dump(all_specs, specs_file, indent=2)