In [1]:
import json

In [2]:

# Nested RAV4 spec: one document holding vehicle-level fields plus a list of
# per-variant entries, each with its own engine and pricing sub-dicts.
rav4 = {
    "document_id": "toyota_rav4_2024",
    "vehicle": {
        "model": "RAV4",
        "type": "SUV",
        "year": 2024,
    },
    "variants": [
        {"name": "Gasoline", "engine": {"horsepower": 203}, "pricing": {"base": 28_000}},
        {"name": "Hybrid", "engine": {"horsepower": 219}, "pricing": {"base": 30_000}},
    ],
}

In [4]:
# Inspect the first variant entry to see the nested structure before flattening
rav4['variants'][0]

{'name': 'Gasoline', 'engine': {'horsepower': 203}, 'pricing': {'base': 28000}}

In [5]:
# Flatten first variant: pick the entry at index 0 of rav4['variants']
# (the "Gasoline" one) as the example to flatten by hand
variant = rav4['variants'][0]

In [6]:
# Hand-built flat record for the selected variant: vehicle-level fields are
# copied from `rav4`, variant-level fields from `variant`.
flat = {
    # Derive the ID suffix from the variant's own lower-cased name rather than
    # hard-coding "gasoline", so the ID stays correct if a different variant is
    # selected and follows the same scheme flatten_car_spec uses.
    "document_id": f"{rav4['document_id']}_{variant['name'].lower()}",
    "model": rav4['vehicle']['model'],
    "type": rav4['vehicle']['type'],
    "year": rav4['vehicle']['year'],
    "variant": variant['name'],
    "horsepower": variant['engine']['horsepower'],
    "price": variant['pricing']['base']
}


In [8]:
# Serialize the flat record as JSON with 2-space indentation and print it
print(json.dumps(flat, indent=2))

{
  "document_id": "toyota_rav4_2024_gasoline",
  "model": "RAV4",
  "type": "SUV",
  "year": 2024,
  "variant": "Gasoline",
  "horsepower": 203,
  "price": 28000
}


In [9]:
def flatten_car_spec(car_spec):
    """Produce one flat metadata record for each variant of a nested car spec.

    Args:
        car_spec: Mapping with a 'document_id', a 'vehicle' mapping
            ('model', 'type', 'year') and a 'variants' iterable whose items
            provide 'name', 'engine' -> 'horsepower' and 'pricing' -> 'base'.

    Returns:
        A list of dicts, in variant order, with the keys 'document_id',
        'model', 'type', 'year', 'variant', 'horsepower' and 'price'. Each
        'document_id' is the spec's ID joined by an underscore to the
        lower-cased variant name.

    Raises:
        KeyError: If a dict in the spec lacks one of the keys listed above.
    """
    return [
        {
            # Unique per-variant ID: "<spec id>_<variant name, lower-cased>"
            "document_id": f"{car_spec['document_id']}_{trim['name'].lower()}",
            # Vehicle-level fields, repeated on every record
            "model": car_spec['vehicle']['model'],
            "type": car_spec['vehicle']['type'],
            "year": car_spec['vehicle']['year'],
            # Variant-level fields, lifted out of their nested dicts
            "variant": trim['name'],
            "horsepower": trim['engine']['horsepower'],
            "price": trim['pricing']['base'],
        }
        for trim in car_spec['variants']
    ]

In [10]:
# Flatten RAV4: build a list with one flat record per entry in rav4['variants']
metadata = flatten_car_spec(rav4)

In [11]:
# Show the first flattened record
metadata[0]

{'document_id': 'toyota_rav4_2024_gasoline',
 'model': 'RAV4',
 'type': 'SUV',
 'year': 2024,
 'variant': 'Gasoline',
 'horsepower': 203,
 'price': 28000}

In [14]:
# Keep the first flattened record for the formatting example that follows
record = metadata[0]

In [15]:
# Print "<variant>: $<price>"; the `:,` format spec adds a thousands separator
print(f"{record['variant']}: ${record['price']:,}")

Gasoline: $28,000


In [16]:
# Report how many records were generated, then print one
# "<variant>: $<price>" line per record.
# Note: the loop reuses the name `record`, so once it has run over a non-empty
# list, `record` refers to the last element of `metadata` rather than the
# value assigned in an earlier cell.
print(f"Generated {len(metadata)} metadata records\n")
for record in metadata:
    print(f"{record['variant']}: ${record['price']:,}")

Generated 2 metadata records

Gasoline: $28,000
Hybrid: $30,000
