# 2. Custom Template Dictionaries

> **Purpose:**  
> This notebook creates **property dictionaries** for each Asset Administration Shell (AAS).  
> These dictionaries act as **templates for Large Language Model (LLM) prompts**, instructing the model to locate specific property values in the associated product datasheet text.

Each dictionary entry contains the property **name** and, where available, its **definition**, **unit**, value **type**, and the example **values** found in the AAS files.  
The generated dictionaries are compatible with the `pdf2aas.dictionary.core.Dictionary` class to ensure seamless integration into the subsequent LLM-based data extraction workflow.

## 2.0 Imports

In [1]:
import os
import pandas as pd
from tqdm import tqdm
from dataclasses import asdict
import basyx
from basyx.aas import model

from basyx.aas.adapter import aasx
from pdf2aas.model import PropertyDefinition, ClassDefinition
import decimal
import json
from pdf2aas.dictionary.core import dictionary_serializer
from pdf2aas.dictionary.core import Dictionary

# Root of the data directory, given as the path seen from inside the container.
data_path = "/app/data/"
# Folder whose sub-directories are scanned for product AAS files in section 2.1.
processing_path = os.path.join(data_path, "processed/sample")


## 2.1 Template Creation

Creating submodel templates often requires **company-specific preprocessing** to account for variations in AAS structures.
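Currently, templates are implemented and tested for the following manufacturers (the manufacturer is taken from the part of the product directory name before the first underscore, written as `Festo`, `Harting`, `RStahl`, or `Wago`):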
- Festo
- Harting
- R. Stahl
- WAGO

Future updates to the **AAS format** or changes in individual company data structures may require minor adjustments to the template generation code.

In [2]:
def extract_template(company, submodel, custom_templates, object_store=None, big_store=None, unavailable_semantic_ids=None, product_id=None):
    """Add the technical properties of one product to ``custom_templates``.

    Walks the ``TechnicalProperties`` element of ``submodel`` (descending into
    nested collections), creates or re-uses one ``PropertyDefinition`` per
    property in ``custom_templates['properties']`` and finally stores a
    ``ClassDefinition`` under ``custom_templates['classes'][product_id]``.

    Args:
        company: Manufacturer key. ``"Wago"`` takes id, name and definition
            from the property itself. ``"Harting"``, ``"RStahl"`` and
            ``"Festo"`` resolve the concept description referenced by the
            property's semantic id. For any other value every property is
            skipped, so the class is stored without properties.
        submodel: Object providing ``get_referable``; must contain
            ``"TechnicalProperties"``.
        custom_templates: Dict with ``'properties'`` and ``'classes'``
            mappings. It is modified in place.
        object_store: Store searched first for concept descriptions.
        big_store: Fallback store searched when ``object_store`` has no match.
        unavailable_semantic_ids: Optional list. When given, the semantic id
            of every skipped property is appended to it (without duplicates).
        product_id: Used as the id of the created class.

    Returns:
        None. All results are written into ``custom_templates``.

    Note:
        The lookup of ``"TechnicalProperties"`` is not guarded; whatever
        ``submodel.get_referable`` raises for a missing element propagates.
    """
    technical_properties = submodel.get_referable("TechnicalProperties")
    properties = []

    def extract_properties(elements):
        """Collect all (multi-language) properties, recursing into collections."""
        for elem in elements:
            if isinstance(elem, model.submodel.SubmodelElementCollection):
                extract_properties(elem)  # Recursive call for nested collections
            elif isinstance(elem, (model.submodel.MultiLanguageProperty, model.submodel.Property)):
                properties.append(elem)
            else:
                print("Unknown Element", elem)

    extract_properties(technical_properties)

    def english(text, default):
        """Return the 'en' entry of a language mapping, or ``default`` if absent."""
        return text.get('en', default) if text is not None else default

    def get_value_and_type(prop):
        """Return ``(value, value_type)`` with Decimal values converted to float."""
        if isinstance(prop, model.submodel.Property):
            value_type = map_value_type(prop.value_type)
            value = prop.value
        elif isinstance(prop, model.submodel.MultiLanguageProperty):
            value_type = "string"
            # The value may be unset; treat that like a missing translation.
            value = english(prop.value, "")
        else:
            value_type, value = "string", ""
        if isinstance(value, decimal.Decimal):
            value = float(value)
        return value, value_type

    def map_value_type(original_type):
        """Map a property value type to "string", "bool" or "numeric"."""
        if original_type in [str, 'string', "<class 'str'>"]:
            return "string"
        elif original_type in [bool, 'bool', "<class 'bool'>"]:
            return "bool"
        elif original_type in [basyx.aas.model.datatypes.Float, decimal.Decimal, int, float]:
            return "numeric"
        else:
            # NOTE(review): any type not listed above is reported and treated
            # as "string" - confirm this is intended for other numeric types.
            print("unknown type", original_type)
            return "string"

    def get_class_name():
        """Return the English product designation, or "" if it cannot be read."""
        try:
            designation = submodel.get_referable("GeneralInformation").get_referable("ManufacturerProductDesignation").value
            if isinstance(designation, model.MultiLanguageTextType):
                return designation.get('en', "")
            return designation
        except Exception:
            # Best effort: the designation is optional, but do not swallow
            # KeyboardInterrupt/SystemExit like a bare ``except`` would.
            return ""

    def get_semantic_id(prop):
        """Return the value of the first semantic-id key, or None if missing."""
        try:
            return prop.semantic_id.key[0].value
        except (AttributeError, IndexError):
            return None

    def lookup(store, identifier):
        """Return the identifiable from ``store`` or None when unavailable."""
        if store is None or identifier is None:
            return None
        try:
            return store.get_identifiable(identifier)
        except KeyError:
            return None

    def remember_unavailable(semantic_id):
        """Record a skipped semantic id in the caller-supplied list, if any."""
        if unavailable_semantic_ids is None or semantic_id is None:
            return
        if semantic_id not in unavailable_semantic_ids:
            unavailable_semantic_ids.append(semantic_id)

    property_definitions = []
    class_id = product_id

    for prop in properties:
        if company == "Wago":
            prop_id = prop.id_short
            name = {'en': prop.id_short}
            definition = {"en": english(prop.description, prop.id_short)}
            unit = ""

        elif company in ["Harting", "RStahl", "Festo"]:
            semantic_id = get_semantic_id(prop)
            concept_description = lookup(object_store, semantic_id)
            if concept_description is None:
                print("concept description not found, trying concept descriptions from other AAS")
                concept_description = lookup(big_store, semantic_id)
            if concept_description is None:
                print(f"Skipping unavailable property {prop}")
                remember_unavailable(semantic_id)
                continue

            prop_id = concept_description.id
            data_specifications = concept_description.embedded_data_specifications
            content = data_specifications[0].data_specification_content if data_specifications else None

            if company == "Harting":
                name = {'en': english(prop.display_name, prop_id)}
                definition = {'en': english(concept_description.description, prop_id)}
                unit = content.unit if content is not None else ""
            else:  # "RStahl" or "Festo"
                if content is None:
                    # Name and definition live in the data specification;
                    # without it there is nothing to build a template from.
                    print(f"Skipping property without embedded data specification {prop}")
                    remember_unavailable(semantic_id)
                    continue
                name = {'en': english(content.preferred_name, prop_id)}
                definition = {'en': english(content.definition, prop_id)}
                unit = content.unit
            if unit is None:
                unit = ""
        else:
            continue  # Unknown company, skip

        value, value_type = get_value_and_type(prop)

        if prop_id in custom_templates['properties']:
            # Property already known from another product: only add new values.
            if value not in custom_templates['properties'][prop_id].values:
                custom_templates['properties'][prop_id].values.append(value)
        else:
            custom_templates['properties'][prop_id] = PropertyDefinition(
                id=prop_id,
                name=name,
                type=value_type,
                definition=definition,
                unit=unit,
                values=[value]
            )

        property_definitions.append(custom_templates['properties'][prop_id])

    class_name = get_class_name()
    class_description = ""
    keywords = []

    custom_templates['classes'][class_id] = ClassDefinition(
        id=class_id,
        name=class_name,
        description=class_description,
        keywords=keywords,
        properties=property_definitions
    )


In [None]:
# Accumulates every property and class definition produced by extract_template.
custom_templates = {"Type": "CustomDictionary", "release": "0.0", "properties": {}, "classes": {}}

aas_property_collections = {}  # not used in this cell; kept in case other cells rely on the name
big_store = model.DictObjectStore() # optional, to share concept descriptions between AAS files
# Handed to extract_template so the semantic ids of skipped properties can be inspected afterwards.
unavailable_semantic_ids = []
for product_id in os.listdir(processing_path):
    product_dir = os.path.join(processing_path, product_id)
    if not os.path.isdir(product_dir) or product_id == "configs":
        continue

    # Directory names start with the manufacturer key followed by an underscore.
    company = product_id.split("_")[0]
    if company not in ["RStahl", "Wago", "Harting", "Festo"]:
        print(f"Unknown company for product ID: {product_id}")
        continue

    object_store = model.DictObjectStore()
    file_store = aasx.DictSupplementaryFileContainer()
    with aasx.AASXReader(os.path.join(product_dir, product_id + ".aasx")) as reader:
        # Read all contained AAS objects and all referenced auxiliary files
        reader.read_into(object_store=object_store,
                        file_store=file_store)
        # Second pass fills the shared store used as a fallback for concept descriptions.
        reader.read_into(object_store=big_store,
                        file_store=file_store)

    # Locate the TechnicalData submodel by its id_short; skip the product if absent.
    for identifiable in object_store:
        if identifiable.id_short == "TechnicalData":
            technical_data_url = identifiable.id
            break
    else:
        print(f"TechnicalData submodel not found for {product_id}")
        continue
    submodel = object_store.get_identifiable(technical_data_url)
    technical_properties = submodel.get_referable("TechnicalProperties")

    extract_template(
        company=company,
        submodel=submodel,
        custom_templates=custom_templates,
        object_store=object_store,
        big_store=big_store,
        unavailable_semantic_ids=unavailable_semantic_ids,
        product_id=product_id,
    )


In [4]:
# Write the collected templates as JSON. The target directory is created first
# so that a fresh working directory does not fail with FileNotFoundError.
os.makedirs("temp/dict", exist_ok=True)
with open("temp/dict/CustomDictionary-0.0.json", "w", encoding="utf-8") as file:
    json.dump(custom_templates, file, default=dictionary_serializer)

## 2.2 Dictionary Usage Example
This section provides an example of how to load and utilize a generated template dictionary in subsequent workflows.

In [5]:
# Create a custom dictionary by inheriting from the base Dictionary class with dummy URL methods

class CustomDictionary(Dictionary):
    """Minimal ``Dictionary`` subclass for release '0.0'.

    Both URL lookups always answer ``None``.
    """

    supported_releases = ['0.0']

    def get_class_url(self, class_id: str) -> str | None:
        """Return ``None`` for every class id; no detail page is provided."""
        return None

    def get_property_url(self, property_id: str) -> str | None:
        """Return ``None`` for every property id; no detail page is provided."""
        return None

# Load the JSON file written in section 2.1 and show the property definitions of one class.
# NOTE: `product_id` is the value left over from the last iteration of the template-creation loop.
dictionary = CustomDictionary(release='0.0')
dictionary.load_from_file("temp/dict/CustomDictionary-0.0.json")
class_properties = dictionary.get_class_properties(product_id)
print(class_properties)

[PropertyDefinition(id='LevelOfDetail', name={'en': 'LevelOfDetail'}, type='string', definition={'en': 'level of detail'}, unit='', values=['HIGH']), PropertyDefinition(id='Length', name={'en': 'Length'}, type='string', definition={'en': 'Length'}, unit='', values=['93.60', '69.80', '77.00']), PropertyDefinition(id='Weight', name={'en': 'Weight'}, type='string', definition={'en': 'Weight'}, unit='', values=['15.047', '23.674', '13.680']), PropertyDefinition(id='MaterialOfInsulation', name={'en': 'MaterialOfInsulation'}, type='string', definition={'en': 'MaterialOfInsulation'}, unit='', values=['Polyamide']), PropertyDefinition(id='TARIC', name={'en': 'TARIC'}, type='string', definition={'en': 'customs tariff number (TARIC)'}, unit='', values=['85369010000']), PropertyDefinition(id='CountryOfOrigin', name={'en': 'CountryOfOrigin'}, type='string', definition={'en': 'country of origin'}, unit='', values=['PL', 'DE']), PropertyDefinition(id='AddressOfAdditionalLink', name={'en': 'AddressOf