# In [None]:
# This code contains a large number of function definitions that allow the user to
# create or modify a json file in accord with CrowdChem's json structure.

"""
The code begins by attempting to load the example JSON file "特表2023-516834.json" so that its entries can be updated.
If no such file exists, the code starts from its own empty JSON structure instead.
Some examples of how to use the various functions can be found at the end of the code. :D

Here is a brief summary of each function definition and its purpose:
(For a more detailed view of each function and its arguments, see the corresponding docstring.)

= parse_value_unit(): Parses a string containing a value and an optional unit into a tuple (value, unit)
= get_next_available_id(): Finds the next available integer ID for a new top-level entry.
= _create_material_id_content(): Helper to create the 'id' content for a raw material entry.
= create_raw_material_entry(): Creates a single raw material entry to be used in a '原料' (raw material) list.
= create_process_step_entry(): Creates a single process step entry for a 'プロセス' (process) list.
= create_property_item_entry(): Creates a single property item entry for a '物性' (property) list.
= add_entry(): Adds a new entry (実施例 or メタデータ) to the JSON data.
= update_entry(): Updates an existing entry in the JSON data.

"""

import json
import re
import copy # Import copy for deep copies of complex structures

# --- Data Loading ---
# Seed `patent_data` from the example patent file when it is available; the
# demonstration code further down adds to and updates this dictionary.
try:
    f = open('特表2023-516834.json', 'r', encoding='utf-8')
except FileNotFoundError:
    # No seed file on disk: warn and continue with an empty top-level mapping.
    print("Warning: '特表2023-516834.json' not found. Starting with an empty JSON structure.")
    patent_data = {}
else:
    # The file exists; parse it and make sure the handle is closed afterwards.
    with f:
        patent_data = json.load(f)


# --- Helper Functions (Reused and new) ---

# A signed decimal number -- optionally written as ".5" or with an exponent
# such as "1e-3" -- followed by arbitrary trailing text treated as the unit.
_VALUE_WITH_UNIT = re.compile(
    r"([+\-]?(?:\d+(?:\.\d+)?|\.\d+)(?:[eE][+\-]?\d+)?)\s*(.*)"
)


def _standardize_unit(unit):
    """
    Strip *unit* and standardize its micro symbol; return None if nothing is left.

    GREEK SMALL LETTER MU (U+03BC) looks identical to MICRO SIGN (U+00B5), so
    the code points are spelled out as escapes to keep the mapping readable.
    """
    unit = unit.strip()
    if not unit:
        return None
    return unit.replace('\u03bcm', '\u00b5m').replace('\u03bcs', '\u00b5s')


def parse_value_unit(s):
    """
    Parses a raw value into a tuple (value, unit).

    Rules, applied in this order:
      * None or blank text           -> (None, None)
      * int / float                  -> (float(s), None)
      * "yes" / "no" (any case)      -> (s, None), text kept as given
      * "76%"                        -> (76.0, "%")
      * "±0.05 mm"                   -> ("±0.05", "mm"); the tolerance stays text
      * "1200 mPa・s", "1e-3 mol"     -> (float, unit), unit is None when absent
      * anything else                -> (float(s), None) if float() accepts it,
                                        otherwise (s, None)

    Args:
        s: A number, a string such as "5時間", or None.
    Returns:
        A (value, unit) tuple; unit is None when no unit could be identified.
    """
    if s is None:
        # Without this guard None would be stringified and stored as "None".
        return None, None

    if isinstance(s, (int, float)):
        return float(s), None  # Ensure numeric types are floats for consistency

    s = str(s).strip()
    if not s:
        return None, None

    # Non-numeric flag values are passed through untouched.
    if s.lower() in ("yes", "no"):
        return s, None

    # Percentage: "76%". Anything that is not a plain number before the '%'
    # falls through to the generic handling below.
    if s.endswith('%'):
        try:
            return float(s[:-1].strip()), "%"
        except ValueError:
            pass

    # Tolerance: "±VALUE UNIT". Split on the first space only so the value
    # part (including the '±') stays intact; the unit may be missing.
    if s.startswith('±'):
        tolerance, _, trailing = s.partition(' ')
        return tolerance, _standardize_unit(trailing)

    # General case: number followed by an optional unit (e.g. "g/cm³").
    match = _VALUE_WITH_UNIT.match(s)
    if match:
        # The pattern only captures text that float() accepts.
        return float(match.group(1)), _standardize_unit(match.group(2))

    # Last resort: spellings only float() understands (e.g. "inf"); otherwise
    # hand the text back unchanged.
    try:
        return float(s), None
    except ValueError:
        return s, None


def get_next_available_id(current_data):
    """
    Returns one more than the largest integer-like key of `current_data`.

    Keys that int() rejects (e.g. "meta_info") are skipped. An empty or
    missing mapping, or one without any positive integer key, yields 1.
    """
    if not current_data:
        return 1

    highest = 0
    for raw_key in current_data.keys():
        try:
            candidate = int(raw_key)
        except ValueError:
            # Not an integer-like key; it does not take part in numbering.
            continue
        if candidate > highest:
            highest = candidate
    return highest + 1


def _create_material_id_content(
    material_name: str,
    material_type: str = "物質名", # "物質名" or "名称" for named components
    pubchem_id: float = None,
    smiles: str = None,
    raw_materials: list = None,  # List of raw_material_entry dicts for nested components
    processes: list = None,      # List of process_step_entry dicts for nested components
    properties: list = None      # List of property_item_entry dicts for nested components
):
    """
    Builds the dictionary stored under 'id' in a raw material entry.

    Two shapes are produced, both tagged as "メタデータ":
      * material_type == "物質名": a simple substance carrying the name plus
        the optional PubChemID / smiles; the nested lists are not used.
      * any other material_type: a named component carrying the name under
        "名称" plus whichever of 原料 / プロセス / 物性 were supplied
        non-empty; pubchem_id and smiles are not used.

    No 'data_name' key is set by this helper.
    """
    if material_type == "物質名":
        substance = {"実施例/メタデータ": "メタデータ", "物質名": material_name}
        if pubchem_id is not None:
            substance["PubChemID"] = pubchem_id
        if smiles:
            substance["smiles"] = {"主smiles": [{"smiles": smiles}]}
        return substance

    component = {"実施例/メタデータ": "メタデータ", "名称": material_name}
    optional_sections = (
        ("原料", raw_materials),
        ("プロセス", processes),
        ("物性", properties),
    )
    for section_key, section_items in optional_sections:
        if section_items:
            component[section_key] = section_items
    return component


def create_raw_material_entry(
    material_name: str,
    value: float,
    unit: str,
    material_type: str = "物質名", # "物質名" or "名称" for nested component
    pubchem_id: float = None,
    smiles: str = None,
    nested_raw_materials: list = None,
    nested_processes: list = None,
    nested_properties: list = None
):
    """
    Builds one item for a '原料' (raw material) list.

    The material description is delegated to _create_material_id_content and
    stored under "id"; the quantity and its unit are stored as given.

    Args:
        material_name: The name of the substance or component.
        value: The quantity of the material (stored without parsing).
        unit: The unit of quantity (e.g., "質量部").
        material_type: "物質名" for simple substances, "名称" for named components.
        pubchem_id: PubChem ID (optional, for substances).
        smiles: SMILES string (optional, for substances).
        nested_raw_materials: Raw material entries of a complex component.
        nested_processes: Process step entries of a complex component.
        nested_properties: Property item entries of a complex component.
    Returns:
        A dictionary with the keys "id", "値" and "単位", in that order.
    """
    entry = {
        "id": _create_material_id_content(
            material_name=material_name,
            material_type=material_type,
            pubchem_id=pubchem_id,
            smiles=smiles,
            raw_materials=nested_raw_materials,
            processes=nested_processes,
            properties=nested_properties,
        ),
    }
    entry["値"] = value
    entry["単位"] = unit
    return entry


def _parse_conditions(conditions):
    """
    Normalizes a list of condition dicts for a process step or property item.

    Each input dict must contain "条件名" and "値" and may contain "単位".
    The value is run through parse_value_unit; an explicitly supplied "単位"
    takes precedence over a unit embedded in the value text (e.g. "5時間").

    Returns:
        A new list of dicts with "条件名", "値" and, when known, "単位".
    Raises:
        KeyError: If a condition lacks "値" or "条件名".
    """
    parsed_conditions = []
    for cond in conditions:
        cond_value, inferred_unit = parse_value_unit(cond["値"])
        parsed_cond = {"条件名": cond["条件名"], "値": cond_value}
        # Numeric values carry no unit of their own, so without this lookup an
        # explicit {"値": 200.0, "単位": "°C"} would lose its unit.
        cond_unit = cond.get("単位") or inferred_unit
        if cond_unit:
            parsed_cond["単位"] = cond_unit
        parsed_conditions.append(parsed_cond)
    return parsed_conditions


def create_process_step_entry(
    process_name: str,
    conditions: list = None # List of dicts: {"条件名": str, "値": val, "単位": str (optional)}
):
    """
    Creates a single process step entry for a 'プロセス' list.
    Args:
        process_name: The name of the process (e.g., "混合", "重合反応").
        conditions: A list of dictionaries, each describing a condition for the process.
                    Each condition dict should have "条件名" and "値", and optionally "単位".
                    None or an empty list produces an entry without a "条件" key.
    Returns:
        A dictionary representing a process step entry.
    Raises:
        KeyError: If a condition lacks "値" or "条件名".
    """
    entry = {
        "プロセス名": process_name
    }
    if conditions:
        entry["条件"] = _parse_conditions(conditions)
    return entry


def create_property_item_entry(
    property_name: str,
    value: object, # Can be float or string (e.g., "Yes", "No")
    unit: str = None, # Optional unit
    conditions: list = None # List of dicts: {"条件名": str, "値": val, "単位": str (optional)}
):
    """
    Creates a single property item entry for a '物性' list.
    Args:
        property_name: The name of the property (e.g., "粘度", "T剥離強度").
        value: The value of the property; it is passed through parse_value_unit,
               so text such as "1200 mPa・s" is split into number and unit.
        unit: The unit of the property (optional). When given (not None) it
              overrides any unit found inside `value`.
        conditions: A list of dictionaries, each describing a condition for the measurement.
                    Each condition dict should have "条件名" and "値", and optionally "単位".
    Returns:
        A dictionary representing a property item entry.
    Raises:
        KeyError: If a condition lacks "値" or "条件名".
    """
    parsed_value, inferred_unit = parse_value_unit(value)

    # An explicitly passed unit wins; otherwise fall back to the inferred one.
    final_unit = unit if unit is not None else inferred_unit

    entry = {
        "物性名": property_name,
        "値": parsed_value
    }
    if final_unit:
        entry["単位"] = final_unit

    if conditions:
        entry["条件"] = _parse_conditions(conditions)
    return entry


def add_entry(
    data: dict,
    entry_type: str, # "実施例" or "メタデータ"
    name: str = None, # For "名称" or "物質名"
    pubchem_id: float = None, # For "物質名" metadata
    smiles: str = None, # For "物質名" metadata
    raw_materials: list = None, # List of raw_material_entry dicts
    processes: list = None, # List of process_step_entry dicts
    properties: list = None, # List of property_item_entry dicts
    data_name_prefix: str = "特表2023-516834" # Prefix of the generated 'data_name'
):
    """
    Adds a new entry (実施例 or メタデータ) to the JSON data.

    The entry is stored in `data` (which is modified in place) under the next
    free integer ID, as a string key, and receives a 'data_name' of the form
    "<data_name_prefix>_<id>".

    Args:
        data: The current dictionary of JSON entries.
        entry_type: "実施例" or "メタデータ".
        name: The name of the example/metadata. A 'メタデータ' entry is treated
              as a simple substance (name stored under '物質名') when pubchem_id
              or smiles is given; otherwise it is a named component ('名称').
        pubchem_id: PubChem ID (optional, for simple substances).
        smiles: SMILES string (optional, for simple substances).
        raw_materials: List of raw material entries for the top-level example/component.
        processes: List of process step entries for the top-level example/component.
        properties: List of property item entries for the top-level example/component.
                    The three lists are not stored on simple-substance metadata.
        data_name_prefix: Text placed before "_<id>" in 'data_name'. Defaults to
                          the patent number this script was written for.
    Returns:
        The updated data dictionary with the new entry.
    Raises:
        ValueError: If entry_type is not recognized or name is missing/empty.
    """
    # Validate everything first so a rejected call never touches `data`.
    if entry_type not in ("実施例", "メタデータ"):
        raise ValueError("Invalid 'entry_type'. Must be '実施例' or 'メタデータ'.")

    # Supplying a PubChem ID or SMILES marks metadata as a simple substance.
    is_substance = entry_type == "メタデータ" and (
        pubchem_id is not None or smiles is not None
    )

    if not name:
        if entry_type == "実施例":
            raise ValueError("An '実施例' entry requires a 'name'.")
        if is_substance:
            raise ValueError("A 'メタデータ' substance entry requires a 'name' (物質名).")
        raise ValueError("A 'メタデータ' component entry requires a 'name' (名称).")

    new_entry = {
        "実施例/メタデータ": entry_type,
    }

    if is_substance:
        new_entry["物質名"] = name
        if pubchem_id is not None:
            new_entry["PubChemID"] = pubchem_id
        if smiles:
            new_entry["smiles"] = {"主smiles": [{"smiles": smiles}]}
    else:
        # Examples and named components share the same optional sections.
        new_entry["名称"] = name
        optional_sections = (
            ("原料", raw_materials),
            ("プロセス", processes),
            ("物性", properties),
        )
        for section_key, section_items in optional_sections:
            if section_items:
                new_entry[section_key] = section_items

    new_id = str(get_next_available_id(data))
    new_entry["data_name"] = f"{data_name_prefix}_{new_id}"
    data[new_id] = new_entry
    return data

def update_entry(
    data: dict,
    entry_id: str, # The ID of the entry to update (e.g., "1", "104"); an int is accepted too
    updates: dict # A dictionary of fields to update (e.g., {"名称": "Updated Example 1", "物性": [...]})
):
    """
    Updates an existing entry in the JSON data.

    Args:
        data: The current dictionary of JSON entries (modified in place).
        entry_id: The ID of the entry to update. If it is not a key of `data`
                  as given, its str() form is tried as well, so 1 finds "1".
        updates: A dictionary where keys are the field names to update
                 and values are the new values. Every field, including the
                 lists 原料, プロセス and 物性, is replaced wholesale by the new
                 value. The only exception is "Metadata": when both the stored
                 value and the new value are dicts, the new keys are merged
                 into the stored dict.
    Returns:
        The updated data dictionary.
    Raises:
        ValueError: If the entry_id does not exist.
    """
    if entry_id not in data:
        # Entries loaded from JSON are keyed by strings; tolerate the int form.
        text_id = str(entry_id)
        if text_id not in data:
            raise ValueError(f"Entry with ID '{entry_id}' not found.")
        entry_id = text_id

    # Work on a deep copy and swap it in at the end, so `data` keeps the old
    # entry if anything below raises part-way through.
    entry_to_update = copy.deepcopy(data[entry_id])

    for key, value in updates.items():
        stored = entry_to_update.get(key)
        if key == "Metadata" and isinstance(stored, dict) and isinstance(value, dict):
            # Merge sub-fields instead of discarding the ones not mentioned.
            stored.update(value)
        else:
            entry_to_update[key] = value

    data[entry_id] = entry_to_update
    return data


# --- Main Logic for Demonstration ---
# Everything below runs at module level: it adds three sample entries to
# `patent_data` with the builder functions defined above.

# --- Example Usage (adding new entries) ---

# 1. Add a new simple substance metadata entry.
#    Because pubchem_id/smiles are given, add_entry stores the name as '物質名'.
patent_data = add_entry(
    patent_data,
    entry_type="メタデータ",
    name="新規試薬X",
    pubchem_id=99999.0,
    smiles="CCO"
)

# 2. Create a nested polyester polyol (intermediate product) metadata entry.
# Define its raw materials first
nested_raw_mat_1 = create_raw_material_entry(
    material_name="ジエチレングリコール",
    value=10.0,
    unit="質量部",
    material_type="物質名" # It's a simple substance
)
nested_raw_mat_2 = create_raw_material_entry(
    material_name="アジピン酸",
    value=90.0,
    unit="質量部",
    material_type="物質名"
)
# Define its process; condition values may be numbers or "value + unit" text.
nested_process = create_process_step_entry(
    process_name="重合反応",
    conditions=[
        {"条件名": "反応温度", "値": 200.0, "単位": "°C"},
        {"条件名": "反応時間", "値": "5時間"}
    ]
)
# Define its properties
nested_property = create_property_item_entry(
    property_name="水酸基価",
    value=150.0,
    unit="mgKOHg^-1"
)

# Now, create the named component metadata using the above nested structures.
# No pubchem_id/smiles is passed, so add_entry treats it as a named component.
patent_data = add_entry(
    patent_data,
    entry_type="メタデータ",
    name="新規ポリエステルポリオールZ", # This will be '名称'
    raw_materials=[nested_raw_mat_1, nested_raw_mat_2],
    processes=[nested_process],
    properties=[nested_property]
)

# 3. Add a new '実施例' (example) using newly defined materials and processes/properties
# Recover the data_name of the polyester polyol added just above: the next
# free ID minus one is the highest ID currently in use.
new_polyester_id = str(get_next_available_id(patent_data) - 1) # ID of the last added item
new_polyester_data_name = f"特表2023-516834_{new_polyester_id}"

# Create a raw material entry that references the new polyester polyol.
# It is written out by hand (instead of via create_raw_material_entry) so that
# the 'id' dictionary can also carry the 'data_name' of the referenced entry.
example_raw_material_polyester = {
    "id": {
        "実施例/メタデータ": "メタデータ",
        "名称": "新規ポリエステルポリオールZ",
        "原料": [nested_raw_mat_1, nested_raw_mat_2], # Include its nested details for completeness
        "プロセス": [nested_process],
        "物性": [nested_property],
        "data_name": new_polyester_data_name # Reference the unique data_name
    },
    "値": 100.0,
    "単位": "質量部"
}

# Add a simple raw material (assuming it exists in original metadata or is generic)
example_raw_material_simple = create_raw_material_entry(
    material_name="イソホロンジイソシアネートのヌレート体", # Presumably a substance name already present in the source data -- confirm
    value=20.0,
    unit="質量部"
)

# Define properties for the new example
example_properties = [
    create_property_item_entry(
        property_name="新規粘度",
        value=1200.0,
        unit="mPa・s",
        conditions=[{"条件名": "測定温度", "値": 80.0, "単位": "°C"}]
    ),
    create_property_item_entry(
        property_name="新規強度",
        value=4.5,
        unit="N100m^-2"
    )
]

# Register the example itself; the process step has a name but no conditions.
patent_data = add_entry(
    patent_data,
    entry_type="実施例",
    name="新規実施例XX",
    raw_materials=[example_raw_material_polyester, example_raw_material_simple],
    processes=[create_process_step_entry("新規混合プロセス")],
    properties=example_properties
)


# --- Example Usage (updating existing entries) ---
# Update the entry stored under ID "1": first rename it, then extend its
# property list. A missing entry or missing field is reported, not raised.
try:
    current_example_1_name = patent_data["1"]["名称"]
    print(f"\nBefore update, Example 1 name: {current_example_1_name}")

    # Update its name
    patent_data = update_entry(
        patent_data,
        entry_id="1",
        updates={"名称": "更新された実施例1"}
    )
    print(f"After update, Example 1 name: {patent_data['1']['名称']}")

    # Let's try to add a new property to an existing entry
    # Note: When updating lists (原料, プロセス, 物性), the current implementation replaces them.
    # To *add* to an existing list, you'd fetch the list, append, and then pass the new list.

    # Get current properties of Example 1 (empty list if it has none yet)
    existing_properties_example_1 = patent_data["1"].get("物性", [])

    # Create a new property to add
    new_property_for_example_1 = create_property_item_entry(
        property_name="新追加物性",
        value=100.0,
        unit="psi"
    )

    # Build a new list (old items + the new one) rather than appending in place
    updated_properties_list = existing_properties_example_1 + [new_property_for_example_1]

    # Update Example 1 with the modified properties list
    patent_data = update_entry(
        patent_data,
        entry_id="1",
        updates={"物性": updated_properties_list}
    )
    print(f"\nUpdated Example 1 with new property. New properties count: {len(patent_data['1']['物性'])}")
    print(f"Last property of updated Example 1: {patent_data['1']['物性'][-1]['物性名']}")


except KeyError:
    # Raised by the direct lookups above when ID "1" or one of its fields is absent.
    print("\nCould not update example '1'. It might not exist in the initial data.")
except ValueError as e:
    # Raised by update_entry when the entry ID is unknown.
    print(f"\nError during update: {e}")


# --- Save the modified JSON data ---
# The result goes to a separate file, so the input file is never overwritten.
output_file_name_modified = "updated_patent_data.json"
with open(output_file_name_modified, 'w', encoding='utf-8') as f:
    # ensure_ascii=False keeps the Japanese keys and values readable in the file.
    json.dump(patent_data, f, ensure_ascii=False, indent=2)

print(f"\nModified JSON data saved to '{output_file_name_modified}'")

# Optionally, print a snippet of the converted data for review
# print("\n--- Modified Data Snippet (Last 3 Entries) ---")
# print(json.dumps(list(patent_data.values())[-3:], ensure_ascii=False, indent=2))
