In [1]:
import re
import json
import requests
from rdkit import Chem
import pubchempy as pcp # type: ignore
from rdkit import Chem
from typing import Optional, List, Dict, Any, Union, Tuple

In [2]:
from typing import List

def is_chemically_compatible(
    existing_pictograms: List[str],
    new_pictograms: List[str],
    existing_acid_base_class: str,
    new_acid_base_class: str,
    existing_state: str,
    new_state: str,
    group_name: str = None
) -> bool:
    # Rule 1: Acid/base incompatibility
    if ("acid" in existing_acid_base_class and "base" in new_acid_base_class) or \
       ("base" in existing_acid_base_class and "acid" in new_acid_base_class):
        return False

    # Rule 2: Incompatible pictograms
    incompatible_pairs = [
        ("Flammable", "Oxidizer"),
        ("Flammable", "Corrosive"),
        ("Corrosive", "Oxidizer")
    ]
    for pic1 in existing_pictograms:
        for pic2 in new_pictograms:
            if (pic1, pic2) in incompatible_pairs or (pic2, pic1) in incompatible_pairs:
                return False

    # Rule 3: Acid + Corrosive + Acute Toxic or Health Hazard
    if "acid" in existing_acid_base_class and "Corrosive" in existing_pictograms:
        if "Acute Toxic" in new_pictograms or "Health Hazard" in new_pictograms:
            return False
    if "acid" in new_acid_base_class and "Corrosive" in new_pictograms:
        if "Acute Toxic" in existing_pictograms or "Health Hazard" in existing_pictograms:
            return False

    # Rule 4: Solids and liquids not together
    if (
        (existing_state == "solid" and new_state == "liquid") or
        (existing_state == "liquid" and new_state == "solid")
    ):
        return False

    # Rule 5: Group-based restrictions (overrides compound-level checks)
    if group_name:
        group_name = group_name.lower()

        if group_name == "oxidizer":
            if "Flammable" in new_pictograms or "Corrosive" in new_pictograms:
                return False
        if group_name == "flammable" or group_name == "pyrophoric":
            if "Oxidizer" in new_pictograms or "Corrosive" in new_pictograms:
                return False
        if "corrosive" in group_name or "irritant" in group_name:
            if "Oxidizer" in new_pictograms or "Flammable" in new_pictograms:
                return False
        if "toxicity" in group_name or group_name == "cmr_stot":
            if "acid" in new_acid_base_class:
                return False
        if "acid" in group_name:
            if "Health Hazard" in new_pictograms or "Acute Toxic" in new_pictograms:
                return False

    return True


In [5]:
# Keep default_group(), default_group_gas(), and initialize_storage_groups() as before
def default_group():
    return {"solid": [], "liquid": []}

def default_group_gas():
    return {"gas": []}

def initialize_storage_groups() -> Dict[str, Dict[str, List[Dict[str, Any]]]]:
    return {
        "none": default_group(),
        "hazardous_environment": default_group(),
        "acute_toxicity": default_group(),
        "cmr_stot": default_group(),
        "toxicity_2_3": default_group(),
        "acid_corrosive_1": default_group(),
        "acid_irritant": default_group(),
        "base_corrosive_1": default_group(),
        "base_irritant": default_group(),
        "pyrophoric": default_group(),
        "flammable": default_group(),
        "oxidizer": default_group(),
        "explosive": default_group(),
        "compressed_gas": default_group_gas(),
        "nitric_acid": default_group()
    }

def chemsort_multiple_order_3(
    compounds: List[Dict[str, Any]],
    storage_groups: Dict[str, Dict[str, List[Dict[str, Any]]]]
) -> Dict[str, Dict[str, List[Dict[str, Any]]]]:
    phrases_hazard = [
        "may cause genetic defects", "cancer", "may damage fertility", "causes damage to organs"
    ]
    phrases_flam = [
        "catches fire spontaneously", "in contact with water emits", "may react explosively"
    ]

    pictogram_priority = {
        "Explosive": 1,
        "Oxidizer": 2,
        "Flammable": 3,
        "Corrosive": 4,
        "Acute Toxic": 5,
        "Health Hazard": 5,
        "Irritant": 6,
        "Environmental Hazard": 6,
        "Compressed Gas": 1
    }

    def compound_priority(compound):
        if compound["sorted_pictograms"]:
            return pictogram_priority.get(compound["sorted_pictograms"][0], 100)
        return 100

    compounds = sorted(compounds, key=compound_priority)

    custom_group_counter = 1
    custom_group_prefix = "custom_storage_"
    custom_groups = [key for key in storage_groups if key.startswith(custom_group_prefix)]

    def is_compatible_with_group(group_name, state_key, group_dict=storage_groups):
        compounds_in_group = group_dict[group_name][state_key]
        if not compounds_in_group:
            # Perform compatibility check based on the group name even if empty
            return is_chemically_compatible(
                [],
                compound["sorted_pictograms"],
                "",
                compound["acid_base_class"],
                "",
                compound["state_room_temp"],
                group_name)
        for existing in compounds_in_group:
            if not is_chemically_compatible(
                existing["sorted_pictograms"],
                compound["sorted_pictograms"],
                existing["acid_base_class"],
                compound["acid_base_class"],
                existing["state_room_temp"],
                compound["state_room_temp"],
                group_name
            ):
                return False
        return True

    for compound in compounds:
        chemical = compound["name"]
        sorted_pictograms = compound["sorted_pictograms"]
        hazard_statements = compound["hazard_statements"]
        acid_base_class = compound["acid_base_class"].lower()
        state = compound["state_room_temp"]
        state_key = 'liquid' if 'liquid' in state else 'solid' if 'solid' in state else 'gas'

        all_statements = " ".join(hazard_statements).lower()
        sorted_successfully = False

        if sorted_pictograms:
            first_picto = sorted_pictograms[0]

            if chemical.lower() == "nitric acid":
                storage_groups["nitric_acid"][state_key].append(compound)
                sorted_successfully = True

            elif first_picto == "Compressed Gas":
                storage_groups["compressed_gas"][state_key].append(compound)
                sorted_successfully = True

            elif first_picto == "Explosive":
                storage_groups["explosive"][state_key].append(compound)
                sorted_successfully = True

            elif first_picto == "Oxidizer":
                group = "oxidizer"
                if is_compatible_with_group(group, state_key, compound):
                    storage_groups["oxidizer"][state_key].append(compound)
                    sorted_successfully = True

            elif first_picto == "Flammable":
                group = "pyrophoric" if any(p in all_statements for p in phrases_flam) else "flammable"
                if is_compatible_with_group(group, state_key, compound):
                    storage_groups[group][state_key].append(compound)
                    sorted_successfully = True

            elif first_picto == "Corrosive":
                is_base = "base" in acid_base_class
                is_acid = "acid" in acid_base_class
                is_severe = "causes severe skin burns and eye damage" in all_statements
                group = None
                if is_base:
                    group = "base_corrosive_1" if is_severe else "base_irritant"
                elif is_acid:
                    group = "acid_corrosive_1" if is_severe else "acid_irritant"
                if group and is_compatible_with_group(group, state_key, compound):
                    storage_groups[group][state_key].append(compound)
                    sorted_successfully = True

            elif first_picto in ["Acute Toxic", "Health Hazard"]:
                if "fatal" in all_statements or "toxic" in all_statements:
                    group = "acute_toxicity"
                elif any(p in all_statements for p in phrases_hazard):
                    group = "cmr_stot"
                else:
                    group = "toxicity_2_3"
                if is_compatible_with_group(group, state_key, compound):
                    storage_groups[group][state_key].append(compound)
                    sorted_successfully = True

            elif first_picto in ["Irritant", "Environmental Hazard"]:
                group = "hazardous_environment" if "toxic to aquatic life" in all_statements else "none"
                if is_compatible_with_group(group, state_key, compound):
                    storage_groups[group][state_key].append(compound)
                    sorted_successfully = True

        if not sorted_successfully:
            if not sorted_pictograms:
                group = "none"
                if is_compatible_with_group(group, state_key, compound):
                    storage_groups[group][state_key].append(compound)
                    sorted_successfully = True
            else:
                for custom_group in custom_groups:
                    if is_compatible_with_group(custom_group, state_key, compound):
                        storage_groups[custom_group][state_key].append(compound)
                        sorted_successfully = True
                        break

        if not sorted_successfully:
            while True:
                new_group_name = f"{custom_group_prefix}{custom_group_counter}"
                if new_group_name not in storage_groups:
                    break
                custom_group_counter += 1
            custom_groups.append(new_group_name)
            storage_groups[new_group_name] = (
                {"liquid": [], "solid": [], "gas": []}
            )
            storage_groups[new_group_name][state_key].append(compound)

    return storage_groups



In [6]:
test_compounds = [
    {
        "name": "Nitric Acid",
        "sorted_pictograms": ["Oxidizer", "Corrosive", "Health Hazard"],
        "hazard_statements": [
            "Causes severe skin burns and eye damage",
            "May cause respiratory irritation",
            "May cause genetic defects"
        ],
        "state_room_temp": "liquid",
        "acid_base_class": "acid"
    },
    {
        "name": "Sodium Hydroxide",
        "sorted_pictograms": ["Corrosive"],
        "hazard_statements": ["Causes severe skin burns and eye damage"],
        "state_room_temp": "solid",
        "acid_base_class": "base"
    },
    {
        "name": "Acetic Acid",
        "sorted_pictograms": ["Flammable", "Corrosive"],
        "hazard_statements": ["Causes severe skin burns and eye damage"],
        "state_room_temp": "liquid",
        "acid_base_class": "acid"
    },
    {
        "name": "Hydrogen Peroxide",
        "sorted_pictograms": ["Oxidizer", "Flammable", "Corrosive", "Health Hazard"],
        "hazard_statements": [
            "May intensify fire",
            "Causes severe skin burns and eye damage"
        ],
        "state_room_temp": "liquid",
        "acid_base_class": "acid"
    },
    {
        "name": "Ammonium Nitrate",
        "sorted_pictograms": ["Explosive", "Oxidizer"],
        "hazard_statements": ["May explode if heated", "May intensify fire"],
        "state_room_temp": "solid",
        "acid_base_class": "neutral"
    },
    {
        "name": "Methanol",
        "sorted_pictograms": ["Flammable", "Health Hazard", "Irritant"],
        "hazard_statements": [
            "Highly flammable liquid and vapor",
            "Toxic if swallowed, in contact with skin or inhaled",
            "Causes damage to organs"
        ],
        "state_room_temp": "liquid",
        "acid_base_class": "neutral"
    },
    {
        "name": "Triflic Acid",
        "sorted_pictograms": ["Corrosive"],
        "hazard_statements": ["Causes severe skin burns and eye damage"],
        "state_room_temp": "liquid",
        "acid_base_class": "acid"
    },
    {
        "name": "Sodium Bicarbonate",
        "sorted_pictograms": [],
        "hazard_statements": [],
        "state_room_temp": "solid",
        "acid_base_class": "base"
    },
    {
        "name": "Toluene",
        "sorted_pictograms": ["Flammable", "Health Hazard", "Irritant"],
        "hazard_statements": [
            "Highly flammable liquid and vapor",
            "Suspected of damaging fertility or the unborn child"
        ],
        "state_room_temp": "liquid",
        "acid_base_class": "neutral"
    },
    {
        "name": "Calcium Oxide",
        "sorted_pictograms": ["Corrosive"],
        "hazard_statements": ["Causes severe skin burns and eye damage"],
        "state_room_temp": "solid",
        "acid_base_class": "base"
    },
    {
        "name": "Picric Acid",
        "sorted_pictograms": ["Explosive", "Flammable", "Health Hazard"],
        "hazard_statements": [
            "Explosive; risk of explosion by shock, friction, fire or other sources of ignition",
            "May cause genetic defects"
        ],
        "state_room_temp": "solid",
        "acid_base_class": "acid"
    },
    {
        "name": "Hydrazine",
        "sorted_pictograms": ["Flammable", "Corrosive", "Health Hazard"],
        "hazard_statements": [
            "Highly flammable liquid and vapor",
            "Causes severe skin burns and eye damage",
            "May cause cancer"
        ],
        "state_room_temp": "liquid",
        "acid_base_class": "base"
    },
    {
        "name": "Sucrose",
        "sorted_pictograms": [],
        "hazard_statements": [],
        "state_room_temp": "solid",
        "acid_base_class": "neutral"
    },
    {
        "name": "Sodium Chloride",
        "sorted_pictograms": [],
        "hazard_statements": [],
        "state_room_temp": "solid",
        "acid_base_class": "neutral"
    },
    {
        "name": "amino acid",
        "sorted_pictograms": [],
        "hazard_statements": [],
        "state_room_temp": "solid",
        "acid_base_class": "acid"
    },
    {
        "name": "Nitroglycerin",
        "sorted_pictograms": ["Explosive", "Flammable", "Health Hazard", "Irritant"],
        "hazard_statements": [
            "Unstable explosive",
            "Highly flammable liquid and vapor",
            "May cause damage to organs"
        ],
        "state_room_temp": "liquid",
        "acid_base_class": "neutral"
    }
]




KeyError: 'oxidizer'