# Load database components


The purpose of this notebook is to load the database components by parsing their text files into real Python objects.


In [1]:
import os
import pandas as pd
from pandas import DataFrame
import potions as pt

In [2]:
def get_file_contents(file_name: str) -> str:
    """
    Return the full text of one database component file.

    ``file_name`` is the base name of the file without its ``.txt``
    extension. The file is looked up inside ``./database_components``,
    which is resolved relative to the current working directory.
    """
    component_path = os.path.join("./database_components", f"{file_name}.txt")
    with open(component_path, "r") as component_file:
        contents = component_file.read()
    return contents

# Temperatures and Debye-Huckel Parameters


In [3]:
# Raw text of the "temperature_dh_points" component file.
# NOTE(review): dh_str is not read by any other cell visible in this notebook — confirm it is still needed.
dh_str: str = get_file_contents("temperature_dh_points")

In [4]:
# Parse the whitespace-delimited table and reshape it so that each row is a
# temperature and each column is one of the tabulated parameters.
dh_df: DataFrame = (
    pd.read_csv(
        "./database_components/temperature_dh_points.txt",
        sep="\\s+",
    )
    # Strip any single quotes wrapped around the header names
    .rename(columns=lambda label: label.strip("'"))
    .drop(columns="temperature")
    .set_index("points")
    .T
    # After the transpose the former "points" values are the column labels,
    # so they need the same quote stripping
    .rename(columns=lambda label: label.strip("'"))
    .rename_axis(index="temperature")
)
dh_df

points,adh,bdh,bdt
temperature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,0.4939,0.3253,0.0374
25.0,0.5114,0.3288,0.041
60.0,0.5465,0.3346,0.044
100.0,0.5995,0.3421,0.046
150.0,0.6855,0.3525,0.047
200.0,0.7994,0.3639,0.047
250.0,0.9593,0.3766,0.034
300.0,1.218,0.3925,0.0


# Primary Species

The primary species are the main species that the model tracks for both reactions and mass transport.


In [5]:
# Each line is read as whitespace-separated fields, in this order:
# quoted name, Debye-Huckel size parameter, charge, molar mass.
primary_lines: list[str] = get_file_contents("primary_species").splitlines()
primary_species: dict[str, pt.PrimaryAqueousSpecies] = {}
for primary_line in primary_lines:
    fields = primary_line.split()
    species_name = fields[0].strip("'")
    size_param = float(fields[1])
    species_charge = float(fields[2])
    species_mass = float(fields[3])
    species = pt.PrimaryAqueousSpecies(
        name=species_name,
        molar_mass=species_mass,
        charge=species_charge,
        dh_size_param=size_param,
    )
    primary_species[species_name] = species
print(f"Number of primary aqueous species: {len(primary_species)}")

Number of primary aqueous species: 100


# Secondary Species


The secondary species are controlled by equilibrium reactions, and their structure in the database is as follows:

- Name
- Number of stoichiometric species
- Stoichiometry
- Equilibrium constants at 8 temperatures
- Debye-Huckel size parameter
- Charge
- Molar mass


In [6]:
# Field layout of each line (whitespace separated):
#   name, N, N x (coefficient, species) pairs, 8 equilibrium constants,
#   Debye-Huckel size parameter, charge, molar mass
secondary_lines: list[str] = get_file_contents("secondary_species").splitlines()
secondary_species: dict[str, pt.SecondarySpecies] = {}
for secondary_line in secondary_lines:
    fields = secondary_line.split()
    species_name = fields[0].strip("'")
    n_stoich = int(fields[1])

    # Offsets of the fixed-width groups that follow the species count
    stoich_end = 2 + 2 * n_stoich
    eq_end = stoich_end + 8
    pair_fields = fields[2:stoich_end]
    eq_fields = fields[stoich_end:eq_end]

    size_param = float(fields[eq_end])
    species_charge = float(fields[eq_end + 1])
    species_mass = float(fields[eq_end + 2])

    # Pairs are written coefficient-first, species name second
    stoichiometry: dict[str, float] = {}
    for coeff_field, name_field in zip(pair_fields[0::2], pair_fields[1::2]):
        stoichiometry[name_field.strip("'")] = float(coeff_field)
    log_ks: list[float] = list(map(float, eq_fields))

    secondary_species[species_name] = pt.SecondarySpecies(
        name=species_name,
        stoichiometry=stoichiometry,
        eq_consts=log_ks,
        dh_size_param=size_param,
        charge=species_charge,
        molar_mass=species_mass,
    )


In [7]:
# Spot check: display one parsed entry as the cell output.
secondary_species["CO2(aq)"]

SecondarySpecies(name='CO2(aq)', stoichiometry={'H2O': -1.0, 'H+': 1.0, 'HCO3-': 1.0}, eq_consts=[-6.5804, -6.3447, -6.2684, -6.3882, -6.7235, -7.1969, -7.7868, -8.528], dh_size_param=3.0, charge=0.0, molar_mass=44.0098)

# Mineral species

- Name
- Molar volume in cm^3/mol
- Number of stoichiometric species
- Stoichiometry
- Equilibrium constants at 8 temperatures
- Molar mass in g/mol


In [13]:
# Field layout of each line (whitespace separated):
#   name, molar volume, N, N x (coefficient, species) pairs,
#   8 equilibrium constants, molar mass
mineral_lines: list[str] = get_file_contents("mineral_species").splitlines()
mineral_species: dict[str, pt.MineralSpecies] = {}
for mineral_line in mineral_lines:
    tokens: list[str] = mineral_line.split()
    name, rest = tokens[0].strip("'"), tokens[1:]
    molar_volume, rest = float(rest[0]), rest[1:]
    try:
        num_stoich, rest = int(rest[0]), rest[1:]
    except (ValueError, IndexError) as exc:
        # Report which entry is malformed instead of printing the name and
        # re-raising a context-free error.
        raise ValueError(
            f"Mineral {name!r}: expected an integer number of stoichiometric "
            f"species after the molar volume in line {mineral_line!r}"
        ) from exc
    stoich_tokens, rest = rest[0 : 2 * num_stoich], rest[2 * num_stoich :]
    eq_const_tokens, rest = rest[0:8], rest[8:]
    molar_mass = float(rest[0])
    # Pairs are written coefficient-first, species name second
    stoich: dict[str, float] = {
        x.strip("'"): float(y) for x, y in zip(stoich_tokens[1::2], stoich_tokens[::2])
    }
    eq_consts: list[float] = [float(x) for x in eq_const_tokens]
    mineral_species[name] = pt.MineralSpecies(
        name=name,
        molar_volume=molar_volume,
        stoichiometry=stoich,
        eq_consts=eq_consts,
        molar_mass=molar_mass,
    )


# Mineral kinetics


The mineral kinetic reactions are awkward to load because the file does not follow a regular structure and is not in a machine-readable format, so it needs custom parsing. Note: I am ignoring all reactions of type "irreversible", because we have never used these in BioRT.


In [48]:
# Split the kinetics file into sections. Lines starting with "+---" act as
# delimiters and are dropped; every run of lines between delimiters becomes
# one section.
kinetic_section_lines = get_file_contents("mineral_kinetic_parameters").splitlines()
kinetic_sections: list[list[str]] = []
section: list[str] = []
for kinetic_line in kinetic_section_lines:
    if kinetic_line.startswith("+---"):
        # A delimiter closes the section collected so far (if any)
        if section:
            kinetic_sections.append(section)
            section = []
    else:
        section.append(kinetic_line)
# Keep a final section even when the file does not end with a delimiter;
# previously this case raised IndexError on the emptied list.
if section:
    kinetic_sections.append(section)

# Sort the sections by reaction type. Only the first line containing "type"
# is inspected; sections with no such line, or with a type other than
# "monod" or "tst", are skipped.
tst_sections: list[list[str]] = []
monod_sections: list[list[str]] = []

for section in kinetic_sections:
    type_line = next((line for line in section if "type" in line), None)
    if type_line is None:
        continue
    if "monod" in type_line:
        monod_sections.append(section)
    elif "tst" in type_line:
        tst_sections.append(section)

# Convert the TST sections into TstReaction objects, stored as
# {mineral name: {label: reaction}}
tst_reactions: dict[str, dict[str, pt.TstReaction]] = {}
for section in tst_sections:
    # The first line of a section is the mineral name; the rest are data lines
    mineral = section[0].strip()
    rxn_label = ""
    rate_25c = -999  # placeholder kept when the section has no "rate(25C)" line
    species_dependence: dict[str, float] = {}

    for entry in section[1:]:
        if "label" in entry:
            rxn_label = entry.split("=")[1].strip()
            continue
        if "dependence" in entry:
            pieces = entry.split(":")
            # A dependence line without a ":" carries no species terms
            if len(pieces) > 1:
                pair_fields = pieces[1].split()
                # Terms are written as alternating species / exponent fields
                species_dependence.update(
                    (species, float(exponent))
                    for species, exponent in zip(pair_fields[::2], pair_fields[1::2])
                )
            continue
        if "rate(25C)" in entry:
            rate_25c = float(entry.split()[2])

    reaction = pt.TstReaction(
        mineral_name=mineral,
        label=rxn_label,
        dependence=species_dependence,
        rate_constant=rate_25c,
    )
    tst_reactions.setdefault(mineral, {})[rxn_label] = reaction


# Convert the Monod sections into MonodReaction objects, stored as
# {mineral name: {label: reaction}}
monod_reactions: dict[str, dict[str, pt.MonodReaction]] = {}
for section in monod_sections:
    # The first line of a section is the mineral name; the rest are data lines
    mineral = section[0].strip()
    rxn_label = ""
    rate_25c = -999  # placeholder kept when the section has no "rate(25C)" line
    monod_values: dict[str, float] = {}
    inhib_values: dict[str, float] = {}

    for entry in section[1:]:
        if "label" in entry:
            rxn_label = entry.split("=")[1].strip()
            continue
        if "monod_terms" in entry:
            target = monod_values
        elif "inhib_terms" in entry:
            target = inhib_values
        else:
            if "rate(25C)" in entry:
                rate_25c = float(entry.split()[2])
            continue
        # Terms follow the ":" as alternating species / value fields
        pair_fields = entry.split(":")[1].split()
        for species, value in zip(pair_fields[::2], pair_fields[1::2]):
            target[species] = float(value)

    reaction = pt.MonodReaction(
        mineral_name=mineral,
        label=rxn_label,
        monod_terms=monod_values,
        inhib_terms=inhib_values,
        rate_constant=rate_25c,
    )
    monod_reactions.setdefault(mineral, {})[rxn_label] = reaction

In [49]:
# Spot check: display the TST reaction stored under mineral "Calcite", label "h+".
tst_reactions["Calcite"]["h+"]

TstReaction(mineral_name='Calcite', label='h+', rate_constant=-3.0, dependence={'H+': 1.0})

In [52]:
# Spot check: display every Monod reaction stored under mineral "SOC".
monod_reactions["SOC"]

{'konza': MonodReaction(mineral_name='SOC', label='konza', rate_constant=-12.6, monod_terms={'SOC': 6e-10}, inhib_terms={})}

# Exchange reactions


# Constructing the database


In [None]:
# Exchange and surface complexation reactions are not parsed anywhere in the
# visible notebook, so they keep their NotImplemented placeholders.
exchange_reactions = NotImplemented
surface_complexation_reactions = NotImplemented

db = pt.ChemicalDatabase(
    primary_species=primary_species,
    secondary_species=secondary_species,
    mineral_species=mineral_species,
    # Pass the kinetic reactions parsed earlier in the notebook instead of
    # discarding them in favour of NotImplemented placeholders.
    # NOTE(review): confirm pt.ChemicalDatabase accepts the nested
    # {mineral name: {label: reaction}} layout built by the kinetics cell.
    tst_reactions=tst_reactions,
    monod_reactions=monod_reactions,
    surface_complexation_reactions=surface_complexation_reactions,
    exchange_reactions=exchange_reactions,
)