### Elemental parsing

1. If a mineral is in Macrostrat, take its formula from the Macrostrat record.
2. If a mineral is on the USGS critical minerals list (but not in Macrostrat), we need an extra mapping table to get its formula.
3. Parse the mineral's formula into a list of elements.
4. Filter by element? 

In [1]:
from random import Random, sample
from time import sleep

from text2graph.macrostrat import get_all_mineral_names
from text2graph.schema import Mineral

# Collect every mineral name exposed by text2graph's Macrostrat helper; the
# sampling cell below draws from this collection.
# NOTE(review): presumably this performs a network request — confirm.
minerals = get_all_mineral_names()

In [2]:
# Draw a reproducible random subset of mineral names for the spot check below.
# A dedicated, seeded generator keeps the selection stable across
# "Restart Kernel -> Run All"; change SAMPLE_SEED to inspect a different subset.
SAMPLE_SEED = 42
SAMPLE_SIZE = 100

# Cap the size: sampling more items than the population holds raises ValueError.
sel_minerals = Random(SAMPLE_SEED).sample(minerals, min(SAMPLE_SIZE, len(minerals)))

In [3]:
for mineral in sel_minerals:
    sleep(0.1)
    x = Mineral(name=mineral.title())
    await x.hydrate()

    if x.formula and not x.elements:
        print(x)



In [4]:
# Known problematic mineral records from Macrostrat.
# These formulas contain placeholders for a whole group of substitutable
# elements rather than a single element symbol.

# "Ln" is a placeholder for the lanthanides, not an element symbol:
# https://en.wikipedia.org/wiki/Lanthanide
x = dict(
    mineral_id=39556,
    mineral="Fluorbritholite-(Y)",
    mineral_type="",
    formula="Ca2(Y,Ca,Ln)3(SiO4,PO4)3(F,OH)",
    formula_tags="Ca<sub>2</sub>(Y,Ca,Ln)<sub>3</sub>(SiO<sub>4</sub>,PO<sub>4</sub>)<sub>3</sub>(F,OH)",
    url="http://www.mindat.org/min-39556.html",
    hardness_min=5.5,
    hardness_max=5.5,
    crystal_form="Hexagonal",
    mineral_color="Light pinkish-brown to brownish-pink; light brown; dark brown",
    lustre="Vitreous, Resinous, Greasy",
)

# "REE" (rare-earth elements) is likewise a group placeholder.
# The formula_tags value below is cut off mid-tag; it is kept exactly as recorded.
y = dict(
    mineral_id=27087,
    mineral="Hellandite-(Ce)",
    mineral_type="",
    formula="(Ca,REE)4Ce2Al(Be,Li)2-xB4Si4O22(OH)2",
    formula_tags="(Ca,REE)<sub>4</sub>Ce<sub>2</sub>Al(Be,Li)<sub>2-x</sub>B<sub>4</sub>Si<sub>4</sub>O<sub>22</su",
    url="http://www.mindat.org/min-27087.html",
    hardness_min=None,
    hardness_max=None,
    crystal_form="",
    mineral_color="Light green",
    lustre="Vitreous",
)

In [5]:
# Aluminian nontronite is in Macrostrat.
# Build the Mineral from its name only, let hydrate() fill in the remaining
# fields, then display the resulting record as the cell output.
x = Mineral(name="Aluminian nontronite")
await x.hydrate()
x

Mineral(mineral='Aluminian nontronite', mineral_id=153, mineral_type='', formula='Na0.33Fe2.17Al0.83Si3·.17O10(OH)2', formula_tags='Na<sub>0.33</sub>Fe<sub>2.17</sub>Al<sub>0.83</sub>Si<sub>3</sub> &middot; <sub>.17</sub>O<sub>1', url='http://www.mindat.org/min-153.html', hardness_min=None, hardness_max=None, crystal_form='', mineral_color=None, lustre=None, provenance=Provenance(id=UUID('59d39b4b-7217-49be-8a7f-fe20c9572a66'), source_name='Macrostrat', source_url='https://macrostrat.org/api/defs/minerals?mineral_id=153', source_version=2, requested=datetime.datetime(2024, 6, 4, 19, 47, 56, 480123, tzinfo=<UTC>), additional_values={}, previous=None), elements=[<Element.Aluminium: 'Al'>, <Element.Iron: 'Fe'>, <Element.Hydrogen: 'H'>, <Element.Sodium: 'Na'>, <Element.Oxygen: 'O'>, <Element.Silicon: 'Si'>])

In [6]:
# Fluorspar is USGS exclusive, i.e. on the USGS critical minerals list but not
# in Macrostrat, so its formula cannot come from a Macrostrat record.
# Hydrate it by name and display the resulting record as the cell output.
y = Mineral(name="Fluorspar")
await y.hydrate()
y



Mineral(mineral='Fluorspar', mineral_id=None, mineral_type=None, formula='CaF2', formula_tags=None, url=None, hardness_min=None, hardness_max=None, crystal_form=None, mineral_color=None, lustre=None, provenance=Provenance(id=UUID('4a7aef96-bf33-4ef1-bd0c-2ca5c8dda994'), source_name='Mindat', source_url='https://api.mindat.org/', source_version=1.0, requested=datetime.datetime(2024, 6, 4, 19, 47, 56, 480123, tzinfo=<UTC>), additional_values={}, previous=None), elements=[<Element.Calcium: 'Ca'>, <Element.Fluorine: 'F'>])

In [7]:
# Unknown mineral: a name that no data source should recognise.
# NOTE(review): hydrate() is expected to leave formula, elements and provenance
# unset rather than raise — confirm against Mineral.hydrate.

z = Mineral(name="Unknown Mineral Name")
await z.hydrate()
z



Mineral(mineral='Unknown Mineral Name', mineral_id=None, mineral_type=None, formula=None, formula_tags=None, url=None, hardness_min=None, hardness_max=None, crystal_form=None, mineral_color=None, lustre=None, provenance=None, elements=None)