In [None]:
import pandas as pd 
from src.qudt import qudt_kg

In [None]:
# Instantiate the QUDT knowledge-graph helper; `qkg` is the handle the later
# cells use for quantity lookups, unit lookups and direct graph access.
qkg = qudt_kg()

## Define list of quantities
The list here is based on a preselection from Everton.

In [None]:
# Names of the quantities to export, as passed to `qkg.get_quant`.
quant_list = [
    'Acceleration', 'AmountOfSubstance', 'AmountOfSubstancePerUnitVolume', 'Angle',
    'Area', 'Density', 'Dimensionless', 'DimensionlessRatio',
    'DynamicViscosity', 'ElectricCurrent', 'ElectricPotential', 'Energy',
    'EnergyDensity', 'Force', 'Length', 'Mass',
    'MassConcentration', 'MassFlowRate', 'Permeability', 'Power', 'Pressure',
    'SoundPressure', 'SurfaceTension', 'Temperature', 'Time',
    'Volume', 'VolumeFlowRate', 'VolumeFraction',
]

# Resolve each name exactly once. Names with a falsy lookup result are left out
# of `qudtquants` and reported, so a misspelled quantity does not vanish silently.
qudtquants = []
unresolved_quants = []
for q in quant_list:
    q_found = qkg.get_quant(q)
    if q_found:
        qudtquants.append((q, q_found))
    else:
        unresolved_quants.append(q)

if unresolved_quants:
    print("No QUDT quantity found for: ", unresolved_quants)

# (name, lookup result) pairs; shown as the cell output
qudtquants

## Iterate through QUDT to collect unit_systems and unit_catalogues

* iterate through the quantities from the list
* for each quantity find units that belong to SI or Imperial (see `unitsys`)
* each unit is validated by `PhysUnit` schema
* export all units as `unitcatalogue.json` 
* and selected for `unitsystems`

The applicable unit systems in QUDT are encoded as `qudt:applicableSystem sou:SI` and `qudt:applicableSystem sou:IMPERIAL`.

!! CAUTION!!
* currently, the selection is by associated unit system
* the preselection will be changed to a curated list

In [None]:
# Example: fetch every SI unit for the second resolved quantity in `qudtquants`.
example_quant_name, example_quant = qudtquants[1]
qkg.find_units_for_unitsystem(example_quant, "http://qudt.org/vocab/sou/SI")

### for each unit system retrieve relevant units


In [None]:
# Unit systems to export, keyed by a short name and mapped to the system URI
# that is handed to `qkg.find_units_for_unitsystem`.
systems = {
    "SI": "http://qudt.org/vocab/sou/SI",
    "Imperial": "http://qudt.org/vocab/sou/IMPERIAL",
}

# unitsystem[<system name>] -> flat list of everything the lookup returned for
# all resolved quantities (the validation cell unpacks the items as (unit, quantity)).
unitsystem = {}

for syskey, sys_uri in systems.items():
    ulist = []
    for (qname, q_uri) in qudtquants:
        ul = qkg.find_units_for_unitsystem(q_uri, sys_uri)
        # Check before concatenating so an empty (or None) result is reported
        # instead of failing on the concatenation.
        if not ul:
            print("No units found for ", qname, "\t\t in ", syskey)
            continue
        # extend in place; `ulist = ulist + ul` re-copied the whole list every time
        ulist.extend(ul)

    unitsystem[syskey] = ulist

### Validate all units in unitcatalogue and export

In [None]:
from src.utils import to_json
from datetime import date
from src.unit_dm import PhysUnit, UnitConv
from rdflib import Namespace, URIRef, RDFS
from src.qudt import get_qudt_prefixes_inrange

# Dublin Core terms namespace; in the visible part of this notebook it is only
# referenced by the commented-out `description` lookup in the validation cell.
dcterms = Namespace("http://purl.org/dc/terms/")

In [None]:
# Bounds of the accepted conversion-multiplier window (compared inclusively in
# the validation cell). The same bounds select the admissible QUDT prefixes;
# "" is appended so that units carrying no prefix are accepted as well.
allowed_multiplierrange = [1e-6, 1e6]
allowed_prefixes = get_qudt_prefixes_inrange(multiplierrange=allowed_multiplierrange)
allowed_prefixes = allowed_prefixes + [""]

In [None]:
# Validate each unit against the PhysUnit schema and collect the dumped models
# in `unit_catalogue`, keyed by the model's `externalId`. Entries with the same
# externalId overwrite each other.
unit_catalogue = {}

# rdflib exposes the language of an untagged literal as None (never ""), and
# language tags are case-insensitive, so labels are matched on a normalised tag.
# The previous test `o.language in ["en", "en-us", ""]` skipped untagged labels.
label_languages = {"en", "en-us", ""}

# loop-invariant bounds of the accepted conversion-multiplier window
multiplier_min = min(allowed_multiplierrange)
multiplier_max = max(allowed_multiplierrange)

for ulist in unitsystem.values():
    for (unit, quant) in ulist:
        unit = URIRef(unit)
        quant = URIRef(quant)

        # English (or untagged) rdfs:label values; [""] when the unit has none
        aliases = [
            o.toPython()
            for o in qkg.unit_graph.objects(subject=unit, predicate=RDFS.label)
            if (o.language or "").lower() in label_languages
        ] or [""]

        # the first conversionMultiplier / conversionOffset value found is used;
        # missing values default to 1.0 and 0.0
        multipliers = [str(o) for o in qkg.unit_graph.objects(subject=unit, predicate=qkg.QUDT.conversionMultiplier)]
        offsets = [str(o) for o in qkg.unit_graph.objects(subject=unit, predicate=qkg.QUDT.conversionOffset)]
        conversion = UnitConv(
            multiplier=(multipliers or [1.0])[0],
            offset=(offsets or [0.0])[0],
        )

        ## check if prefix in allowed prefixlist and conversion in range
        ## units not perfectly curated (eg see http://qudt.org/vocab/unit/FemtoMOL)
        unit_prefix = [p.toPython() for p in qkg.unit_graph.objects(subject=unit, predicate=qkg.QUDT.prefix)] or [""]
        if (unit_prefix[0] in allowed_prefixes) and (multiplier_min <= conversion.multiplier <= multiplier_max):
            pu = PhysUnit(
                    name = unit.split("/")[-1],
                    quantity = quant.split("/")[-1],
                    #symbol = ([o.toPython() for o in qkg.unit_graph.objects(subject=unit, predicate=qkg.QUDT.symbol)] or [None])[0],
                    longName = aliases[0],
                    aliasNames = aliases,
                    conversion = conversion,
                    #description = ([o.toPython() for o in qkg.unit_graph.objects(subject=unit, predicate=dcterms.description)] or [None])[0],
                    sourceReference = unit,
                    source = "qudt.org"
                ).model_dump()
            unit_catalogue[pu["externalId"]] = pu

In [None]:
# number of catalogue entries, i.e. distinct externalIds that passed the prefix / multiplier filter
len(unit_catalogue)

In [None]:
# Catalogue version; it is embedded in the export file names as V<version>.<subversion, 2 digits>.
version = 1
subversion = 1

# write the validated unit catalogue
to_json(unit_catalogue, file=f"./data/V{version:01d}.{subversion:02d}_units.json")

### Export unitsystems

In [None]:
# load default unit systems
dudf = pd.read_excel("./data/V1_defaultunits.xlsx")

# check if referenced units are in the unit catalogue (the result needs to be empty)
unit_not_in_catalogue = dudf["unit"].apply(lambda unit_id: unit_id not in unit_catalogue)
dudf.loc[unit_not_in_catalogue, "unit"]

In [None]:
# Convert the default-unit table into the unit-system structure and dump it to JSON:
# one entry per `usys` value (in order of first appearance), each listing its
# quantity name -> unit externalId assignments.
default_units = []

for usys, g in dudf.groupby("usys", sort=False):
    print(usys)

    default_units.append({
        "name": usys,
        # NOTE: this key used to be exported misspelled as "qunatities";
        # consumers written against the misspelling need to be updated.
        "quantities": [
            {"name": quantity, "unitExternalId": unit_id}
            for quantity, unit_id in zip(g["quantity"], g["unit"])
        ],
    })

to_json(default_units, file=f"./data/V{version:01d}.{subversion:02d}_unitSystems.json")