# GKM ontology conversion of LLM-derived triplets

In [1]:
from text2graph.schema import RelationshipTriples
from pathlib import Path
import json
import dotenv


# Load variables from a local .env file into the environment, if one exists.
# NOTE(review): presumably needed by RelationshipTriples — confirm.
dotenv.load_dotenv()

# JSON cache of the example triplet: read it when present, otherwise build it and save it.
shakopee_triplet_path = Path("../data/shakopee_triplet.json")
try:
    with open(shakopee_triplet_path, "r", encoding="utf-8") as f:
        shakopee_triplet = json.load(f)

except FileNotFoundError:
    # Cache miss: rebuild the triplet from a raw (subject, object, predicate) tuple.
    raw_llm_output = ("Shakopee", "Minnesota", "is_in")
    # Trailing underscore keeps the built-in `object` usable in the rest of the notebook.
    subject, object_, predicate = raw_llm_output
    triplet = RelationshipTriples(subject=subject, object=object_, predicate=predicate)
    shakopee_triplet = triplet.dict()
    # Make sure the data directory exists; without this the write below raises a
    # second FileNotFoundError on a fresh checkout where ../data is missing.
    shakopee_triplet_path.parent.mkdir(parents=True, exist_ok=True)
    with open(shakopee_triplet_path, "w", encoding="utf-8") as f:
        json.dump(shakopee_triplet, f)

# Display the triplet (loaded or freshly built) as the cell output.
shakopee_triplet

{'subject': {'strat_name': 'Shakopee',
  'strat_name_long': 'Shakopee Formation',
  'rank': 'Fm',
  'strat_name_id': 1876,
  'concept_id': 10342,
  'bed': '',
  'bed_id': 0,
  'mbr': '',
  'mbr_id': 0,
  'fm': 'Shakopee',
  'fm_id': 1876,
  'subgp': '',
  'subgp_id': 0,
  'gp': 'Prairie du Chien',
  'gp_id': 2622,
  'sgp': '',
  'sgp_id': 0,
  'b_age': 479.625,
  't_age': 475.775,
  'b_period': 'Ordovician',
  't_period': 'Ordovician',
  'c_interval': '',
  't_units': 10,
  'ref_id': 1},
 'predicate': 'is_in',
 'object': {'name': 'Minnesota',
  'lat': 46.729552999999996,
  'lon': -94.6858998}}

In [2]:
from text2graph.gkm import triplet_to_gkm

# Convert the triplet dict into a list of single-key GKM entries: one 'name'
# entry, one 'gsoc:hasQuality' entry per populated subject attribute, and the
# predicate/location entries derived from the object.
# NOTE(review): the displayed output shows a few inconsistencies worth checking
# in triplet_to_gkm:
#   - quality entries use the key 'hasValue:' (trailing colon, no 'gsoc:'
#     prefix) while location entries use 'gsoc:hasValue';
#   - 'gsocSpatialValue' has no ':' after the prefix;
#   - the '(POINT ...' string is never closed with ')'.
gkm_list = triplet_to_gkm(shakopee_triplet)
gkm_list


[{'name': 'xdd:ShakopeeFormation'},
 {'gsoc:hasQuality': {'xdd:rank': {'hasValue:': 'Fm'}}},
 {'gsoc:hasQuality': {'xdd:fm': {'hasValue:': 'Shakopee'}}},
 {'gsoc:hasQuality': {'xdd:gp': {'hasValue:': 'Prairie du Chien'}}},
 {'gsoc:hasQuality': {'xdd:b_age': {'hasValue:': 479.625}}},
 {'gsoc:hasQuality': {'xdd:t_age': {'hasValue:': 475.775}}},
 {'gsoc:hasQuality': {'xdd:b_period': {'hasValue:': 'Ordovician'}}},
 {'gsoc:hasQuality': {'xdd:t_period': {'hasValue:': 'Ordovician'}}},
 {'xdd:isIn': {'gsoc:SpatialLocation': {'gsoc:hasValue': 'Minnesota'}}},
 {'gsoc:hasQuality': {'gsoc:SpatialLocation': {'gsoc:hasValue': 'Minnesota'}}},
 {'gsoc:hasQuality': {'gsoc:SpatialLocation': {'gsoc:hasValue': {'gsocSpatialValue': '(POINT 46.729552999999996 -94.6858998'}}}}]

In [3]:
from text2graph.gkm import nested_dict_to_str
        
    
# Flatten a single nested entry (index 1, the 'xdd:rank' quality) into one
# space-separated string of its keys followed by the leaf value.
result = nested_dict_to_str(gkm_list[1])
result


'gsoc:hasQuality xdd:rank hasValue: Fm'

In [4]:
from text2graph.gkm import gkm_to_string

# Serialize the whole entry list into one string, each statement terminated by
# ' ;\n'. The call also prints every entry as it is processed.
# NOTE(review): the printed log reports "Unnesting error" for the float-valued
# 'xdd:b_age' and 'xdd:t_age' entries, and both are missing from the returned
# string — looks like non-string leaf values are dropped; confirm whether that
# is intended.
result = gkm_to_string(gkm_list)
result

{'name': 'xdd:ShakopeeFormation'}
{'gsoc:hasQuality': {'xdd:rank': {'hasValue:': 'Fm'}}}
{'gsoc:hasQuality': {'xdd:fm': {'hasValue:': 'Shakopee'}}}
{'gsoc:hasQuality': {'xdd:gp': {'hasValue:': 'Prairie du Chien'}}}
{'gsoc:hasQuality': {'xdd:b_age': {'hasValue:': 479.625}}}
Unnesting error: {'gsoc:hasQuality': {'xdd:b_age': {'hasValue:': 479.625}}}
{'gsoc:hasQuality': {'xdd:t_age': {'hasValue:': 475.775}}}
Unnesting error: {'gsoc:hasQuality': {'xdd:t_age': {'hasValue:': 475.775}}}
{'gsoc:hasQuality': {'xdd:b_period': {'hasValue:': 'Ordovician'}}}
{'gsoc:hasQuality': {'xdd:t_period': {'hasValue:': 'Ordovician'}}}
{'xdd:isIn': {'gsoc:SpatialLocation': {'gsoc:hasValue': 'Minnesota'}}}
{'gsoc:hasQuality': {'gsoc:SpatialLocation': {'gsoc:hasValue': 'Minnesota'}}}
{'gsoc:hasQuality': {'gsoc:SpatialLocation': {'gsoc:hasValue': {'gsocSpatialValue': '(POINT 46.729552999999996 -94.6858998'}}}}


'xdd:ShakopeeFormation ;\ngsoc:hasQuality xdd:rank hasValue: Fm ;\ngsoc:hasQuality xdd:fm hasValue: Shakopee ;\ngsoc:hasQuality xdd:gp hasValue: Prairie du Chien ;\ngsoc:hasQuality xdd:b_period hasValue: Ordovician ;\ngsoc:hasQuality xdd:t_period hasValue: Ordovician ;\nxdd:isIn gsoc:SpatialLocation gsoc:hasValue Minnesota ;\ngsoc:hasQuality gsoc:SpatialLocation gsoc:hasValue Minnesota ;\ngsoc:hasQuality gsoc:SpatialLocation gsoc:hasValue gsocSpatialValue (POINT 46.729552999999996 -94.6858998 ;\n'