# GKM ontology conversion of LLM-derived triplets

In [1]:
from text2graph.schema import RelationshipTriples
from pathlib import Path
import json
import dotenv
import logging


# Notebook-wide setup: emit INFO-level log records and let python-dotenv load
# settings from a .env file, if one is present.
logging.basicConfig(level=logging.INFO)
dotenv.load_dotenv()


# On-disk JSON cache of the triplet dict, so a re-run of this cell can reuse
# the previously built result instead of rebuilding it.
shakopee_triplet_path = Path("../data/shakopee_triplet.json")
try:
    # json.dump escapes non-ASCII by default, so a cache written by this cell
    # is plain ASCII and reads identically under an explicit UTF-8 decoding.
    with open(shakopee_triplet_path, "r", encoding="utf-8") as f:
        shakopee_triplet = json.load(f)
    logging.info("loaded shakopee_triplet from disk")

except FileNotFoundError:
    logging.warning("no shakopee_triplet file found, calling SERPAPI...")
    # Hard-coded stand-in for one raw LLM triplet, ordered
    # (subject, object, predicate).
    raw_llm_output = ("Shakopee", "Minnesota", "is_in")
    # Bound to `obj` rather than `object` so the builtin `object` is not
    # shadowed for every later cell in this kernel session.
    subject, obj, predicate = raw_llm_output
    # NOTE(review): RelationshipTriples presumably enriches the raw strings via
    # external lookups (the cached output carries stratigraphic and lat/lon
    # fields) — confirm against text2graph.schema.
    triplet = RelationshipTriples(subject=subject, object=obj, predicate=predicate)
    shakopee_triplet = triplet.dict()
    # FileNotFoundError is also raised when ../data itself is missing; create
    # the directory first so the write below cannot fail for the same reason.
    shakopee_triplet_path.parent.mkdir(parents=True, exist_ok=True)
    with open(shakopee_triplet_path, "w", encoding="utf-8") as f:
        json.dump(shakopee_triplet, f)

# Last expression of the cell: display the triplet dict.
shakopee_triplet

INFO:root:loaded shakopee_triplet from disk


{'subject': {'strat_name': 'Shakopee',
  'strat_name_long': 'Shakopee Formation',
  'rank': 'Fm',
  'strat_name_id': 1876,
  'concept_id': 10342,
  'bed': '',
  'bed_id': 0,
  'mbr': '',
  'mbr_id': 0,
  'fm': 'Shakopee',
  'fm_id': 1876,
  'subgp': '',
  'subgp_id': 0,
  'gp': 'Prairie du Chien',
  'gp_id': 2622,
  'sgp': '',
  'sgp_id': 0,
  'b_age': 479.625,
  't_age': 475.775,
  'b_period': 'Ordovician',
  't_period': 'Ordovician',
  'c_interval': '',
  't_units': 10,
  'ref_id': 1},
 'predicate': 'is_in',
 'object': {'name': 'Minnesota',
  'lat': 46.729552999999996,
  'lon': -94.6858998}}

In [2]:
from text2graph.gkm import triplet_to_gkm

# Convert the triplet dict loaded/built above into its GKM representation.
# Per the recorded output below, the result is a list of single-key dicts:
# a name entry, an rdf:type entry, then one nested entry per emitted property.
gkm_list = triplet_to_gkm(shakopee_triplet)
# Last expression of the cell: display the list.
gkm_list


[{'name': 'xdd:ShakopeeFormation'},
 {'rdf:type': 'gsgu:Formation'},
 {'gsoc:hasQuality': {'xdd:fm': {'gsoc:hasValue': '"Shakopee"'}}},
 {'gsoc:hasQuality': {'xdd:gp': {'gsoc:hasValue': '"Prairie du Chien"'}}},
 {'gsoc:hasQuality': {'xdd:b_age': {'gsoc:hasValue': '479.625'}}},
 {'gsoc:hasQuality': {'xdd:t_age': {'gsoc:hasValue': '475.775'}}},
 {'gsoc:hasQuality': {'xdd:b_period': {'gsoc:hasValue': '"Ordovician"'}}},
 {'gsoc:hasQuality': {'xdd:t_period': {'gsoc:hasValue': '"Ordovician"'}}},
 {'xdd:isIn': {'gsoc:SpatialLocation': {'gsoc:hasValue': '"Minnesota"'}}},
 {'gsoc:hasQuality': {'gsoc:SpatialLocation': {'gsoc:hasValue': '"Minnesota"'}}},
 {'gsoc:hasValue': {'gsoc:SpatialLocation': {'gsoc:hasValue': {'gsoc:SpatialValue': '(POINT 46.729552999999996 -94.6858998)'}}}}]

In [4]:
from text2graph.gkm import gkm_to_string

# Render the GKM entries produced by triplet_to_gkm as one text block.
# NOTE(review): the recorded output looks Turtle-like (nested `[ ... ] ;`
# groups) — confirm the intended serialization format in text2graph.gkm.
result = gkm_to_string(gkm_list)
# print() rather than a bare expression so the newlines and indentation in the
# string are rendered instead of shown as an escaped repr.
print(result)

xdd:ShakopeeFormation
  rdf:type gsgu:Formation ;
  gsoc:hasQuality [
     xdd:fm gsoc:hasValue [
       "Shakopee" ; 
    ] ;     
  ] ;  
  gsoc:hasQuality [
     xdd:gp gsoc:hasValue [
       "Prairie du Chien" ; 
    ] ;     
  ] ;  
  gsoc:hasQuality [
     xdd:b_age gsoc:hasValue [
       479.625 ; 
    ] ;     
  ] ;  
  gsoc:hasQuality [
     xdd:t_age gsoc:hasValue [
       475.775 ; 
    ] ;     
  ] ;  
  gsoc:hasQuality [
     xdd:b_period gsoc:hasValue [
       "Ordovician" ; 
    ] ;     
  ] ;  
  gsoc:hasQuality [
     xdd:t_period gsoc:hasValue [
       "Ordovician" ; 
    ] ;     
  ] ;  
  xdd:isIn gsoc:SpatialLocation [
     gsoc:hasValue [
       "Minnesota" ; 
    ] ;     
  ] ;  
  gsoc:hasQuality [
     gsoc:SpatialLocation [
       gsoc:hasValue [
         "Minnesota" ; 
      ] ;       
    ] ;     
  ] ;  
  gsoc:hasValue [
     gsoc:SpatialLocation [
       gsoc:hasValue [
         gsoc:SpatialValue [
           (POINT 46.729552999999996 -94.6858998) ; 
    