# GKM scratch

## load macrostrat unit dicts

In [1]:
import json

# Load the Macrostrat unit records from the JSON export.
# The encoding is passed explicitly: JSON text is UTF-8, whereas open()
# otherwise falls back to a platform-dependent locale encoding.
with open("../data/macrostrat_stratname_data.json", 'r', encoding="utf-8") as f:
    sn_dcts = json.load(f)

# Take one record (index 10) as a sample and display it to show the
# fields available on each unit dict.
sn_dct = sn_dcts[10]
sn_dct

{'unit_id': 24166,
 'section_id': 6949,
 'col_id': 1024,
 'project_id': 5,
 'col_area': 1788.19,
 'unit_name': 'Abbotsford Fm',
 'strat_name_id': 8498,
 'Mbr': '',
 'Fm': 'Abbotsford',
 'Gp': '',
 'SGp': '',
 't_age': 48.1,
 'b_age': 58.5,
 'max_thick': 0,
 'min_thick': 0,
 'outcrop': '',
 'pbdb_collections': 0,
 'pbdb_occurrences': 0,
 'lith': [{'atts': [],
   'name': 'mudstone',
   'prop': 1,
   'lith_id': 7,
   'type': 'siliciclastic',
   'class': 'sedimentary'}],
 'environ': [{'class': 'marine',
   'type': '',
   'name': 'inferred marine',
   'environ_id': 93}],
 'econ': [],
 'measure': [],
 'notes': '',
 'color': '#999999',
 'text_color': '#000000',
 't_int_id': 386,
 't_int_name': 'Heretaungan',
 't_int_age': 45.7,
 't_prop': 0.25,
 'units_above': [24167],
 'b_int_id': 389,
 'b_int_name': 'Teurian',
 'b_int_age': 66,
 'b_prop': 0.75,
 'units_below': [24165],
 'strat_name_long': 'Abbotsford Formation',
 'refs': [5],
 'clat': -46.163,
 'clng': 170.433,
 't_plat': -48.414,
 't_plng'

## Types of stratigraphic units

In [2]:
from collections import Counter
# Tally the last word of each unit's long stratigraphic name
# (e.g. 'Abbotsford Formation' -> 'Formation').
# An empty or whitespace-only name has no last word; it is counted under ''
# rather than raising IndexError from split()[-1].
c = Counter(
    (x['strat_name_long'].split() or [''])[-1] for x in sn_dcts
)
# Display the most frequent endings; the assignment alone renders no output.
c.most_common(20)


## Names of Members, Formations, Groups and Supergroups

In [3]:
from collections import Counter

# For every rank key, gather that field's value from each unit record,
# giving one list per key (same length and order as sn_dcts).
collection_dct = {
    relation_key: [x[relation_key] for x in sn_dcts]
    for relation_key in ('Mbr', 'Fm', 'Gp', 'SGp')
}

# Frequency of each value of the 'Fm' field; '' collects the records whose
# 'Fm' field is empty.
Counter(collection_dct["Fm"])

Counter({'': 2054,
         'Turi': 242,
         'Manganui': 186,
         'Farewell': 150,
         'Wapiabi': 136,
         'Whangai': 136,
         'Fernie': 117,
         'Carbondale': 117,
         'St Peter Sandstone': 116,
         'Otaraoa': 115,
         'North Cape': 111,
         'Pedra de Fogo': 110,
         'Amuri Limestone': 105,
         'Niobrara': 101,
         'Kaimiro': 96,
         'Mangahewa': 84,
         'Monterey': 83,
         'Chinle': 83,
         'Heiberg': 79,
         'Marambaia': 78,
         'Rose Hill': 77,
         'Rio Bonito': 77,
         'Ste Genevieve Limestone': 75,
         'Glenburn': 75,
         'Mohakatino': 75,
         'Cantuar': 73,
         'Carlile Shale': 73,
         'Tikorangi': 72,
         'Sulphur Mountain': 71,
         'Keyser Limestone': 70,
         'Mount Simon Sandstone': 70,
         'Urenui': 70,
         'Morrison': 69,
         'Broken River': 68,
         'Temblor': 66,
         'Kokoamu Greensand': 66,
         'Moki

## create GKM format description for one macrostrat dict

In [4]:
from text2graph.dict2ttl.convert import(
    gkm_xdd_entity_from_macrostrat_unit_dict,
    final_gkm_file
)
# Convert a single unit record (index 100) into a GKM/xDD entity, then render
# it as a one-entity GKM document and print the resulting text.
# NOTE(review): the captured output for this cell shows the unit listed as
# isPartOf itself, and a WKT POINT whose first coordinate looks like the
# latitude -- worth confirming against text2graph.dict2ttl.convert.
unit_dict = sn_dcts[100]
gxe = gkm_xdd_entity_from_macrostrat_unit_dict(macrostrat_unit_dict=unit_dict)
final_gkm_str = final_gkm_file([gxe])
print(final_gkm_str)
    


@prefix xdd: <https://w3id.org/gso/1.0/ex-xddlexicon#> .
@prefix gsgu: <https://w3id.org/gso/1.0/geologicunit/> .
@prefix gsoc: <https://w3id.org/gso/1.0/common/> .
@prefix gsrm: <https://w3id.org/gso/1.0/rockmaterial/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

xdd:AdmiralFm ;
	rdf:type gsgu:Formation ;
	gsoc:isPartOf xdd:AdmiralFm ;
	gsoc:isPartOf xdd:WichitaGp ;
	gsoc:hasConstituent xdd:shale ;
	gsoc:hasQuality gsoc: SpatialLocation [
		gsoc:hasSpatialValue [
			gsoc:WKTValue: POINT (33.733 -99.983)
		]
		gsoc:hasReferenceSystem [
			gsoc:GeographicCoordinateSystem gsoc:hasValue [
			"WGS 84"
		]
	] ;
.


## Demo with an LLM model response

In [5]:
# Hand-written sample of a model response: a list of location dicts, each
# carrying the location's name, its type, and the stratigraphic unit names
# associated with it.
example_model_response = [
    {
        "name": "Little Pybus Bay",
        "type": "geographical-area",
        "stratigraphic_units": [
            "Kootznahoo Formation",
            "Seymour Canal Formation",
            "Cannery Formation",
            "Admiralty Island Volcanics",
        ],
    },
]

In [6]:
from text2graph.dict2ttl.entities import GKMxDDEntity
# Build one GKM entity per (location, stratigraphic unit) pair in the sample
# response, tagging each entity with the name of the location it came from.
gkm_entities = []

# Lazily flatten the nested response into (location dict, unit name) pairs,
# preserving the order of locations and of units within each location.
location_unit_pairs = (
    (location, unit_name)
    for location in example_model_response
    for unit_name in location["stratigraphic_units"]
)
for location, unit_name in location_unit_pairs:
    entity = GKMxDDEntity(name=unit_name)
    entity.add_location_name(name=location['name'])
    gkm_entities.append(entity)

# Render all entities as a single GKM document and print it.
# final_gkm_file comes from the text2graph.dict2ttl.convert import in the
# earlier cell.
print(final_gkm_file(gkm_entities))

@prefix xdd: <https://w3id.org/gso/1.0/ex-xddlexicon#> .
@prefix gsgu: <https://w3id.org/gso/1.0/geologicunit/> .
@prefix gsoc: <https://w3id.org/gso/1.0/common/> .
@prefix gsrm: <https://w3id.org/gso/1.0/rockmaterial/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

xdd:KootznahooFormation ;
	rdf:type gsgu:Formation ;
	gsoc:hasQuality gsoc:SpatialLocation [
		gsoc:hasValue "Little Pybus Bay"
	] ;
.
xdd:SeymourCanalFormation ;
	rdf:type gsgu:Formation ;
	gsoc:hasQuality gsoc:SpatialLocation [
		gsoc:hasValue "Little Pybus Bay"
	] ;
.
xdd:CanneryFormation ;
	rdf:type gsgu:Formation ;
	gsoc:hasQuality gsoc:SpatialLocation [
		gsoc:hasValue "Little Pybus Bay"
	] ;
.
xdd:AdmiraltyIslandVolcanics ;
	rdf:type gsgu:Volcanics ;
	gsoc:hasQuality gsoc:SpatialLocation [
		gsoc:hasValue "Little Pybus Bay"
	] ;
.
