Skip to content

Commit

Permalink
Make RACK CLI and RACK UI call SemTK ingestion package loading (#948)
Browse files Browse the repository at this point in the history
Co-authored-by: Eric Mertens <emertens@galois.com>
  • Loading branch information
weisenje and glguy committed Mar 28, 2023
1 parent 6ffbb43 commit a0cd804
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 138 deletions.
121 changes: 29 additions & 92 deletions cli/rack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
# standard imports
import argparse
import csv
from contextlib import nullcontext
from enum import Enum, unique
from io import StringIO
import logging
Expand All @@ -40,7 +39,7 @@
from semtk3.semtktable import SemtkTable
import yaml

from rack.manifest import Manifest, StepType
from rack.manifest import Manifest
from rack.types import Connection, Url
from rack.defaults import *

Expand Down Expand Up @@ -518,100 +517,37 @@ def build_manifest_driver(

def ingest_manifest_driver(
manifest_path: Path,
base_url: Url,
triple_store: Optional[Url],
triple_store_type: Optional[str],
clear: bool,
top_level: bool = True,
optimize: bool,
optimization_url: Optional[Url] = None) -> None:

is_toplevel_archive = top_level and manifest_path.suffix.lower() == ".zip"

# Extract the manifest even if this is a zip file we're sending off to SemTK
if is_toplevel_archive:
with TemporaryDirectory() as tmpdir:
shutil.unpack_archive(manifest_path, tmpdir)
with open(Path(tmpdir)/"manifest.yaml", mode='r', encoding='utf-8-sig') as manifest_file:
manifest = Manifest.fromYAML(manifest_file)
else:
with open(manifest_path, mode='r', encoding='utf-8-sig') as manifest_file:
manifest = Manifest.fromYAML(manifest_file)

if is_toplevel_archive:
resp = semtk3.load_ingestion_package(
triple_store or DEFAULT_TRIPLE_STORE,
triple_store_type or DEFAULT_TRIPLE_STORE_TYPE,
manifest_path,
clear,
MODEL_GRAPH,
DEFAULT_DATA_GRAPH,
)

loglevel = logger.getEffectiveLevel()
for line_bytes in resp.iter_lines():
level, _, msg = line_bytes.decode().partition(': ')
if level == "INFO" and logging.INFO >= loglevel:
print(msg)
elif level == "DEBUG" and logging.DEBUG >= loglevel:
print("Debug: " + str_highlight(msg))
elif level == "WARNING" and logging.WARNING >= loglevel:
print(str_warn("Warning: " + msg))
elif level == "ERROR" and logging.ERROR >= loglevel:
print("Error: " + str_bad(msg))
else:
base_path = manifest_path.parent

if clear:
# clear the whole footprint
modelgraphs = manifest.getModelgraphsFootprint()
datagraphs = manifest.getDatagraphsFootprint()
if not modelgraphs == []:
clear_driver(base_url, modelgraphs, datagraphs, triple_store, triple_store_type, Graph.MODEL)
if not datagraphs == []:
clear_driver(base_url, modelgraphs, datagraphs, triple_store, triple_store_type, Graph.DATA)

if not manifest.getNodegroupsFootprint() == []:
delete_nodegroups_driver(manifest.getNodegroupsFootprint(), True, True, True, base_url)

for (step_type, step_data) in manifest.steps:
if StepType.DATA == step_type:
stepFile = base_path / step_data
ingest_data_driver(stepFile, base_url, None, None, triple_store, triple_store_type, False)
elif StepType.MODEL == step_type:
stepFile = base_path / step_data
ingest_owl_driver(stepFile, base_url, None, triple_store, triple_store_type, False)
elif StepType.NODEGROUPS == step_type:
stepFile = base_path / step_data
store_nodegroups_driver(stepFile, base_url)
elif StepType.MANIFEST == step_type:
stepFile = base_path / step_data
ingest_manifest_driver(stepFile, base_url, triple_store, triple_store_type, False, False)
elif StepType.COPYGRAPH == step_type:
utility_copygraph_driver(base_url, triple_store, triple_store_type, step_data[0], step_data[1])

if top_level:
copyToGraph = manifest.getCopyToGraph()
if copyToGraph is not None:
if clear:
clear_driver(base_url, [copyToGraph], None, triple_store, triple_store_type, Graph.MODEL)
for graph in manifest.getModelgraphsFootprint():
utility_copygraph_driver(base_url, triple_store, triple_store_type, graph, copyToGraph)
for graph in manifest.getDatagraphsFootprint():
utility_copygraph_driver(base_url, triple_store, triple_store_type, graph, copyToGraph)

resolutionGraph = manifest.getPerformEntityResolution()
if resolutionGraph is not None:
r = resolutionGraph # mypy hack: otherwise type error that in [resolutionGraph], resolution graph is still Optional[Url]
@with_status(f'Executing entity resolution')
def combine() -> dict:
return semtk3.combine_entities_in_conn(conn=sparql_connection(base_url, [r], r, [], triple_store, triple_store_type))
combine()

# SemTK doesn't support this functionality, so we do it even if we've sent the zip file over
if top_level:
defaultGraphUrls = ["uri://DefaultGraph", "urn:x-arq:DefaultGraph"]
if (triple_store_type or DEFAULT_TRIPLE_STORE_TYPE) == "fuseki" and manifest.getCopyToGraph() in defaultGraphUrls:
invoke_optimization(optimization_url)
manifest = Manifest.getToplevelManifest(manifest_path)

resp = semtk3.load_ingestion_package(
triple_store or DEFAULT_TRIPLE_STORE,
triple_store_type or DEFAULT_TRIPLE_STORE_TYPE,
manifest_path,
clear,
MODEL_GRAPH,
DEFAULT_DATA_GRAPH,
)

loglevel = logger.getEffectiveLevel()
for line_bytes in resp.iter_lines():
level, _, msg = line_bytes.decode().partition(': ')
if level == "INFO" and logging.INFO >= loglevel:
print(msg)
elif level == "DEBUG" and logging.DEBUG >= loglevel:
print("Debug: " + str_highlight(msg))
elif level == "WARNING" and logging.WARNING >= loglevel:
print(str_warn("Warning: " + msg))
elif level == "ERROR" and logging.ERROR >= loglevel:
print("Error: " + str_bad(msg))

if optimize and manifest.getNeedsOptimization(triple_store_type or DEFAULT_TRIPLE_STORE_TYPE):
invoke_optimization(optimization_url)

def invoke_optimization(url: Optional[Url]) -> None:
url = url or DEFAULT_OPTIMIZE_URL
Expand Down Expand Up @@ -896,7 +832,7 @@ def dispatch_utility_copygraph(args: SimpleNamespace) -> None:

def dispatch_manifest_import(args: SimpleNamespace) -> None:
"""Implementation of manifest import subcommand"""
ingest_manifest_driver(Path(args.config), args.base_url, args.triple_store, args.triple_store_type, args.clear, True, args.optimize_url)
ingest_manifest_driver(Path(args.config), args.triple_store, args.triple_store_type, args.clear, args.optimize, args.optimize_url)

def dispatch_manifest_build(args: SimpleNamespace) -> None:
"""Implementation of manifest import subcommand"""
Expand Down Expand Up @@ -993,6 +929,7 @@ def get_argument_parser() -> argparse.ArgumentParser:

manifest_import_parser.add_argument('config', type=str, help='Manifest YAML file')
manifest_import_parser.add_argument('--clear', action='store_true', help='Clear footprint before import')
manifest_import_parser.add_argument('--optimize', type=bool, help='Enable RACK UI optimization when available')
manifest_import_parser.add_argument('--optimize-url', type=str, help='RACK UI optimization endpoint (e.g. http://localhost:8050/optimize)')
manifest_import_parser.set_defaults(func=dispatch_manifest_import)

Expand Down
31 changes: 29 additions & 2 deletions cli/rack/manifest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from enum import Enum
from jsonschema import validate
from rack.types import Connection, Url
import os
import os.path
from pathlib import Path
import shutil
from tempfile import TemporaryDirectory
from typing import Any, Dict, List, Tuple, Optional
import yaml
import semtk3
Expand Down Expand Up @@ -111,11 +116,11 @@ def getName(self) -> str:

def getDescription(self) -> Optional[str]:
return self.description

def getPerformEntityResolution(self) -> Optional[Url]:
"""Return target graph URL when this manifest prescribes running entity resolution"""
return self.performEntityResolution

def getCopyToGraph(self) -> Optional[Url]:
"""Return target graph URL when this manifest prescribes copying the footprint to the default graph"""
return self.copyToGraph
Expand Down Expand Up @@ -153,6 +158,28 @@ def getDefaultGraphConnection(self, triple_store: str = DEFAULT_TRIPLE_STORE, tr
"""Build a connection string using the triple store's default graph."""
return semtk3.build_default_connection_str("Default Graph", triple_store_type, triple_store)

def getNeedsOptimization(self, triple_store_type: str = DEFAULT_TRIPLE_STORE_TYPE) -> bool:
defaultGraphUrls = ["uri://DefaultGraph", "urn:x-arq:DefaultGraph"]
return triple_store_type == "fuseki" and self.getCopyToGraph() in defaultGraphUrls

@staticmethod
def getToplevelManifest(zipfile: Path) -> 'Manifest':
with TemporaryDirectory() as tmpdir_str:
tmpdir = Path(tmpdir_str)

shutil.unpack_archive(zipfile, tmpdir)

top_level_entries = os.listdir(tmpdir)
if "manifest.yaml" in top_level_entries:
manifest_path = tmpdir / "manifest.yaml"
elif len(top_level_entries) == 1 and os.path.isdir(tmpdir / top_level_entries[0]):
manifest_path = tmpdir / top_level_entries[0] / "manifest.yaml"
else:
raise FileNotFoundError("manifest.yaml")

with open(manifest_path, mode='r', encoding='utf-8-sig') as manifest_file:
return Manifest.fromYAML(manifest_file)

@staticmethod
def fromYAML(src: Any) -> 'Manifest':
"""Populate a Manifest using a YAML file following the MANIFEST_SCHEMA."""
Expand Down
6 changes: 5 additions & 1 deletion cli/setup-arcos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ set -eu

./ensure-cli-in-PATH.sh

rack --log-level ERROR manifest import --clear ../manifests/arcos.yaml
TMP=$(mktemp -d -t ingestion_package_XXXXXX) || exit 1
trap 'rm -rf "$TMP"; trap - EXIT; exit' EXIT INT HUP

rack manifest build ../manifests/arcos.yaml "${TMP}/output"
rack manifest import --clear "${TMP}/output.zip"
6 changes: 5 additions & 1 deletion cli/setup-rack.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ set -eu

./ensure-cli-in-PATH.sh

rack manifest import --clear ../manifests/rack.yaml
TMP=$(mktemp -d -t ingestion_package_XXXXXX) || exit 1
trap 'rm -rf "$TMP"; trap - EXIT; exit' EXIT INT HUP

rack manifest build ../manifests/rack.yaml "${TMP}/output"
rack manifest import --clear "${TMP}/output.zip"
6 changes: 5 additions & 1 deletion cli/setup-turnstile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ set -eu

./ensure-cli-in-PATH.sh

rack manifest import --clear ../manifests/turnstile.yaml
TMP=$(mktemp -d -t ingestion_package_XXXXXX) || exit 1
trap 'rm -rf "$TMP"; trap - EXIT; exit' EXIT INT HUP

rack manifest build ../manifests/turnstile.yaml "${TMP}/output"
rack manifest import --clear "${TMP}/output.zip"
6 changes: 1 addition & 5 deletions rack-ui/pages/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,7 @@
import subprocess

# configuration
BASE_URL = "http://localhost"
SPARQLGRAPH_BASE_URL = "http://localhost:8080"
TRIPLE_STORE_BASE_URL = "http://localhost:3030"
TRIPLE_STORE = TRIPLE_STORE_BASE_URL + "/RACK"
TRIPLE_STORE_TYPE = "fuseki"

def get_temp_dir() -> str:
""" Get a temp dir """
Expand Down Expand Up @@ -49,7 +45,7 @@ def clean_for_display(s):

def get_graph_info():
""" Gets list of graphs in the triple store, with their triple counts """
conn_str = rack.sparql_connection(BASE_URL, None, None, [], TRIPLE_STORE, TRIPLE_STORE_TYPE)
conn_str = rack.sparql_connection(rack.DEFAULT_BASE_URL, None, None, [], rack.DEFAULT_TRIPLE_STORE, rack.DEFAULT_TRIPLE_STORE_TYPE)
graph_info_table = semtk3.get_graph_info(conn_str, True, False) # True to exclude internal SemTK graphs, False to get counts too
return graph_info_table

Expand Down

0 comments on commit a0cd804

Please sign in to comment.