Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
russell-d-e committed Jan 10, 2023
2 parents 53be788 + b1d995e commit 8a01ff1
Show file tree
Hide file tree
Showing 11 changed files with 70 additions and 40 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/assemble-files.yml
Expand Up @@ -85,7 +85,7 @@ jobs:
- name: Download SemTK
shell: bash
run: |
curl -LSfs https://github.com/ge-semtk/semtk/releases/download/v2.5.0-20221212/semtk-opensource-v2.5.0-20221212-dist.tar.gz -o RACK/rack-box/files/semtk.tar.gz
curl -LSfs https://github.com/ge-semtk/semtk/releases/download/v2.5.0-20230110/semtk-opensource-v2.5.0-20230110-dist.tar.gz -o RACK/rack-box/files/semtk.tar.gz
- name: Download CSS stylesheet
shell: bash
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Expand Up @@ -68,3 +68,5 @@ cli/.project
/Turnstile-Example/Turnstile-IngestionPackage/CounterApplicationImplementation/*.o
rack-ui/cache/
rack-ui/.project
EntityResolution/.project
EntityResolution/Resolutions/Summary.csv
8 changes: 0 additions & 8 deletions EntityResolution/Resolutions/Summary.csv

This file was deleted.

2 changes: 1 addition & 1 deletion RACK-Ontology/ontology/EntityResolution.sadl
@@ -1,4 +1,4 @@
uri "http://research.ge.com/semtk/entityResolution" alias entityResolution.
uri "http://research.ge.com/semtk/EntityResolution" alias EntityResolution.


SameAs is a top-level class,
Expand Down
8 changes: 4 additions & 4 deletions RACK-Ontology/ontology/RESOLUTIONS.sadl
Expand Up @@ -17,14 +17,14 @@

uri "http://arcos.rack/RESOLUTIONS" alias Rs.
import "http://arcos.rack/PROV-S".
import "http://research.ge.com/semtk/entityResolution".
import "http://research.ge.com/semtk/EntityResolution".


SAME_AS (note "Used to create curation relationships between two nodes. When two THINGs are connected via the SAME_AS relationship it means that the THINGs are actually describing the same. SAME_AS relationships will be collapsed into a single THING by the resolution process.") is a type of NODE.
primary (note "The primary THING is the one which will remain after the merge processes any conflicts will be resolved by using the primary's value, for example the resulting identifier will be the identifier from the primary") describes SAME_AS with a single value of type THING.
secondary (note "the secondary THINGs are the entity that will be removed during the resolution process, any attributes that do not conflict will be copied to the to the primary,") describes SAME_AS with values of type THING.

// Make SAME_AS compatible with semTK entity resolution functions.
SAME_AS is a type of entityResolution:SameAs.
primary is a type of entityResolution:target.
secondary is a type of entityResolution:duplicate.
SAME_AS is a type of EntityResolution:SameAs.
primary is a type of EntityResolution:target.
secondary is a type of EntityResolution:duplicate.
2 changes: 1 addition & 1 deletion assist/databin/ar
Expand Up @@ -33,7 +33,7 @@ fi

if (( creating )) ; then
outf=${!archive_file_idx}
rackf="$(dirname ${outf})/.$(basename ${outf}).rack"
rackf="$(dirname "${outf}")/.$(basename "${outf}").rack"

(
export IFS=","
Expand Down
26 changes: 21 additions & 5 deletions cli/rack/__init__.py
Expand Up @@ -73,6 +73,7 @@ def __str__(self) -> str:
return self.value

DEFAULT_BASE_URL: Url = Url("http://localhost")
DEFAULT_OPTIMIZE_URL: Url = Url("http://localhost:8050/optimize")

MODEL_GRAPH: Url = Url("http://rack001/model")
DEFAULT_DATA_GRAPH = Url("http://rack001/data")
Expand Down Expand Up @@ -362,7 +363,8 @@ def ingest_manifest_driver(
triple_store_type: Optional[str],
clear: bool,
default_graph: bool,
top_level: bool = True) -> None:
top_level: bool = True,
optimization_url: Optional[Url] = None) -> None:

with open(manifest_path, mode='r', encoding='utf-8-sig') as manifest_file:
manifest = Manifest.fromYAML(manifest_file)
Expand Down Expand Up @@ -411,9 +413,12 @@ def ingest_manifest_driver(
if top_level:
if manifest.getCopyToDefaultGraph():
defaultGraph = Url("uri://DefaultGraph")
for graph in manifest.modelgraphsFootprint:

if clear:
clear_driver(base_url, [defaultGraph], None, triple_store, triple_store_type, Graph.MODEL)
for graph in manifest.getModelgraphsFootprint():
utility_copygraph_driver(base_url, triple_store, triple_store_type, graph, defaultGraph)
for graph in manifest.datagraphsFootprint:
for graph in manifest.getDatagraphsFootprint():
utility_copygraph_driver(base_url, triple_store, triple_store_type, graph, defaultGraph)

if manifest.getPerformEntityResolution():
Expand All @@ -423,7 +428,17 @@ def go() -> dict:
go()

if manifest.getPerformOptimization():
logger.warning("Optimization requested but not yet implemented")
invoke_optimization(optimization_url)

def invoke_optimization(url: Optional[Url]) -> None:
    """Ask the optimization endpoint to optimize the triplestore.

    Sends an HTTP GET to the endpoint and inspects the JSON reply, which is
    expected to be an object carrying a boolean 'success' field and, on
    failure, a 'message' field.

    Args:
        url: Endpoint to call. DEFAULT_OPTIMIZE_URL is used when no value
            is supplied.

    Raises:
        Exception: when the reply is not JSON, is not a JSON object, or does
            not report success. The server's 'message' is used as the
            exception text when it is present.
        requests.HTTPError: when the reply is not JSON and carries an HTTP
            error status.
    """
    endpoint = str(url or DEFAULT_OPTIMIZE_URL)

    @with_status('Optimizing triplestore')
    def go() -> None:
        reply = requests.get(endpoint)
        try:
            payload = reply.json()
        except ValueError:
            # The body is not JSON (e.g. an HTML error page). Prefer the HTTP
            # status as the reported cause over an opaque decode error.
            reply.raise_for_status()
            raise Exception(
                f'Optimization endpoint {endpoint} returned a non-JSON response')
        # Treat a missing or malformed 'success' field as a failure rather
        # than letting a KeyError/AttributeError escape.
        if not isinstance(payload, dict) or not payload.get('success'):
            message = payload.get('message') if isinstance(payload, dict) else None
            raise Exception(message or f'Optimization failed at {endpoint}')
    go()


def ingest_data_driver(config_path: Path, base_url: Url, model_graphs: Optional[List[Url]], data_graphs: Optional[List[Url]], triple_store: Optional[Url], triple_store_type: Optional[str], clear: bool) -> None:
"""Use an import.yaml file to ingest multiple CSV files into the data graph."""
Expand Down Expand Up @@ -698,7 +713,7 @@ def dispatch_utility_copygraph(args: SimpleNamespace) -> None:

def dispatch_manifest_import(args: SimpleNamespace) -> None:
"""Implementation of manifest import subcommand"""
ingest_manifest_driver(Path(args.config), args.base_url, args.triple_store, args.triple_store_type, args.clear, args.default_graph)
ingest_manifest_driver(Path(args.config), args.base_url, args.triple_store, args.triple_store_type, args.clear, args.default_graph, True, args.optimize_url)

def dispatch_data_import(args: SimpleNamespace) -> None:
"""Implementation of the data import subcommand"""
Expand Down Expand Up @@ -791,6 +806,7 @@ def get_argument_parser() -> argparse.ArgumentParser:
manifest_import_parser.add_argument('config', type=str, help='Manifest YAML file')
manifest_import_parser.add_argument('--clear', action='store_true', help='Clear footprint before import')
manifest_import_parser.add_argument('--default-graph', action='store_true', help='Load whole manifest into default graph')
manifest_import_parser.add_argument('--optimize-url', type=str, help='RACK UI optimization endpoint (e.g. http://localhost:8050/optimize)')
manifest_import_parser.set_defaults(func=dispatch_manifest_import)

data_import_parser.add_argument('config', type=str, help='Configuration YAML file')
Expand Down
2 changes: 1 addition & 1 deletion cli/requirements.txt
Expand Up @@ -12,7 +12,7 @@ PyYAML==5.4.1
requests==2.28.1
Pillow==9.0.1
plotly==5.9.0
semtk-python3 @ git+https://github.com/ge-semtk/semtk-python3@e3b73e5881bf222b0b7c7cd98b02f3984444e39e
semtk-python3 @ git+https://github.com/ge-semtk/semtk-python3@3794a10ba5c2065b145d88f074a7e52028c21cdb
six==1.16.0
tabulate==0.8.10
urllib3==1.26.10
Expand Down
8 changes: 6 additions & 2 deletions rack-ui/pages/home.py
Expand Up @@ -5,17 +5,21 @@
from .helper import *
import pandas as pd

# name of default graph
DEFAULT_GRAPH_NAME = "uri://DefaultGraph"

def layout():
""" Provide the layout in a function, so that it is refreshed every time the page is displayed """

# get table with graph names and triple counts
df = pd.DataFrame(get_graph_info().get_pandas_data())
df.rename(columns={'graph': 'Graph', 'triples': '# Triples'}, inplace=True) # rename columns for display
df = df.replace(DEFAULT_GRAPH_NAME,'Optimized graph')

layout = html.Div(children=[
layout = dbc.Spinner(html.Div(children=[
html.H2('Welcome to RACK.'),
dcc.Markdown('Current graphs in RACK:', style={"margin-top": "50px"}),
dbc.Table.from_dataframe(df, color="primary", bordered=True, size="sm", style={"width": "auto"}),
])
]))

return layout
38 changes: 24 additions & 14 deletions rack-ui/pages/load.py
Expand Up @@ -19,22 +19,25 @@
MANIFEST_FILE_NAME = "manifest.yaml"

# div showing load details and buttons to load data or open SPARQLgraph
load_div = html.Div(
load_div = dbc.Spinner(html.Div(
[
dcc.Markdown("", id="load-div-message"),
dbc.Row([
dbc.Col(html.Button("Load data", id="load-button", n_clicks=0), width="auto"), # load button
dbc.Col(dbc.DropdownMenu(
[
dbc.DropdownMenuItem("Target graphs", href="", target="_blank", id="sparqlgraph-button"),
dbc.DropdownMenuItem("Optimized graph", href="", target="_blank", id="sparqlgraph-default-button")
], label="View in SPARQLgraph", toggle_class_name="ddm"), width="auto")
])
html.Table(
html.Tr([
html.Td(dcc.Markdown("", id="load-div-message"), style={"padding-right": "20px"}),
html.Td([
html.Button("Load data", id="load-button", n_clicks=0), # load button
dbc.DropdownMenu([
dbc.DropdownMenuItem("Target graphs", href="", target="_blank", id="sparqlgraph-button"),
dbc.DropdownMenuItem("Optimized graph", href="", target="_blank", id="sparqlgraph-default-button")
], label="View data", toggle_class_name="ddm")
])
])
)
],
id="load-div",
hidden=True,
style={"margin-top": "50px"},
)
))

# dialog indicating unzip error (e.g. no manifest)
unzip_error_dialog = dbc.Modal(
Expand Down Expand Up @@ -128,10 +131,14 @@ def run_unzip(zip_file_contents, turnstile_clicks):
package_description = ""
if manifest.getDescription() != None and manifest.getDescription().strip() != '':
package_description = f"({manifest.getDescription()})"
additional_actions = []
if manifest.getCopyToDefaultGraph(): additional_actions.append("copy to optimized graph")
if manifest.getPerformEntityResolution(): additional_actions.append("resolve entities")
if manifest.getPerformOptimization(): additional_actions.append("optimize triple store")
package_info = f"Data: `{manifest.getName()} {package_description}` \n" + \
f"Target model graphs: `{', '.join(manifest.getModelgraphsFootprint())}` \n" + \
f"Target data graphs: `{', '.join(manifest.getDatagraphsFootprint())}` \n" + \
f"Copy to optimized graph? `{'Yes' if manifest.getCopyToDefaultGraph() else 'No'}`"
f"Additional actions: `{', '.join(additional_actions) if len(additional_actions) > 0 else 'None'}`"

# generate a file in which to capture the ingestion status
status_filepath = get_temp_dir_unique("output")
Expand Down Expand Up @@ -210,8 +217,11 @@ def update_status(n, status_filepath):
prevent_initial_call=True
)
def manage_load_div(load_message, load_clicks):
""" Show or hide the load div (currently nothing triggers hiding it) """
return False # show if message is set
""" Show or hide the load div """
if len(load_message) > 0:
return False # show the div
else:
return True # hide the div

@callback(Output("unzip-error-dialog", "is_open"),
Input("unzip-error-dialog-body", "children"),
Expand Down
12 changes: 9 additions & 3 deletions rack-ui/pages/verify.py
Expand Up @@ -5,8 +5,12 @@
from dash import html, dcc, callback, Input, Output, State
import dash_bootstrap_components as dbc
import semtk3
import json
from .helper import *

# name of default graph
DEFAULT_GRAPH_NAME = "uri://DefaultGraph"

# dialog confirming ASSIST verification done
verify_assist_done_dialog = dbc.Modal(
[
Expand All @@ -23,7 +27,7 @@
)

# div showing graphs list
verify_report_options_div = html.Div(
verify_report_options_div = dbc.Spinner(html.Div(
[
dcc.Markdown("Select graphs to include in report:"),
dcc.Checklist([], [], id="verify-graph-checklist", labelStyle={'display': 'block'}, inputStyle={"margin-right": "10px"}), # choose which graphs to verify
Expand All @@ -35,7 +39,7 @@
id="verify-report-options-div",
hidden=True,
style={"margin-top": "50px"},
)
))

# dialog indicating an error generating the SPARQLgraph report (e.g. no graphs selected)
verify_report_error_dialog = dbc.Modal(
Expand Down Expand Up @@ -175,7 +179,9 @@ def show_report_options(button_clicks, last_loaded_graphs):
Show list of graphs for verification report, with the last loaded graphs pre-selected
"""
# get list of graphs populated in the triple store
graphs_list = get_graph_info().get_column(0)
graphs_list_values = get_graph_info().get_column(0) # list of graphs, including uri://DefaultGraph
graphs_list_labels = list(map(lambda x: x.replace(DEFAULT_GRAPH_NAME, 'Optimized graph'), graphs_list_values.copy())) # display default graph as "Optimized graph"
graphs_list = [{'label': label, 'value': val} for label, val in zip(graphs_list_labels, graphs_list_values)]

# these are the graphs last loaded - check the checkboxes for these
if last_loaded_graphs == None:
Expand Down

0 comments on commit 8a01ff1

Please sign in to comment.