Skip to content

Commit

Permalink
Merge branch 'master' into TESTING_updates_kwq
Browse files Browse the repository at this point in the history
  • Loading branch information
kquick committed Feb 6, 2023
2 parents 50acf77 + 082d610 commit 7e213b2
Show file tree
Hide file tree
Showing 92 changed files with 2,941 additions and 520 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/assemble-files.yml
Expand Up @@ -85,7 +85,7 @@ jobs:
- name: Download SemTK
shell: bash
run: |
curl -LSfs https://github.com/ge-semtk/semtk/releases/download/v2.5.0-20221117/semtk-opensource-v2.5.0-20221117-dist.tar.gz -o RACK/rack-box/files/semtk.tar.gz
curl -LSfs https://github.com/ge-semtk/semtk/releases/download/v2.5.0-20230110/semtk-opensource-v2.5.0-20230110-dist.tar.gz -o RACK/rack-box/files/semtk.tar.gz
- name: Download CSS stylesheet
shell: bash
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/build-virtual-machine.yml
Expand Up @@ -52,7 +52,7 @@ jobs:
- name: Split rack-box virtual machine
run: |
cd RACK/rack-box
mv output-output-virtualbox-ovf rack-box-${{ inputs.version }}
mv output-virtualbox-ovf rack-box-${{ inputs.version }}
zip -r rack-box-${{ inputs.version }}.zip rack-box-${{ inputs.version }}
split -b 1500m rack-box-${{ inputs.version }}.zip rack-box-${{ inputs.version }}.zip
rm rack-box-${{ inputs.version }}.zip
Expand All @@ -66,6 +66,10 @@ jobs:
RACK/rack-box/GitHub-Release-README.md
RACK/rack-box/rack-box-${{ inputs.version }}.zip*
# softprops/action-gh-release has many issues and PRs filed
# against it; replace it with "gh release upload" if CI fails
# run: gh release upload ${{ github.event.release.tag_name }} RACK/rack-box/GitHub-Release-README.md RACK/rack-box/rack-box-${{ inputs.version }}.zip* --clobber

- name: Upload split virtual machine to release
uses: softprops/action-gh-release@v1
if: github.event_name == 'release'
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/continuous.yml
Expand Up @@ -51,7 +51,7 @@ jobs:
./assist/bin/check
- name: Lint shell scripts
uses: ludeeus/action-shellcheck@1.1.0
uses: ludeeus/action-shellcheck@2.0.0
env:
SHELLCHECK_OPTS: -x -P assist/databin -e SC1008

Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Expand Up @@ -68,3 +68,5 @@ cli/.project
/Turnstile-Example/Turnstile-IngestionPackage/CounterApplicationImplementation/*.o
rack-ui/cache/
rack-ui/.project
EntityResolution/.project
EntityResolution/Resolutions/Summary.csv
20 changes: 20 additions & 0 deletions EntityResolution/CheckBar.py
@@ -0,0 +1,20 @@
#!/usr/bin/python3

from tkinter import *

class Checkbar(Frame):
    """A frame holding one check button per entry in ``picks``.

    Every button starts selected. The checked/unchecked state of each pick is
    tracked in ``self.vars`` (pick -> bool) and toggled on every click.

    Parameters:
        parent:  widget the frame is placed in (passed to ``Frame``).
        picks:   iterable of labels; one check button is created per label.
        side:    ``pack`` side used for every button.
        anchor:  ``pack`` anchor used for every button.
        command: optional zero-argument callable invoked after each toggle.
    """

    def __init__(self, parent=None, picks=(), side=LEFT, anchor=W, command=None):
        Frame.__init__(self, parent)
        self.command = command
        self.vars = {}
        self.buttons = []
        for pick in picks:
            # Bind ``pick`` as a default argument so each button reports its
            # own label rather than the last value of the loop variable.
            button = Checkbutton(
                self, text=pick, command=lambda pick=pick: self.callback(pick)
            )
            button.pack(side=side, anchor=anchor, expand=YES)
            button.select()
            self.vars[pick] = True
            self.buttons.append(button)

    def callback(self, pick):
        """Flip the tracked state of ``pick`` and notify ``command`` if set."""
        self.vars[pick] = not self.vars[pick]
        # ``command`` defaults to None; only call it when one was supplied.
        if self.command is not None:
            self.command()

    def state(self):
        """Return the live pick -> bool dictionary (not a copy)."""
        return self.vars
35 changes: 35 additions & 0 deletions EntityResolution/CreateIngestion.py
@@ -0,0 +1,35 @@
#!/usr/bin/env python3
import DataAccess as da
import tkinter.filedialog as fd
import shutil
import os
import os.path
# Module-wide switch: set to True to enable Debug() output.
DEBUG = False


def Debug(*args):
    """Print ``args`` exactly like ``print`` does, but only when DEBUG is set."""
    if not DEBUG:
        return
    print(*args)
#####################################
# Queries
#####################################

#####################################
# Helper functions
#####################################
def createIngestion(decisions):
    """Write the accepted resolutions into a manifest zip chosen by the user.

    Asks for a ``.zip`` save location, copies the ``manifest_template``
    directory to a folder of the same name (minus the extension), writes
    ``resolutions/SAME_AS.csv`` inside it and finally zips the folder.

    Parameters:
        decisions: mapping of primary entity -> decision. A decision is either
            a mapping of secondary entity -> status code, or a bare value
            (the original code skipped the values 4 and 5). Only secondaries
            with status 2 or 3 are written.
            NOTE(review): the meaning of the numeric codes is not visible
            here - confirm against the caller.

    Returns:
        None. Nothing is written when the dialog is cancelled.
    """
    saveLocation = fd.asksaveasfilename(
        filetypes=[("Manifest File", "*.zip")], defaultextension=".zip"
    )
    if not saveLocation:
        # The dialog returns an empty value on cancel; copying the template
        # to an empty path would fail, so stop here.
        print("No save location selected; manifest was not created.")
        return
    print("Saving Manifest File to {}".format(saveLocation))
    tempFolder = os.path.splitext(saveLocation)[0]
    shutil.copytree("manifest_template", tempFolder)

    def quoted(value):
        # Double embedded quotes so the field stays valid CSV.
        return '"{}"'.format(str(value).replace('"', '""'))

    with open(os.path.join(tempFolder, "resolutions", "SAME_AS.csv"), "w") as outfile:
        # NOTE(review): the header has a space before "secondary_THING_type";
        # kept byte-identical because the consumer of this file is not
        # visible here - confirm whether it is intentional.
        outfile.write("primary_identifier,primary_THING_type,secondary_identifier, secondary_THING_type\n")
        for p, secondaries in decisions.items():
            # Bare status values (e.g. 4 or 5) carry no secondaries to write.
            if not isinstance(secondaries, dict):
                continue
            for s, status in secondaries.items():
                if status == 2 or status == 3:
                    print("Primary:{}".format(p))
                    print("Secondary:{}".format(s))
                    # The "!" suffix on the type columns is part of the
                    # original output format and is preserved.
                    outfile.write("{},{},{},{}\n".format(
                        quoted(da.getIdentifier(p)),
                        quoted("{}!".format(da.getType(p))),
                        quoted(da.getIdentifier(s)),
                        quoted("{}!".format(da.getType(s))),
                    ))
    # Archive only after the CSV is closed so its contents are flushed.
    shutil.make_archive(tempFolder, 'zip', tempFolder)

127 changes: 127 additions & 0 deletions EntityResolution/DataAccess.py
@@ -0,0 +1,127 @@
#!/usr/bin/env python3
import os
import json
import semtk3
import os.path
import time
import RACK_CONSTANTS as rc
def cacheData(e):
    """Query the data graph for entity ``e`` and store the result on disk.

    The GUID is the part of ``e`` after the last ``#``. The JSON-LD result of
    ``rc.dataQuery`` (with ``{{GUID}}`` and ``{{GRAPH}}`` substituted) is
    written to ``cache/<guid>.json``.

    The file is written to a temporary name first and then moved into place,
    so another process reading the cache never sees a partially written file.
    """
    guid = e.split("#")[-1]
    graph = "http://rack001/Data"
    query = rc.dataQuery.replace("{{GUID}}", guid).replace("{{GRAPH}}", graph)
    res = semtk3.query_raw_sparql(query, result_type=semtk3.RESULT_TYPE_GRAPH_JSONLD)

    # Make sure the cache directory exists before the first write.
    os.makedirs("cache", exist_ok=True)
    cachePath = "cache/" + guid + ".json"
    # Per-process temp name so concurrent writers do not clobber each other.
    tempPath = "{}.{}.tmp".format(cachePath, os.getpid())
    with open(tempPath, "w") as dataFile:
        json.dump(res, dataFile, indent=4)
    os.replace(tempPath, cachePath)

def getRelationships(e):
    """Return the relationships of entity ``e`` as a list of tuples.

    Each tuple is ``(property, identifier, direction)``:
      * for the node whose ``@id`` (after its first six characters) equals
        the GUID of ``e``, every dict-valued property yields an "Outgoing"
        entry carrying the identifier of the referenced node;
      * for every other node, every dict-valued property yields an
        "Incoming" entry carrying that node's own identifier.

    An empty list is returned when the cached graph is not a list.
    """
    guid = e.split("#")[-1]
    graph = getData(e)["@graph"]
    found = []
    if type(graph) is not list:
        return found

    # Look-up table from node id to its PROV_S:identifier.
    identifierById = {node['@id']: node['PROV_S:identifier'] for node in graph}

    for node in graph:
        # NOTE(review): the [6:] slice presumably strips a fixed-width
        # prefix from the node id - confirm against the cached data.
        isSubject = node['@id'][6:] == guid
        for prop, value in node.items():
            if type(value) is not dict:
                continue
            if isSubject:
                found.append((prop, identifierById[value['@id']], "Outgoing"))
            else:
                found.append((prop, node['PROV_S:identifier'], "Incoming"))
    return found

def getDataProperties(e):
    """Return the non-reference properties of entity ``e``.

    The result is a list of ``(property, value)`` tuples for every property
    whose value is not a dict (dict values are references to other nodes).

    When the cached graph is a list, only the first node whose ``@id`` (after
    its first six characters) equals the GUID of ``e`` is used. Otherwise the
    graph itself is treated as the single node.
    """
    dataProperties = []
    guid = e.split("#")[-1]
    data = getData(e)["@graph"]
    if isinstance(data, list):
        for el in data:
            # NOTE(review): the [6:] slice presumably strips a fixed-width
            # prefix from the node id - confirm against the cached data.
            if el['@id'][6:] == guid:
                dataProperties.extend(
                    (p, value) for p, value in el.items()
                    if not isinstance(value, dict)
                )
                break
    else:
        # Single-node result: read the values from ``data`` itself. The
        # original read them from ``el``, which is undefined in this branch.
        dataProperties.extend(
            (p, value) for p, value in data.items()
            if not isinstance(value, dict)
        )
    return dataProperties

def getDescription(e):
    """Return the ``PROV_S:description`` of entity ``e``, or None if absent.

    For a list-shaped graph the first node whose ``@id`` (after its first six
    characters) equals the GUID of ``e`` is consulted; None is returned when
    no node matches. Otherwise the graph itself is consulted.
    """
    key = 'PROV_S:description'
    guid = e.split("#")[-1]
    graph = getData(e)["@graph"]

    if type(graph) is list:
        node = next((el for el in graph if el['@id'][6:] == guid), None)
        if node is None:
            return None
    else:
        node = graph
    return node[key] if key in node else None

def getType(e):
    """Return the fully expanded ``@type`` of entity ``e``, or None.

    The cached ``@type`` has the form ``prefix:Name``; the prefix is looked
    up in the JSON-LD ``@context`` and the result is ``context[prefix] + Name``.

    The context is taken from the top level of the cached document, or from
    inside the graph when the document is a single node. If no context can be
    found an error is printed and None is returned.

    None is also returned when the matching node has no ``@type`` or, for a
    list-shaped graph, when no node matches the GUID of ``e``.
    """
    guid = e.split("#")[-1]
    # Load once; every getData call re-reads the cache file from disk.
    loaded = getData(e)
    data = loaded["@graph"]

    if "@context" in loaded:
        context = loaded["@context"]
    elif "@context" in data:
        context = data['@context']
    else:
        print("ERROR: Could not find context from data graph!!!")
        print("{}".format(loaded))
        return None

    if isinstance(data, list):
        # First node whose id (minus its six-character prefix) is the GUID.
        node = next((el for el in data if el['@id'][6:] == guid), None)
        if node is None:
            return None
    else:
        node = data

    if '@type' not in node:
        return None
    # Split on the first colon only so a name containing ":" stays intact.
    ns, _type = node['@type'].split(":", 1)
    return context[ns] + _type

def getIdentifier(e):
    """Return the ``PROV_S:identifier`` of entity ``e``, or None if absent.

    For a list-shaped graph the first node whose ``@id`` (after its first six
    characters) equals the GUID of ``e`` is consulted; None is returned when
    no node matches. Otherwise the graph itself is consulted.
    """
    key = 'PROV_S:identifier'
    guid = e.split("#")[-1]
    graph = getData(e)["@graph"]

    if type(graph) is list:
        node = next((el for el in graph if el['@id'][6:] == guid), None)
        if node is None:
            return None
    else:
        node = graph
    return node[key] if key in node else None

def getData(e):
    """Load the cached JSON-LD document for entity ``e``.

    The cache file is ``cache/<guid>.json`` where the GUID is the part of
    ``e`` after the last ``#``; it is created through ``cacheData`` when it
    does not exist yet. The returned dict always has an ``@graph`` key: a
    document without one is wrapped as ``{"@graph": document}``.
    """
    guid = e.split("#")[-1]
    cachePath = "cache/" + guid + ".json"

    if not os.path.exists(cachePath):
        cacheData(e)

    # Handles the multiprocessing case where another worker has created the
    # file but has not yet populated it with data: wait until it is non-empty.
    while os.path.getsize(cachePath) == 0:
        time.sleep(0.1)

    with open(cachePath, "r") as dataFile:
        loaded = json.load(dataFile)

    return loaded if "@graph" in loaded else {"@graph": loaded}

if __name__ == "__main__":
    # Run as a script: upload Model.owl using the rc.connStringSource2
    # connection, targeting its data side at connection index 0.
    semtk3.upload_owl(
        "Model.owl",
        rc.connStringSource2,
        model_or_data=semtk3.SEMTK3_CONN_DATA,
        conn_index=0,
    )
55 changes: 55 additions & 0 deletions EntityResolution/Entity.py
@@ -0,0 +1,55 @@
#!/usr/bin/env python3
import DataAccess as da
import tkinter as tk
from tkinter import ttk
class Entity(tk.Frame):
    """Frame that displays one entity as two tables.

    The upper table lists the entity's data properties (Property / Value) and
    the lower one its relationships (Relationship / Identifier / Direction).
    Clicking a property row stores that row's value in ``propertyString`` and
    invokes ``updateCallback``.

    Parameters:
        updateCallback: zero-argument callable run after a property row has
            been selected.
    """

    uri = None

    def __init__(self, updateCallback):
        super().__init__()
        self.updateCallback = updateCallback
        self.propertyString = ''

        # Data-property table; a single row can be selected at a time.
        self.properties = ttk.Treeview(self, selectmode='browse')
        self.properties["columns"] = ["Property", "Value"]
        self.properties["show"] = "headings"
        self.properties.heading("Property", text="Property")
        self.properties.heading("Value", text="Value")
        self.properties.column("Property", width=200, stretch=tk.NO)
        self.properties.bind('<ButtonRelease-1>', self.selectProperty)
        self.properties.pack(fill=tk.X, expand=True)

        # Relationship table; rows are display-only (no selection).
        self.relationships = ttk.Treeview(self, selectmode='none')
        self.relationships["columns"] = ["Relationship", "Identifier", "Direction"]
        self.relationships["show"] = "headings"
        self.relationships.heading("Identifier", text="Identifier")
        self.relationships.heading("Relationship", text="Relationship")
        self.relationships.heading("Direction", text="Direction")
        self.relationships.column("Relationship", width=200, stretch=tk.NO)
        self.relationships.pack(fill=tk.X, expand=True)

    def selectProperty(self, a):
        """Callback for selecting a property of the entity.

        ``a`` is the event object supplied by the binding and is not used.
        When no row is focused (or the row has no value column) nothing
        happens; otherwise ``propertyString`` is set to the row's value and
        ``updateCallback`` is run so the owner can refresh its text box.
        """
        currItem = self.properties.focus()
        values = self.properties.item(currItem)['values']
        # With nothing focused ``values`` is an empty string; also guard
        # against a row that lacks the second (Value) column.
        if values == "" or len(values) < 2:
            return
        self.propertyString = values[1]
        self.updateCallback()

    def update(self, e):
        """Show entity ``e`` in both tables, or clear them when ``e`` is None.

        NOTE(review): this shadows tkinter's own ``update()`` method on
        widgets with a different signature; the name is kept because callers
        depend on it.
        """
        self.propertyString = ''
        # Clear both tables before repopulating.
        for item in self.relationships.get_children():
            self.relationships.delete(item)
        for item in self.properties.get_children():
            self.properties.delete(item)
        print(e)
        if e is not None:
            properties = da.getDataProperties(e)
            relationships = da.getRelationships(e)
            # Update relationships and properties.
            for k in properties:
                self.properties.insert("", 'end', values=k)
            for k in relationships:
                self.relationships.insert("", 'end', values=k)

0 comments on commit 7e213b2

Please sign in to comment.