Skip to content

Commit

Permalink
0.1 version basically done
Browse files Browse the repository at this point in the history
  • Loading branch information
cmutel committed Oct 21, 2019
1 parent aa125b8 commit 6c903d6
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 24 deletions.
2 changes: 1 addition & 1 deletion perdu/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__version__ = (0, 1)

from .filesystem import base_dir
from .filesystem import base_dir, export_dir
from .searching import (
search_gs1,
search_gs1_disjoint,
Expand Down
45 changes: 41 additions & 4 deletions perdu/assets/templates/file.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ <h4>File {{ filename }}:</h4>
</div>
<div class="five columns" id="export-section" style="display: none">
<p> Export current matches:</p>
<input type="button" id="export-ttl" class="button-primary" value="Export as TTL" />
<input type="button" id="export-jsonld" class="button-primary" value="Export as JSON-LD" />
<input type="button" id="export-ttl" class="button" value="Export as TTL" />
<input type="button" id="export-jsonld" class="button" value="Export as JSON-LD" />
</div>

<table class="u-full-width" id="search_results_table">
Expand Down Expand Up @@ -100,13 +100,15 @@ <h4>Refine query</h4>
<script type="text/javascript">
// Currently selected catalog name (initialized from the template below).
var catalog = null;
// Accumulates the match payload that is JSON.stringify'd and POSTed to
// /export/<method>.
// NOTE(review): bracket assignment on a Map sets plain object properties,
// not Map entries; JSON.stringify happens to serialize those expando
// properties, but a plain object literal would be conventional — confirm
// before changing.
var match_data = new Map();
match_data['hash'] = "{{ hash }}";
var match_type = "exact";
var source_row = null;
var match_row = null;


// Default to the first catalog the server offers.
(function set_initial_catalog() {
catalog = "{{ catalogues[0] }}";
match_data['catalog'] = "{{ catalogues[0] }}";
})();

// Handle match type selection
Expand Down Expand Up @@ -136,6 +138,7 @@ <h4>Refine query</h4>
}

function toggle_catalog_selection(button) {
match_data['catalog'] = button.target.value;
var all_buttons = document.querySelectorAll('.catalog-selection');
Array.prototype.forEach.call(all_buttons, function(elements, index) {
if (button.target.value === elements.value) {
Expand All @@ -147,7 +150,6 @@ <h4>Refine query</h4>
});
}


// Query catalog and reset result table
function query(query_string){
var opts = {
Expand Down Expand Up @@ -268,17 +270,30 @@ <h4>Refine query</h4>
modal.style.display = "block";
};


// Export as TTL: POST the accumulated match_data to the server, which
// writes the RDF file and replies with its filename.
document.getElementById("export-ttl").onclick = function (event) {
var xhr = new XMLHttpRequest();
var url = "/export/ttl";
xhr.open("POST", url, true);
xhr.setRequestHeader("Content-Type", "application/json");

// On success, swap the export button for a small form whose submit
// button downloads the generated file via /download/<filename>.
xhr.onreadystatechange=function() {
if (xhr.readyState==4 && xhr.status==200){
var fp = JSON.parse(xhr.responseText).fp;
var button = document.getElementById("export-ttl");
var new_form = document.createElement('form');
new_form.style = "margin: 0; padding: 0; display:inline";
new_form.innerHTML = '<input display="inline" type="submit" class="button-primary" value="Download TTL" />';
new_form.action = "/download/" + fp;
button.replaceWith(new_form);
}
}

var data = JSON.stringify(match_data);
xhr.send(data);
}

// Export as JSON-LD: POST the accumulated match_data to the server, which
// writes the RDF file and replies with its filename. (The scraped diff
// left two onclick assignments for this button — a truncated old handler
// and its replacement; only the final handler is kept.)
document.getElementById("export-jsonld").onclick = function (event) {
var xhr = new XMLHttpRequest();
var url = "/export/jsonld";
xhr.open("POST", url, true);
xhr.setRequestHeader("Content-Type", "application/json");

// On success, swap the export button for a small form whose submit
// button downloads the generated file via /download/<filename>.
xhr.onreadystatechange=function() {
if (xhr.readyState==4 && xhr.status==200){
var fp = JSON.parse(xhr.responseText).fp;
var button = document.getElementById("export-jsonld");
var new_form = document.createElement('form');
new_form.style = "margin: 0; padding: 0; display:inline";
new_form.innerHTML = '<input display="inline" type="submit" class="button-primary" value="Download JSON-LD" />';
new_form.action = "/download/" + fp;
button.replaceWith(new_form);
}
}

var data = JSON.stringify(match_data);
xhr.send(data);
}

// Populate table with search results as the search field is updated.
var table = document.getElementById('refine-search-field');
table.addEventListener('input', function (evt) {
Expand Down
3 changes: 3 additions & 0 deletions perdu/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@

# Per-user application data directory; created eagerly on import so that
# the rest of the package can rely on it existing.
base_dir = Path(appdirs.user_data_dir("perdu-search", "perdu"))
base_dir.mkdir(exist_ok=True, parents=True)

# RDF / JSON-LD export files are written here (see perdu.semantic_web).
export_dir = base_dir / "exports"
export_dir.mkdir(exist_ok=True, parents=True)
59 changes: 59 additions & 0 deletions perdu/semantic_web.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from .filesystem import export_dir
from rdflib import Literal, RDF, URIRef, Namespace, Graph
from rdflib.namespace import DC, RDFS, OWL, SKOS


# Map the UI's match methods to RDF predicates: "exact" asserts identity
# via owl:sameAs; the other methods use SKOS-style semantic relations.
verb_mapping = {
    "exact": OWL.sameAs,
    "approximate": SKOS.related,
    "narrower": SKOS.narrower,
    "broader": SKOS.broader,
}


def write_matching_to_rdf(data, format="turtle", extension="ttl"):
    """Serialize a set of catalog matches to an RDF file in ``export_dir``.

    Args:
        data: Matching payload posted by the web UI. Expected keys:
            ``hash`` (uploaded-file identifier), ``catalog`` (e.g. ``"gs1"``),
            and one ``"row-<n>"`` entry per source row, each holding a
            ``source`` label and a ``matches`` list of
            ``{"method": ..., "data": {"code", "name", "description", ...}}``.
        format: rdflib serialization format, e.g. ``"turtle"`` or
            ``"json-ld"``. (Keeps the builtin-shadowing name so existing
            keyword callers are unaffected.)
        extension: File extension for the export file.

    Returns:
        ``pathlib.Path`` of the written export file.
    """
    g = Graph()

    olca = Namespace("http://greendelta.github.io/olca-schema/context.jsonld#")

    g.bind("olca", "http://greendelta.github.io/olca-schema/context.jsonld")
    g.bind("dc", DC)
    g.bind("owl", OWL)
    g.bind("skos", SKOS)

    # GS1 entries describe products (olca:Flow); the other catalogs
    # describe industries/activities (olca:Process).
    olca_object = olca.Flow if data["catalog"] == "gs1" else olca.Process

    row_keys = [key for key in data if key.startswith("row-")]

    # Start by describing what we are linking against (only those elements
    # actually used), deduplicated by catalog code.
    node_dict = {}
    for key in row_keys:
        for o in data[key]["matches"]:
            match = o["data"]
            if match["code"] not in node_dict:
                uri = "http://perdu.data/{}/{}".format(data["catalog"], match["code"])
                node = URIRef(uri)
                g.add((node, RDF.type, olca_object))
                g.add((node, DC.title, Literal(match["name"])))
                g.add((node, RDFS.label, Literal(match["name"])))
                g.add((node, DC.description, Literal(match["description"])))
                node_dict[match["code"]] = node

    # Now describe our links. Each source row gets its own URI (hash + row
    # key); previously the URI used only the hash, which collapsed every
    # source term of a file into a single RDF node with merged labels and
    # links.
    for key in row_keys:
        uri = "http://perdu.data/source/{}/{}".format(data["hash"], key)
        node = URIRef(uri)
        g.add((node, RDF.type, olca.Flow))
        g.add((node, RDFS.label, Literal(data[key]["source"])))
        g.add((node, DC.publisher, Literal("perdu.data")))
        g.add((node, DC.creator, URIRef("https://github.com/cmutel/perdu")))
        for match in data[key]["matches"]:
            g.add(
                (node, verb_mapping[match["method"]], node_dict[match["data"]["code"]])
            )

    fp = export_dir / "{}.{}.{}".format(data["hash"], data["catalog"], extension)
    if fp.is_file():
        # Overwrite any stale export for the same source file and catalog.
        fp.unlink()

    with open(fp, "wb") as f:
        g.serialize(f, format=format, encoding="utf-8")
    return fp
56 changes: 37 additions & 19 deletions perdu/webapp.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
from . import (
search_gs1_disjoint,
search_gs1,
base_dir,
export_dir,
File,
search_corrector_gs1,
search_naics_disjoint,
search_naics,
search_corrector_naics,
search_useeio_disjoint,
search_useeio,
search_corrector_useeio,
base_dir,
File,
search_gs1,
search_gs1_disjoint,
search_naics,
search_naics_disjoint,
search_useeio,
search_useeio_disjoint,
)
from .ingestion import mapping
from .semantic_web import write_matching_to_rdf
from flask import (
abort,
flash,
Expand Down Expand Up @@ -51,8 +53,12 @@ def allowed_file(filename):


# Disjoint-search variants exist but are not currently wired up:
# search_mapping = {"naics": search_naics_disjoint, "gs1": search_gs1_disjoint, "useeio": search_useeio_disjoint}

# Catalog name -> search function used by the /search endpoint.
search_mapping = {"naics": search_naics, "gs1": search_gs1, "useeio": search_useeio}
# Catalog name -> spelling-corrector search used to refine queries.
corrector_mapping = {
    "naics": search_corrector_naics,
    "gs1": search_corrector_gs1,
    "useeio": search_corrector_useeio,
}


@perdu_app.route("/", methods=["GET", "POST"])
Expand Down Expand Up @@ -102,9 +108,17 @@ def search():
@perdu_app.route("/export/<method>", methods=["POST"])
def export_linked_data(method):
    """Write the POSTed match data to an RDF export file.

    Args:
        method: Export flavor from the URL: ``"ttl"`` or ``"jsonld"``.

    Returns:
        JSON ``{"fp": <filename>}`` naming the file created in the export
        directory; the client builds a ``/download/<filename>`` link from it.
    """
    content = request.get_json()
    if method == "ttl":
        fp = write_matching_to_rdf(content)
    elif method == "jsonld":
        fp = write_matching_to_rdf(content, "json-ld", "json")
    else:
        # Unknown export flavor: 404 instead of a NameError/500 from the
        # unbound ``fp`` below.
        abort(404)
    return jsonify({"fp": fp.name})


@perdu_app.route("/download/<path>", methods=["GET"])
def download_export(path):
    """Send a previously generated export file as an attachment.

    Args:
        path: Export filename as returned by ``export_linked_data``.
    """
    fp = (export_dir / path).resolve()
    # Refuse anything that resolves outside the export directory (e.g.
    # ".." components) and missing files; send_file on a missing path
    # would otherwise raise and return a 500.
    if export_dir.resolve() not in fp.parents or not fp.is_file():
        abort(404)
    return send_file(fp, as_attachment=True)


@perdu_app.route("/file/<hash>", methods=["GET"])
Expand All @@ -115,8 +129,12 @@ def uploaded_file(hash):
raise (404)
data = mapping[file.kind](file.filepath)
return render_template(
"file.html", title="File: {}".format(file.name), filename=file.name, data=data,
catalogues=list(search_mapping)
"file.html",
title="File: {}".format(file.name),
filename=file.name,
data=data,
catalogues=list(search_mapping),
hash=hash,
)


Expand Down Expand Up @@ -154,11 +172,11 @@ def upload():


def normalize_search_results(result):
    """Normalize a raw search hit to the common result schema.

    GS1 hits use ``brick``/``definition``/``klass`` keys; map those to the
    ``name``/``description``/``class`` keys the templates expect. Hits from
    other catalogs already use the common schema and pass through unchanged.
    Note that for GS1 hits only these three keys are returned — any extra
    keys on the input dict are dropped.
    """
    if "brick" in result:
        return {
            "description": result.pop("definition"),
            "name": result.pop("brick"),
            "class": result.pop("klass"),
        }
    return result
Expand Down
27 changes: 27 additions & 0 deletions tests/fixtures/match_fixture.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Minimal matching payload as POSTed by the web UI to /export/<method>:
# one source row with two matches, plus the catalog name and the uploaded
# file's hash.
test_data = {
    "row-0": {
        "matches": [
            {
                "data": {
                    "code": "336612",
                    "description": "This U.S. industry comprises establishments primarily engaged in building boats.",
                    "name": "Boat Building",
                    "score": 14.394085818147092,
                },
                "method": "broader",
            },
            {
                "data": {
                    # Typo "produts" fixed.
                    "description": "This industry comprises establishments primarily engaged in manufacturing abrasive products",
                    "code": "327910",
                    "name": "Abrasive Product Manufacturing",
                    "score": 9.026339256926837,
                },
                "method": "exact",
            },
        ],
        "source": "Plastic polytetrafluoroethylene race liner",
    },
    # Lowercase to match the catalog keys the app actually sends
    # (search_mapping uses "useeio"; the JS copies that into
    # match_data['catalog']).
    "catalog": "useeio",
    "hash": "a90895a4c0e1b6f9e19e59555fb231b3593b9d3890a7d2520248760c43a9ed73",
}
16 changes: 16 additions & 0 deletions tests/test_basic_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from fixtures.match_fixture import test_data
from pathlib import Path
from perdu.semantic_web import write_matching_to_rdf
import tempfile


def test_ttl_export(monkeypatch):
    """Exporting the fixture as Turtle should produce a .ttl file on disk."""
    with tempfile.TemporaryDirectory() as td:
        # Redirect exports into a throwaway directory.
        monkeypatch.setattr("perdu.semantic_web.export_dir", Path(td))
        fp = write_matching_to_rdf(test_data)
        # Previously nothing was asserted; verify the export really exists.
        assert fp.is_file()
        assert fp.suffix == ".ttl"


def test_jsonld_export(monkeypatch):
    """Exporting the fixture as JSON-LD should produce a .json file on disk."""
    with tempfile.TemporaryDirectory() as td:
        # Redirect exports into a throwaway directory.
        monkeypatch.setattr("perdu.semantic_web.export_dir", Path(td))
        fp = write_matching_to_rdf(test_data, "json-ld", "json")
        # Previously nothing was asserted; verify the export really exists.
        assert fp.is_file()
        assert fp.suffix == ".json"

0 comments on commit 6c903d6

Please sign in to comment.