Skip to content

Commit

Permalink
0.1 version basically done
Browse files Browse the repository at this point in the history
  • Loading branch information
cmutel committed Oct 21, 2019
1 parent aa125b8 commit 6c903d6
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 24 deletions.
2 changes: 1 addition & 1 deletion perdu/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__version__ = (0, 1)

from .filesystem import base_dir
from .filesystem import base_dir, export_dir
from .searching import (
search_gs1,
search_gs1_disjoint,
Expand Down
45 changes: 41 additions & 4 deletions perdu/assets/templates/file.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ <h4>File {{ filename }}:</h4>
</div>
<div class="five columns" id="export-section" style="display: none">
<p> Export current matches:</p>
<input type="button" id="export-ttl" class="button-primary" value="Export as TTL" />
<input type="button" id="export-jsonld" class="button-primary" value="Export as JSON-LD" />
<input type="button" id="export-ttl" class="button" value="Export as TTL" />
<input type="button" id="export-jsonld" class="button" value="Export as JSON-LD" />
</div>

<table class="u-full-width" id="search_results_table">
Expand Down Expand Up @@ -100,13 +100,15 @@ <h4>Refine query</h4>
<script type="text/javascript">
// Currently selected catalog name (initialized from the template below).
var catalog = null;
// Accumulates the match payload that is JSON.stringify'd and POSTed to
// /export/<method>.
// NOTE(review): bracket assignment on a Map sets plain object properties,
// not Map entries; JSON.stringify happens to serialize those expando
// properties, but a plain object literal would be conventional — confirm
// before changing.
var match_data = new Map();
match_data['hash'] = "{{ hash }}";
var match_type = "exact";
var source_row = null;
var match_row = null;


// Default to the first catalog the server offers.
(function set_initial_catalog() {
catalog = "{{ catalogues[0] }}";
match_data['catalog'] = "{{ catalogues[0] }}";
})();

// Handle match type selection
Expand Down Expand Up @@ -136,6 +138,7 @@ <h4>Refine query</h4>
}

function toggle_catalog_selection(button) {
match_data['catalog'] = button.target.value;
var all_buttons = document.querySelectorAll('.catalog-selection');
Array.prototype.forEach.call(all_buttons, function(elements, index) {
if (button.target.value === elements.value) {
Expand All @@ -147,7 +150,6 @@ <h4>Refine query</h4>
});
}


// Query catalog and reset result table
function query(query_string){
var opts = {
Expand Down Expand Up @@ -268,17 +270,30 @@ <h4>Refine query</h4>
modal.style.display = "block";
};


// Export as TTL: POST the accumulated match_data to the server, which
// writes the RDF file and replies with its filename.
document.getElementById("export-ttl").onclick = function (event) {
var xhr = new XMLHttpRequest();
var url = "/export/ttl";
xhr.open("POST", url, true);
xhr.setRequestHeader("Content-Type", "application/json");

// On success, swap the export button for a small form whose submit
// button downloads the generated file via /download/<filename>.
xhr.onreadystatechange=function() {
if (xhr.readyState==4 && xhr.status==200){
var fp = JSON.parse(xhr.responseText).fp;
var button = document.getElementById("export-ttl");
var new_form = document.createElement('form');
new_form.style = "margin: 0; padding: 0; display:inline";
new_form.innerHTML = '<input display="inline" type="submit" class="button-primary" value="Download TTL" />';
new_form.action = "/download/" + fp;
button.replaceWith(new_form);
}
}

var data = JSON.stringify(match_data);
xhr.send(data);
}

// Export as JSON-LD: POST the accumulated match_data to the server, which
// writes the RDF file and replies with its filename. (The scraped diff
// left two onclick assignments for this button — a truncated old handler
// and its replacement; only the final handler is kept.)
document.getElementById("export-jsonld").onclick = function (event) {
var xhr = new XMLHttpRequest();
var url = "/export/jsonld";
xhr.open("POST", url, true);
xhr.setRequestHeader("Content-Type", "application/json");

// On success, swap the export button for a small form whose submit
// button downloads the generated file via /download/<filename>.
xhr.onreadystatechange=function() {
if (xhr.readyState==4 && xhr.status==200){
var fp = JSON.parse(xhr.responseText).fp;
var button = document.getElementById("export-jsonld");
var new_form = document.createElement('form');
new_form.style = "margin: 0; padding: 0; display:inline";
new_form.innerHTML = '<input display="inline" type="submit" class="button-primary" value="Download JSON-LD" />';
new_form.action = "/download/" + fp;
button.replaceWith(new_form);
}
}

var data = JSON.stringify(match_data);
xhr.send(data);
}

// Populate table with search results as the search field is updated.
var table = document.getElementById('refine-search-field');
table.addEventListener('input', function (evt) {
Expand Down
3 changes: 3 additions & 0 deletions perdu/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@

# Per-user application data directory; created eagerly on import so that
# the rest of the package can rely on it existing.
base_dir = Path(appdirs.user_data_dir("perdu-search", "perdu"))
base_dir.mkdir(exist_ok=True, parents=True)

# RDF / JSON-LD export files are written here (see perdu.semantic_web).
export_dir = base_dir / "exports"
export_dir.mkdir(exist_ok=True, parents=True)
59 changes: 59 additions & 0 deletions perdu/semantic_web.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from .filesystem import export_dir
from rdflib import Literal, RDF, URIRef, Namespace, Graph
from rdflib.namespace import DC, RDFS, OWL, SKOS


# Map the UI's match methods to RDF predicates: "exact" asserts identity
# via owl:sameAs; the other methods use SKOS-style semantic relations.
verb_mapping = {
    "exact": OWL.sameAs,
    "approximate": SKOS.related,
    "narrower": SKOS.narrower,
    "broader": SKOS.broader,
}


def write_matching_to_rdf(data, format="turtle", extension="ttl"):
    """Serialize a set of catalog matches to an RDF file in ``export_dir``.

    Args:
        data: Matching payload posted by the web UI. Expected keys:
            ``hash`` (uploaded-file identifier), ``catalog`` (e.g. ``"gs1"``),
            and one ``"row-<n>"`` entry per source row, each holding a
            ``source`` label and a ``matches`` list of
            ``{"method": ..., "data": {"code", "name", "description", ...}}``.
        format: rdflib serialization format, e.g. ``"turtle"`` or
            ``"json-ld"``. (Keeps the builtin-shadowing name so existing
            keyword callers are unaffected.)
        extension: File extension for the export file.

    Returns:
        ``pathlib.Path`` of the written export file.
    """
    g = Graph()

    olca = Namespace("http://greendelta.github.io/olca-schema/context.jsonld#")

    g.bind("olca", "http://greendelta.github.io/olca-schema/context.jsonld")
    g.bind("dc", DC)
    g.bind("owl", OWL)
    g.bind("skos", SKOS)

    # GS1 entries describe products (olca:Flow); the other catalogs
    # describe industries/activities (olca:Process).
    olca_object = olca.Flow if data["catalog"] == "gs1" else olca.Process

    row_keys = [key for key in data if key.startswith("row-")]

    # Start by describing what we are linking against (only those elements
    # actually used), deduplicated by catalog code.
    node_dict = {}
    for key in row_keys:
        for o in data[key]["matches"]:
            match = o["data"]
            if match["code"] not in node_dict:
                uri = "http://perdu.data/{}/{}".format(data["catalog"], match["code"])
                node = URIRef(uri)
                g.add((node, RDF.type, olca_object))
                g.add((node, DC.title, Literal(match["name"])))
                g.add((node, RDFS.label, Literal(match["name"])))
                g.add((node, DC.description, Literal(match["description"])))
                node_dict[match["code"]] = node

    # Now describe our links. Each source row gets its own URI (hash + row
    # key); previously the URI used only the hash, which collapsed every
    # source term of a file into a single RDF node with merged labels and
    # links.
    for key in row_keys:
        uri = "http://perdu.data/source/{}/{}".format(data["hash"], key)
        node = URIRef(uri)
        g.add((node, RDF.type, olca.Flow))
        g.add((node, RDFS.label, Literal(data[key]["source"])))
        g.add((node, DC.publisher, Literal("perdu.data")))
        g.add((node, DC.creator, URIRef("https://github.com/cmutel/perdu")))
        for match in data[key]["matches"]:
            g.add(
                (node, verb_mapping[match["method"]], node_dict[match["data"]["code"]])
            )

    fp = export_dir / "{}.{}.{}".format(data["hash"], data["catalog"], extension)
    if fp.is_file():
        # Overwrite any stale export for the same source file and catalog.
        fp.unlink()

    with open(fp, "wb") as f:
        g.serialize(f, format=format, encoding="utf-8")
    return fp
56 changes: 37 additions & 19 deletions perdu/webapp.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
from . import (
search_gs1_disjoint,
search_gs1,
base_dir,
export_dir,
File,
search_corrector_gs1,
search_naics_disjoint,
search_naics,
search_corrector_naics,
search_useeio_disjoint,
search_useeio,
search_corrector_useeio,
base_dir,
File,
search_gs1,
search_gs1_disjoint,
search_naics,
search_naics_disjoint,
search_useeio,
search_useeio_disjoint,
)
from .ingestion import mapping
from .semantic_web import write_matching_to_rdf
from flask import (
abort,
flash,
Expand Down Expand Up @@ -51,8 +53,12 @@ def allowed_file(filename):


# Disjoint-search variants exist but are not currently wired up:
# search_mapping = {"naics": search_naics_disjoint, "gs1": search_gs1_disjoint, "useeio": search_useeio_disjoint}

# Catalog name -> search function used by the /search endpoint.
search_mapping = {"naics": search_naics, "gs1": search_gs1, "useeio": search_useeio}
# Catalog name -> spelling-corrector search used to refine queries.
corrector_mapping = {
    "naics": search_corrector_naics,
    "gs1": search_corrector_gs1,
    "useeio": search_corrector_useeio,
}


@perdu_app.route("/", methods=["GET", "POST"])
Expand Down Expand Up @@ -102,9 +108,17 @@ def search():
@perdu_app.route("/export/<method>", methods=["POST"])
def export_linked_data(method):
    """Write the POSTed match data to an RDF export file.

    Args:
        method: Export flavor from the URL: ``"ttl"`` or ``"jsonld"``.

    Returns:
        JSON ``{"fp": <filename>}`` naming the file created in the export
        directory; the client builds a ``/download/<filename>`` link from it.
    """
    content = request.get_json()
    if method == "ttl":
        fp = write_matching_to_rdf(content)
    elif method == "jsonld":
        fp = write_matching_to_rdf(content, "json-ld", "json")
    else:
        # Unknown export flavor: 404 instead of a NameError/500 from the
        # unbound ``fp`` below.
        abort(404)
    return jsonify({"fp": fp.name})


@perdu_app.route("/download/<path>", methods=["GET"])
def download_export(path):
    """Send a previously generated export file as an attachment.

    Args:
        path: Export filename as returned by ``export_linked_data``.
    """
    fp = (export_dir / path).resolve()
    # Refuse anything that resolves outside the export directory (e.g.
    # ".." components) and missing files; send_file on a missing path
    # would otherwise raise and return a 500.
    if export_dir.resolve() not in fp.parents or not fp.is_file():
        abort(404)
    return send_file(fp, as_attachment=True)


@perdu_app.route("/file/<hash>", methods=["GET"])
Expand All @@ -115,8 +129,12 @@ def uploaded_file(hash):
raise (404)
data = mapping[file.kind](file.filepath)
return render_template(
"file.html", title="File: {}".format(file.name), filename=file.name, data=data,
catalogues=list(search_mapping)
"file.html",
title="File: {}".format(file.name),
filename=file.name,
data=data,
catalogues=list(search_mapping),
hash=hash,
)


Expand Down Expand Up @@ -154,11 +172,11 @@ def upload():


def normalize_search_results(result):
    """Normalize a raw search hit to the common result schema.

    GS1 hits use ``brick``/``definition``/``klass`` keys; map those to the
    ``name``/``description``/``class`` keys the templates expect. Hits from
    other catalogs already use the common schema and pass through unchanged.
    Note that for GS1 hits only these three keys are returned — any extra
    keys on the input dict are dropped.
    """
    if "brick" in result:
        return {
            "description": result.pop("definition"),
            "name": result.pop("brick"),
            "class": result.pop("klass"),
        }
    return result
Expand Down
27 changes: 27 additions & 0 deletions tests/fixtures/match_fixture.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Minimal matching payload as POSTed by the web UI to /export/<method>:
# one source row with two matches, plus the catalog name and the uploaded
# file's hash.
test_data = {
    "row-0": {
        "matches": [
            {
                "data": {
                    "code": "336612",
                    "description": "This U.S. industry comprises establishments primarily engaged in building boats.",
                    "name": "Boat Building",
                    "score": 14.394085818147092,
                },
                "method": "broader",
            },
            {
                "data": {
                    # Typo "produts" fixed.
                    "description": "This industry comprises establishments primarily engaged in manufacturing abrasive products",
                    "code": "327910",
                    "name": "Abrasive Product Manufacturing",
                    "score": 9.026339256926837,
                },
                "method": "exact",
            },
        ],
        "source": "Plastic polytetrafluoroethylene race liner",
    },
    # Lowercase to match the catalog keys the app actually sends
    # (search_mapping uses "useeio"; the JS copies that into
    # match_data['catalog']).
    "catalog": "useeio",
    "hash": "a90895a4c0e1b6f9e19e59555fb231b3593b9d3890a7d2520248760c43a9ed73",
}
16 changes: 16 additions & 0 deletions tests/test_basic_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from fixtures.match_fixture import test_data
from pathlib import Path
from perdu.semantic_web import write_matching_to_rdf
import tempfile


def test_ttl_export(monkeypatch):
    """Exporting the fixture as Turtle should produce a .ttl file on disk."""
    with tempfile.TemporaryDirectory() as td:
        # Redirect exports into a throwaway directory.
        monkeypatch.setattr("perdu.semantic_web.export_dir", Path(td))
        fp = write_matching_to_rdf(test_data)
        # Previously nothing was asserted; verify the export really exists.
        assert fp.is_file()
        assert fp.suffix == ".ttl"


def test_jsonld_export(monkeypatch):
    """Exporting the fixture as JSON-LD should produce a .json file on disk."""
    with tempfile.TemporaryDirectory() as td:
        # Redirect exports into a throwaway directory.
        monkeypatch.setattr("perdu.semantic_web.export_dir", Path(td))
        fp = write_matching_to_rdf(test_data, "json-ld", "json")
        # Previously nothing was asserted; verify the export really exists.
        assert fp.is_file()
        assert fp.suffix == ".json"

0 comments on commit 6c903d6

Please sign in to comment.