In [110]:
import os

import requests
from IPython.display import display, Markdown
from rdflib import ConjunctiveGraph, URIRef, Literal, BNode
from rdflib import Namespace
from rdflib.namespace import DCTERMS, OWL, RDF, RDFS, XMLNS, XSD

# Namespace for the PROV vocabulary terms (PROV.Activity, PROV.used, ...).
PROV = Namespace("http://www.w3.org/ns/prov#")

# Galaxy API key. It is read from the GALAXY_API_KEY environment variable so a
# real key never has to be saved inside the notebook; when the variable is not
# set, the placeholder value is used.
api_key = os.environ.get("GALAXY_API_KEY", "your_API_key")

# Base URL of the Galaxy server that is queried.
galaxy_url = "https://usegalaxy.fr"

# Convenience values derived from the settings above: the query-string
# parameters carrying the key, and the histories endpoint URL.
http_params = {'key': api_key}
galaxy_hist_url = galaxy_url + "/api/histories"

In [120]:
def list_histories(url, key):
    """
    Fetch the histories returned by a Galaxy server for an API key.

    Sends a GET request to ``<url>/api/histories`` with the key passed as the
    ``key`` query parameter.

    Parameters:
        url: base URL of the Galaxy server (no trailing slash; the path
            ``/api/histories`` is appended as-is).
        key: API key; it is converted with ``str()`` before being sent.

    Returns:
        A list of ``(id, name)`` tuples, one per history in the response.
        An empty list is returned when the response is a JSON object with a
        non-empty ``err_msg`` field; the message is printed in that case.
    """
    r = requests.get(url + "/api/histories", params={'key': str(key)})
    # Decode the body once and reuse it.
    histories = r.json()

    # An error response is a JSON object carrying 'err_msg', while a normal
    # response is iterated below as a sequence of history records. Only a
    # dict has .get(), so the type must be checked before looking for the
    # error message.
    if isinstance(histories, dict) and histories.get('err_msg'):
        print(histories['err_msg'])
        return []

    return [(h['id'], h['name']) for h in histories]


def print_histories(url, key):
    """
    Display the histories of a Galaxy server as a two-column Markdown table.

    The (id, name) pairs come from ``list_histories(url, key)``; each pair
    becomes one table row below an ``ID | NAME`` header, and the finished
    table is rendered with IPython's ``display(Markdown(...))``.

    Parameters:
        url: base URL of the Galaxy server, forwarded to ``list_histories``.
        key: API key, forwarded to ``list_histories``.

    Returns:
        None; the table is shown as cell output.
    """
    table_head = "\n| ID | NAME |\n| ------ | ------ |\n"
    table_rows = [
        '| ' + entry[0] + ' | ' + entry[1] + ' |\n'
        for entry in list_histories(url, key)
    ]
    display(Markdown(table_head + ''.join(table_rows)))
    

def _add_dataset_labels(graph, node, record):
    """Add an rdfs:label triple on ``node`` for the record's name and download URL, when present."""
    for field in ("name", "download_url"):
        if record.get(field):
            graph.add( (node, RDFS.label, Literal(record[field])) )


def gen_prov_graph(url, key, hist_id):
    """
    Build a PROV graph describing the datasets of one Galaxy history.

    For every item in the history contents, the item's provenance record is
    fetched and turned into triples:

    * the job becomes a ``prov:Activity`` associated with the tool id and
      stamped with the job's ``create_time`` / ``update_time``;
    * the item itself becomes a ``prov:Entity`` generated by that job;
    * every provenance parameter whose name contains ``"input"`` and whose
      value is a dict with an ``id`` becomes a ``prov:Entity`` that the job
      used and that the item was derived from.

    Parameters:
        url: base URL of the Galaxy server (no trailing slash).
        key: API key; it is converted with ``str()`` before being sent.
        hist_id: id of the history to describe.

    Returns:
        A ``ConjunctiveGraph`` holding the generated triples. The graph is
        empty when the histories request answers with an ``err_msg`` (the
        message is printed). ``None`` is returned when no listed history has
        the id ``hist_id``, so callers must check the result before using it.
    """
    G = ConjunctiveGraph()
    auth = {'key': str(key)}

    # get all histories
    histories = requests.get(url + "/api/histories", params=auth).json()
    # An error response is a JSON object carrying 'err_msg'; the normal
    # response is iterated as a sequence of records and has no .get().
    if isinstance(histories, dict) and histories.get('err_msg'):
        print(histories['err_msg'])
        return G

    # The id is used verbatim in the URLs below, so it has to match a listed
    # history exactly; a partial id would only produce a broken request.
    if not any(h['id'] == hist_id for h in histories):
        return None

    # get the content of the history
    content_url = url + "/api/histories/" + hist_id + "/contents"
    content = requests.get(content_url, params=auth).json()

    for c in content:
        content_id = c["id"]

        # get provenance from each history cell
        activity = requests.get(content_url + "/" + content_id + "/provenance", params=auth).json()
        job_id = activity['job_id']
        tool_id = activity['tool_id'].strip(" ")
        params = activity['parameters']

        job_node = URIRef(job_id)
        output_node = URIRef(content_id)

        ############
        # generating the activity
        G.add( (job_node, RDF.type, PROV.Activity) )
        G.add( (job_node, PROV.wasAssociatedWith, Literal(tool_id)) )

        # The timestamps belong to the job, so the job endpoint must be
        # queried with the job id (not the history id).
        job = requests.get(url + "/api/jobs/" + job_id, params=auth).json()
        G.add( (job_node, PROV.startedAtTime, Literal(job["create_time"], datatype=XSD.dateTime)) )
        G.add( (job_node, PROV.endedAtTime, Literal(job["update_time"], datatype=XSD.dateTime)) )

        ############
        # generating the output
        G.add( (output_node, RDF.type, PROV.Entity) )
        G.add( (output_node, PROV.wasGeneratedBy, job_node) )
        G.add( (output_node, PROV.wasAttributedTo, Literal(tool_id)) )
        _add_dataset_labels(G, output_node, c)

        ############
        # generating the inputs
        for k, value in params.items():
            if "input" not in k:
                continue
            # Only parameters that reference a dataset (a dict with an id)
            # are treated as inputs.
            if not (isinstance(value, dict) and value.get('id')):
                continue

            input_id = value['id']
            in_dataset = requests.get(url + "/api/datasets/" + input_id, params=auth).json()

            input_node = URIRef(input_id)
            G.add( (input_node, RDF.type, PROV.Entity) )
            _add_dataset_labels(G, input_node, in_dataset)

            G.add( (job_node, PROV.used, input_node) )
            G.add( (output_node, PROV.wasDerivedFrom, input_node) )

    return G

In [121]:
# Show the histories returned for the configured server and API key as a Markdown table.
print_histories(galaxy_url, api_key)

Provided API key is not valid.



| ID | NAME |
| ------ | ------ |


In [122]:
# Build the provenance graph for one history and print it as Turtle.
graph = gen_prov_graph(galaxy_url, api_key, '58ba5445bfcae9c6')
if graph is None:
    # gen_prov_graph returns None when the history id is not among the listed histories.
    print("History not found.")
else:
    turtle = graph.serialize(format="turtle")
    # Older rdflib releases return bytes from serialize(), newer ones return str.
    print(turtle.decode() if isinstance(turtle, bytes) else turtle)

Provided API key is not valid.
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .


