# Analysis of datasets - source: WorkBench model

Mission: verify the integrity of a system model

Objectives: 
- use pattern matching to detect the inconsistencies,
- use graph analysis to display correction suggestions,
- allocate the correction of the system model to a user.

Note: the RDF dataset shall be:
- renamed with the TriG extension (*.trig),
- available through a SPARQL query endpoint.

In [80]:
# import necessary libraries 
import pandas as pd
from pandas.io.json import json_normalize
from SPARQLWrapper import SPARQLWrapper, JSON

import ipywidgets as widgets
from ipywidgets import Layout
from IPython.display import Javascript

from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError

In [81]:
# definition of a function: sparql query
def query_fusekiData(sparql_query, sparql_service_url):
    """
    Run a SPARQL query against an endpoint and return the result bindings as a pandas DataFrame.

    Parameters
    ----------
    sparql_query : str
        Text of the SPARQL query to execute.
    sparql_service_url : str
        URL of the SPARQL query endpoint.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``result["results"]["bindings"]``, flattened by
        ``json_normalize`` (nested keys become dotted column names such as
        ``<variable>.value``). The frame is empty when there are no bindings.
    """
    # create the connection to the endpoint and ask for a JSON answer
    sparql = SPARQLWrapper(sparql_service_url)
    sparql.setQuery(sparql_query)
    sparql.setReturnFormat(JSON)

    # ask for the result
    result = sparql.query().convert()

    # pandas.io.json.json_normalize is deprecated since pandas 1.0 in favour of
    # pandas.json_normalize; fall back to the old location only on older pandas
    normalize = getattr(pd, "json_normalize", None)
    if normalize is None:
        from pandas.io.json import json_normalize as normalize
    return normalize(result["results"]["bindings"])

In [99]:
# 'Text' ipywidget used to prompt for the endpoint to query
# one Layout object carries the width and height given to the widgets
layout = widgets.Layout(height='40px', width='auto')
endpoint = widgets.Text(
    description='Endpoint: ',
    value='http://localhost:3030/.',
    placeholder='http://localhost:3030/',
    disabled=False,
    layout=layout,
)
display(endpoint)

Text(value='http://localhost:3030/.', description='Endpoint: ', layout=Layout(height='40px', width='auto'), pl…

In [83]:
# 'Button' ipywidget: a click takes the prompted endpoint value into account
# by asking the notebook front end to execute the cells after the selected one
output = widgets.Output()


def on_button_clicked(b):
    """Click handler: emit the Javascript that runs the cells below the selected cell."""
    run_cells_below = Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.ncells())')
    with output:
        display(run_cells_below)


b = widgets.Button(description='Analyze dataset', disabled=False, layout=layout)
display(b)
b.on_click(on_button_clicked)

Button(description='Analyze dataset', layout=Layout(height='40px', width='auto'), style=ButtonStyle())

In [103]:
# verification of the endpoint URL: stop the analysis when the endpoint cannot be opened
try:
    # Request() raises ValueError on a malformed URL (e.g. an empty string),
    # so it is built inside the try block as well
    req = Request(endpoint.value)
    # only reachability matters here: close the connection as soon as it is opened
    with urlopen(req):
        pass
except HTTPError as e:
    # HTTPError is a subclass of URLError and must be handled first
    print('Error - Endpoint: ' +endpoint.value + ' not found. Verify URL. (Error code: ', e.code, ')')
    raise SystemExit
except URLError as e:
    print('Error - Server not reached. (Reason: ', e.reason, ')')
    raise SystemExit
except ValueError as e:
    print('Error - Endpoint: ' +endpoint.value + ' is not a valid URL. (Reason: ', e, ')')
    raise SystemExit
else:
    print ('Endpoint: ' +endpoint.value + ' found. Processing results now...')

Error - Endpoint: http://localhost:3030/dataset not found. Verify URL. (Error code:  404 )


SystemExit: 

# Rule #1: Each Subsystem shall be supplied by exactly one WorkPackage

In [None]:
# sparql workpackage query: every Subsystem name, with the name of each
# WorkPackage that supplies it when there is one (OPTIONAL part)
workpackageQuery = """
PREFIX project: <http://imce.jpl.nasa.gov/foundation/project/project#>
PREFIX oml: <http://def.seegrid.csiro.au/ontology/om/om-lite#>
PREFIX owl:   <http://www.w3.org/2002/07/owl#>
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX fse:   <http://imce.jpl.nasa.gov/discipline/fse/fse#>
PREFIX base:  <http://imce.jpl.nasa.gov/foundation/base/base#>
PREFIX analysis: <http://imce.jpl.nasa.gov/foundation/analysis/analysis#>
PREFIX mission: <http://imce.jpl.nasa.gov/foundation/mission/mission#>
PREFIX pr: <http://purl.org/ontology/prv/core#>
PREFIX pro: <http://purl.org/hpi/patchr#>

SELECT DISTINCT ?subSystem ?workPackage

FROM <urn:x-arq:UnionGraph>
WHERE
{
  ?a a fse:Subsystem ;
     base:hasCanonicalName ?subSystem .
  
  optional {
    ?b a project:WorkPackage ;
       base:hasCanonicalName ?workPackage ;
       project:supplies ?a .
  } 
}
"""
# query the endpoint only once: the same table serves the emptiness check and the analysis
result_table = query_fusekiData(workpackageQuery, endpoint.value)
if result_table.empty:
    print('Error - The pattern matching query returned no results. Verify OWL data.')
    raise SystemExit

In [None]:
# keep the two name columns and strip their ".value" suffix,
# or stop when the result table has no workPackage column at all
if 'workPackage.value' in result_table.columns:
    selected = result_table[["subSystem.value", "workPackage.value"]]
    obj = selected.rename(columns=lambda name: name.replace(".value", ""))
else:
    print('Error - No WorkPackage found. Verify OWL data.')
    raise SystemExit

## The problem is split into 2 subparts:

### a/ Are there subsystems not supplied by any WorkPackage?

In [None]:
# rows with a null workPackage are subsystems for which no WorkPackage was returned
unsupplied = obj[obj['workPackage'].isnull()]
print('Number of subsystems not supplied by any WorkPackage: '+str(obj.workPackage.isnull().sum()))

# names of the unsupplied subsystems
sol_a = unsupplied['subSystem'].tolist()

if sol_a:
    print('\nList of subsystems currently not supplied by any WorkPackage: ')
    for name in sol_a:
        print ("- " +name)
    print("\nSuggestion: Connect each of these subsystems to one WorkPackage, in the Mass Manifest.")
else:
    print('The model complies with the rule. No action needs to be performed.')

### b/ Are there subsystems supplied by more than one WorkPackage?

In [None]:
# generate "supplies" relationships between a workPackage and a subsystem
#obj = obj.append({'subSystem' : 'Payload' , 'workPackage' : 'WPx'} , ignore_index=True)
#obj = obj.append({'subSystem' : 'EPS' , 'workPackage' : 'WPx'} , ignore_index=True)

In [None]:
# count the distinct WorkPackages of each subsystem; nunique() ignores null
# workPackage values and duplicated rows, which size() would have counted
sol_b = obj.groupby("subSystem")["workPackage"].nunique()
sol_b = sol_b.to_frame("workPackage_count")

sol_b = sol_b[sol_b['workPackage_count'] > 1] # select rows based on condition 
print('Number of subsystems supplied by more than one WorkPackage: '+str(sol_b.shape[0]))

if sol_b.empty:
    print('The model complies with the rule. No action needs to be performed.')
else:
    print('\nList of subsystems currently supplied by more than one WorkPackage: ')
    # iterate the (subsystem, count) pairs directly instead of rebuilding lists per index
    for subsystem, count in sol_b['workPackage_count'].items():
        print('- Subsystem ' +subsystem +' has relationship(s) with ' +str(count) +' different WorkPackages: ')
        # distinct, non-null WorkPackage names of this subsystem (a null name cannot be concatenated)
        work_packages = obj.loc[obj['subSystem'] == subsystem, 'workPackage'].dropna().unique()
        for work_package in work_packages:
            print('  o ' +work_package)
    print("\nSuggestion: Look for these subsytems in the Mass Manifest and delete the extra relationships.")

# Rule #2: Each power load assembly shall be connected to at least one power source assembly

Note 1: the power type of the assemblies is determined by the type of their end circuit interface (PowerIn or PowerOut).

Note 2:
1. Solar arrays provide power to the EPS regulators and converters
2. EPS regulators and converters provide power to the batteries
3. Batteries provide power to the EPS regulators and converters 
4. EPS regulators and converters provide power to all the other power load components

In [104]:
# sparql powerAssembly query: every assembly presenting a "PowerIn"/"PowerInOut" end circuit,
# with (OPTIONAL part) each "PowerOut"/"PowerInOut" assembly connected to it through a function
powerAssemblyQuery = """
PREFIX fse:   <http://imce.jpl.nasa.gov/discipline/fse/fse#>
PREFIX mission: <http://imce.jpl.nasa.gov/foundation/mission/mission#>
PREFIX base:  <http://imce.jpl.nasa.gov/foundation/base/base#>
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?loadAssemblyID ?loadAssemblyName ?functionID ?sourceAssemblyName ?sourceAssemblyID 
FROM <urn:x-arq:UnionGraph>
WHERE {
  { ?powerLoadEndCircuitClass rdfs:label "PowerIn" } UNION { ?powerLoadEndCircuitClass rdfs:label "PowerInOut" }.
  ?powerLoadEndCircuit a ?powerLoadEndCircuitClass;
                       fse:hasEndCircuitIdentifier ?powerLoadEndCircuitID.

  ?mission_presentsA mission:hasPresentsTarget ?powerLoadEndCircuit;
                     mission:hasPresentsSource ?loadAssembly.
  ?loadAssembly base:hasCanonicalName ?loadAssemblyName;
                fse:hasAssemblyReferenceDesignator ?loadAssemblyID.

  optional {
    { ?powerSourceEndCircuitClass rdfs:label "PowerOut" } UNION { ?powerSourceEndCircuitClass rdfs:label "PowerInOut" }.
    ?powerSourceEndCircuit a ?powerSourceEndCircuitClass;
                           fse:hasEndCircuitIdentifier ?powerSourceEndCircuitID. 

    ?mission_presentsB mission:hasPresentsTarget ?powerSourceEndCircuit;
                       mission:hasPresentsSource ?sourceAssembly.
    ?sourceAssembly base:hasCanonicalName ?sourceAssemblyName;
                    fse:hasAssemblyReferenceDesignator ?sourceAssemblyID.

    {
      ?fse_connects1 a fse:ConnectsAssembly1 ;
                     fse:hasConnectsAssembly1Source ?function ;
                     fse:hasConnectsAssembly1Target ?loadAssembly.
      ?fse_connects2 a fse:ConnectsAssembly2 ;
                     fse:hasConnectsAssembly2Source ?function ;
                     fse:hasConnectsAssembly2Target ?sourceAssembly.
    } UNION {
      ?fse_connects1 a fse:ConnectsAssembly1 ;
                     fse:hasConnectsAssembly1Source ?function ;
                     fse:hasConnectsAssembly1Target ?sourceAssembly.
      ?fse_connects2 a fse:ConnectsAssembly2 ;
                     fse:hasConnectsAssembly2Source ?function ;
                     fse:hasConnectsAssembly2Target ?loadAssembly.
    }
    ?function fse:hasFunctionNumber ?functionID . 
  }
}
"""
# query the endpoint only once: the same table serves the emptiness check and the analysis
result_table = query_fusekiData(powerAssemblyQuery, endpoint.value)
if result_table.empty:
    print('Error - The pattern matching query returned no results. Verify OWL data.')
    raise SystemExit

EndPointNotFound: EndPointNotFound: it was impossible to connect with the endpoint in that address, check if it is correct. 

Response:
b'Error 404: Not Found\n'

In [None]:
# keep the columns needed for rule #2 and strip their ".value" suffix
if 'loadAssemblyName.value' not in result_table.columns:
    print('Error - No Power Load Assembly found. Verify OWL data.')
    raise SystemExit
else:
    # the function/source variables are bound only inside the OPTIONAL part of the query,
    # so their columns are missing when no load assembly is connected to any source;
    # reindex() creates the missing columns filled with NaN where [[...]] would raise KeyError
    obj = result_table.reindex(columns=["loadAssemblyName.value", "loadAssemblyID.value", "functionID.value", "sourceAssemblyName.value", "sourceAssemblyID.value"])
    obj = obj.rename(columns = lambda col: col.replace(".value", ""))

In [None]:
# rows with a null sourceAssemblyID are load assemblies without any source assembly
unconnected = obj[obj['sourceAssemblyID'].isnull()]
sol = obj.sourceAssemblyID.isnull().sum()
print('Number of power load assemblies not connected to any power source assembly: '+str(sol))

if sol != 0:
    # from here on, sol holds the name and ID of each unconnected load assembly
    sol = unconnected[["loadAssemblyName", "loadAssemblyID"]]
    print('\nList of power load assemblies not connected to any power source assembly: ' )
    for assembly_name, assembly_id in zip(sol["loadAssemblyName"], sol["loadAssemblyID"]):
        print('Name: ' +assembly_name +' - ID: ' +assembly_id)
    print('\nSuggestion: Use functions to connect each of these assemblies to at least one power source assembly, from the Function List Table.')
else:
    print('The model complies with the rule. No action needs to be performed.')

# Rule #3: Each power load end circuit interface shall be connected to one power source end circuit interface

In [None]:
# sparql powerInterface query: every "PowerIn"/"PowerInOut" end circuit with its assembly,
# and (OPTIONAL parts) the end circuit joined to it through a function, in either join direction
powerInterfaceQuery = """
PREFIX fse:   <http://imce.jpl.nasa.gov/discipline/fse/fse#>
PREFIX mission: <http://imce.jpl.nasa.gov/foundation/mission/mission#>
PREFIX base:  <http://imce.jpl.nasa.gov/foundation/base/base#>
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?powerLoadEndCircuitID ?loadAssemblyID ?loadAssemblyName ?functionID ?powerSourceEndCircuitID
FROM <urn:x-arq:UnionGraph>
WHERE {
  { ?powerLoadEndCircuitClass rdfs:label "PowerIn" } UNION { ?powerLoadEndCircuitClass rdfs:label "PowerInOut" }.
  ?powerLoadEndCircuit a ?powerLoadEndCircuitClass;
                       fse:hasEndCircuitIdentifier ?powerLoadEndCircuitID.

  ?mission_presentsA mission:hasPresentsTarget ?powerLoadEndCircuit;
                     mission:hasPresentsSource ?loadAssembly.
  ?loadAssembly base:hasCanonicalName ?loadAssemblyName;
                fse:hasAssemblyReferenceDesignator ?loadAssemblyID. 

  optional {
    ?fse_joins1 a mission:Joins1 ;
                mission:hasJoins1Source ?function ;
                mission:hasJoins1Target ?powerLoadEndCircuit. 
    ?function fse:hasFunctionNumber ?functionID.
    ?fse_joins2 a mission:Joins2 ;
                mission:hasJoins2Source ?function ;
                mission:hasJoins2Target ?powerSourceEndCircuit.
    ?powerSourceEndCircuit a ?powerSourceEndCircuitClass;
                           fse:hasEndCircuitIdentifier ?powerSourceEndCircuitID.}

  optional {
    ?fse_joins2 a mission:Joins2 ;
                mission:hasJoins2Source ?function ;
                mission:hasJoins2Target ?powerLoadEndCircuit. 
    ?function fse:hasFunctionNumber ?functionID.
    ?fse_joins1 a mission:Joins1 ;
                mission:hasJoins1Source ?function ;
                mission:hasJoins1Target ?powerSourceEndCircuit.
    ?powerSourceEndCircuit a ?powerSourceEndCircuitClass;
                           fse:hasEndCircuitIdentifier ?powerSourceEndCircuitID.}
}
"""
# query the endpoint only once: the same table serves the emptiness check and the analysis
result_table = query_fusekiData(powerInterfaceQuery, endpoint.value)
if result_table.empty:
    print('Error - The pattern matching query returned no results. Verify OWL data.')
    raise SystemExit

In [None]:
# keep the columns needed for rule #3 and strip their ".value" suffix
if 'loadAssemblyName.value' not in result_table.columns:
    print('Error - No Power Load Assembly found. Verify OWL data.')
    raise SystemExit
else:
    # ?functionID and ?powerSourceEndCircuitID are bound only inside the OPTIONAL parts of the
    # query, so their columns are missing when no load end circuit is joined to anything;
    # reindex() creates the missing columns filled with NaN where [[...]] would raise KeyError
    obj = result_table.reindex(columns=["functionID.value", "loadAssemblyName.value", "loadAssemblyID.value", "powerLoadEndCircuitID.value", "powerSourceEndCircuitID.value"])
    obj = obj.rename(columns = lambda col: col.replace(".value", ""))

In [None]:
# rows with a null powerSourceEndCircuitID are load end circuits without a joined source end circuit
unconnected = obj[obj['powerSourceEndCircuitID'].isnull()]
sol = obj.powerSourceEndCircuitID.isnull().sum()
print('Number of power load end circuit interfaces not connected to any power source end circuit interface: '+str(sol))

if sol != 0:
    # from here on, sol holds the name and ID of the assembly owning each unconnected interface
    sol = unconnected[["loadAssemblyName", "loadAssemblyID"]]
    print('\nList of the power load assemblies with a missing interface connection with a power source assembly: ')
    for assembly_name, assembly_id in zip(sol["loadAssemblyName"], sol["loadAssemblyID"]):
        print('Name: ' +assembly_name +' - ID: ' +assembly_id)
    print('\nSuggestion: Look for these assemblies and connect their end circuit interfaces to one interface of at least a power source assembly, from the Function List Table.')
else:
    print('The model complies with the rule. No action needs to be performed.')