# Analysis of datasets - source: MagicDraw model

Mission: verify the integrity of a system model

Objectives: 
- use pattern matching to detect the inconsistencies,
- use graph analysis to display correction suggestions,
- allocate the correction of the system model to a user, selected as being familiar with the CAESAR WorkBench v1.23,
- assume that the test is finished when the graph analysis returns that the input model complies with the 3 different rules.

Note: the RDF dataset shall be 
- renamed: *.trig
- available through a query endpoint

In [385]:
# import necessary libraries 
import pandas as pd
from pandas.io.json import json_normalize
from SPARQLWrapper import SPARQLWrapper, JSON

import ipywidgets as widgets
from ipywidgets import Layout
from IPython.display import Javascript

from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError

import networkx as nx
import snap

In [386]:
# definition of a function: sparql query
def query_fusekiData(sparql_query, sparql_service_url):
    """
    Query the endpoint with the given query string and return the results as a pandas Dataframe.
    """
    # create the connection to the endpoint
    sparql = SPARQLWrapper(sparql_service_url)
    
    sparql.setQuery(sparql_query)
    sparql.setReturnFormat(JSON)

    # ask for the result
    result = sparql.query().convert()
    return json_normalize(result["results"]["bindings"])

In [387]:
# definition of ipywidget 'Text' to prompt the  endpoint to query
layout = widgets.Layout(width='auto', height='40px') #set width and height of the different widgets
endpoint=widgets.Text(
    value='http://localhost:3030/MDv20191130',
    placeholder='http://localhost:3030/',
    description='Endpoint: ',
    layout = layout,
    disabled=False
)
display(endpoint)

Text(value='http://localhost:3030/MDv20191130', description='Endpoint: ', layout=Layout(height='40px', width='…

In [388]:
# definition of ipywidget 'Button' to take into account the endpoint value prompted
output = widgets.Output()
def on_button_clicked(b):
    with output:
        display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.ncells())'))
        
b = widgets.Button(
    description='Analyze dataset', 
    layout = layout,
    disabled=False)
display(b)
b.on_click(on_button_clicked)

Button(description='Analyze dataset', layout=Layout(height='40px', width='auto'), style=ButtonStyle())

In [389]:
# verification of the endpoint URL
req = Request(endpoint.value)
try:
    response = urlopen(req)
except HTTPError as e:
    print('Error - Endpoint: ' +endpoint.value + ' not found. Verify URL. (Error code: ', e.code, ')')
    raise SystemExit
except URLError as e:
    print('Error - Server not reached. (Reason: ', e.reason, ')')
    raise SystemExit
else:
    print ('Endpoint: ' +endpoint.value + ' found. Processing results now...')

Endpoint: http://localhost:3030/MDv20191130 found. Processing results now...


# Rule #1: Structural decomposition graph shall be directed, acyclic, connected and rooted

In [390]:
# composition sparql query
query = """
PREFIX owl:   <http://www.w3.org/2002/07/owl#>
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX base:  <http://imce.jpl.nasa.gov/foundation/base/base#>
PREFIX fse-backbone: <http://imce.jpl.nasa.gov/backbone/imce.jpl.nasa.gov/discipline/fse/fse#>

SELECT DISTINCT ?physicalComponent ?isContainedIn
FROM <urn:x-arq:UnionGraph>
WHERE
{
  ?a a owl:Class ;
     rdfs:label ?b ;
     rdfs:subClassOf <http://firesat.jpl.nasa.gov/user-model/generated/md/profileExt/firesat-extensions_ID__18_5_3_8c20287_1563932039371_543598_24020#PhysicalComponent_ID__18_5_3_8c20287_1563932086646_143892_24055>.

  ?c a owl:Class ;
     rdfs:label ?physicalComponent ;
     rdfs:subClassOf <http://firesat.jpl.nasa.gov/user-model/generated/md/profileExt/firesat-extensions_ID__18_5_3_8c20287_1563932039371_543598_24020#PhysicalComponent_ID__18_5_3_8c20287_1563932086646_143892_24055>.


  optional {?a rdfs:subClassOf [a owl:Restriction ;
               owl:onProperty base:contains ;
               owl:someValuesFrom ?c] }.

  optional {?e rdfs:subClassOf [a owl:Restriction ;
               owl:onProperty base:contains ;
               owl:someValuesFrom ?c ].
    ?e rdfs:label ?isContainedIn }.
}
"""
if query_fusekiData(query, endpoint.value).empty:
    print('Error - The pattern matching query returned no results. Verify OWL data.')
    raise SystemExit
else:
    result_table = query_fusekiData(query, endpoint.value)

In [391]:
if 'physicalComponent.value' not in result_table.columns:
    print('Error - No Physical Component found. Verify OWL data.')
    raise SystemExit
else:
    obj = result_table[['physicalComponent.value', 'isContainedIn.value']]
    obj = obj.rename(columns = lambda col: col.replace(".value", ""))

In [392]:
# from dataframe to graph
objGraph = nx.from_pandas_edgelist(obj, 'physicalComponent', 'isContainedIn', create_using=nx.DiGraph())

In [393]:
# is graph directed
print("Is graph directed:", nx.is_directed(objGraph))

Is graph directed: True


In [394]:
# is graph acyclic
print("Graph is acyclic:", nx.is_directed_acyclic_graph(objGraph))
if nx.is_directed_acyclic_graph(objGraph) != True:
    print("Number of cycles:", len(list(nx.simple_cycles(objGraph))))
    print("Cycles found:")
    for el in list(nx.simple_cycles(objGraph)):
        print('- ', el)
    print("Among them, number of self-loop edges:", objGraph.number_of_selfloops())
    print("Nodes presenting a self-loop edge:" )
    for el in objGraph.nodes_with_selfloops():
        print('- ', el)
    print("\nSuggestion: Correct the graph so that it becomes acyclic.")

Graph is acyclic: True


In [395]:
# is graph connected
print("Graph is connected:", nx.is_weakly_connected(objGraph))
if nx.is_weakly_connected(objGraph) != True:
    print("Number of isolated nodes:", len(list(nx.isolates(objGraph))))
    print("Isolated nodes:")
    for el in list(nx.isolates(objGraph)):
        print('- ', el)
    print("\nSuggestion: Connect the isolated nodes.")

Graph is connected: True


In [396]:
# is graph rooted
obj = pd.DataFrame(objGraph.out_degree())
obj = obj[obj[1]==0]
print("Graph is rooted:", obj.shape[0] == 1)
if obj.shape[0] == 0:
    print("No roots found.")
    print("\nSuggestion: Correct the graph so that it presents one and only one root.")
if obj.shape[0] > 1:
    print("Number of roots found:", obj.shape[0])
    print("Roots found:")
    for el in obj[0]:
        print('-', el)
    print("\nSuggestion: Correct the graph so that it presents one and only one root.")

Graph is rooted: False
Number of roots found: 9
Roots found:
- Structures and mechanisms
- EPS
- Command and data handling
- ADCS and GNC
- Communications
- Propulsion
- Thermal control
- FIresat flight system
- Payload

Suggestion: Correct the graph so that it presents one and only one root.


# Rule #2: Each Physical Component shall be aggregated in exactly one Subsystem - WAITING FOR VERIFICATION

In [406]:
# aggregation sparql query
query = """
PREFIX owl:   <http://www.w3.org/2002/07/owl#>
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX base:  <http://imce.jpl.nasa.gov/foundation/base/base#>
PREFIX fse-backbone: <http://imce.jpl.nasa.gov/backbone/imce.jpl.nasa.gov/discipline/fse/fse#>

SELECT DISTINCT ?physicalComponent ?subSystem
FROM <urn:x-arq:UnionGraph>
WHERE
{
  ?a a owl:Class ;
     rdfs:label ?b ;
     rdfs:subClassOf <http://firesat.jpl.nasa.gov/user-model/generated/md/profileExt/firesat-extensions_ID__18_5_3_8c20287_1563932039371_543598_24020#PhysicalComponent_ID__18_5_3_8c20287_1563932086646_143892_24055>.

  ?c a owl:Class ;
     rdfs:label ?physicalComponent ;
     rdfs:subClassOf <http://firesat.jpl.nasa.gov/user-model/generated/md/profileExt/firesat-extensions_ID__18_5_3_8c20287_1563932039371_543598_24020#PhysicalComponent_ID__18_5_3_8c20287_1563932086646_143892_24055>.


  optional {?a rdfs:subClassOf [a owl:Restriction ;
               owl:onProperty base:aggregates ;
               owl:someValuesFrom ?c] }.

  optional {?e rdfs:subClassOf [a owl:Restriction ;
               owl:onProperty base:aggregates ;
               owl:someValuesFrom ?c ].
    ?e rdfs:label ?subSystem }.
}
"""
if query_fusekiData(query, endpoint.value).empty:
    print('Error - The pattern matching query returned no results. Verify OWL data.')
    raise SystemExit
else:
    result_table = query_fusekiData(query, endpoint.value)

In [407]:
if 'subSystem.value' not in result_table.columns:
    print('Error - No Subsystems found. Verify OWL data.')
    raise SystemExit
else:
    obj = result_table[['physicalComponent.value', 'subSystem.value']]
    obj = obj.rename(columns = lambda col: col.replace(".value", ""))

Error - No Subsystems found. Verify OWL data.


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [408]:
# from dataframe to graph
objGraph = nx.from_pandas_edgelist(obj, 'physicalComponent', 'subSystem', create_using=nx.DiGraph())

KeyError: 'physicalComponent'

### a/ Is there Physical Component not aggregated in any Subsystem?

In [409]:
sol_a = obj.subSystem.isnull().sum() # select rows based on condition 

print('Number of physical component not aggregated in any subSystem: '+str(sol_a))
sol_a = obj[obj['subSystem'].isnull()] 
sol_a = sol_a['physicalComponent'].tolist()

if not sol_a:
    print('The model complies with the rule. No action needs to be performed.')
else:
    print('\nList of physical component currently not aggregated in any subSystem: ')
    for el in sol_a:
        print ("- " +el)
    print("\nSuggestion: Connect each of these physical components to one subSystem.")

AttributeError: 'DataFrame' object has no attribute 'subSystem'

### b/ Is there Physical Component aggregated in more than one SubSystem?

In [None]:
sol_b = obj.groupby(["physicalComponent"]).size()
sol_b = sol_b.to_frame("subSystem_count")

sol_b = sol_b[sol_b['subSystem_count'] > 1] # select rows based on condition 
print('Number of physical components aggregated in more than one SubSystem: '+str(sol_b.shape[0]))

if sol_b.empty:
    print('The model complies with the rule. No action needs to be performed.')
else:
    print('\nList of physical components currently aggregated in more than one SubSystem: ')
    for i in range(sol_b.shape[0]):
        print('- Physical component ' +list(sol_b.index)[i] +' is aggregated in ' +str(list(sol_b.subSystem_count)[i]) +' different SubSystems: ')
        for j in range((obj[obj['physicalComponent'] == list(sol_b.index)[i]]).shape[0]):
            print('  o ' +list(obj[obj['physicalComponent'] == list(sol_b.index)[i]].subSystem)[j])
    print("\nSuggestion: Look for these Physical Components and delete the extra aggregations.")

# Rule #3: Each Subsystem shall be supplied by exactly one WorkPackage - WAITING FOR VERIFICATION

In [None]:
# sparql workpackage query
query = """
PREFIX project: <http://imce.jpl.nasa.gov/foundation/project/project#>
PREFIX oml: <http://def.seegrid.csiro.au/ontology/om/om-lite#>
PREFIX owl:   <http://www.w3.org/2002/07/owl#>
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>
PREFIX fse:   <http://imce.jpl.nasa.gov/discipline/fse/fse#>
PREFIX base:  <http://imce.jpl.nasa.gov/foundation/base/base#>
PREFIX analysis: <http://imce.jpl.nasa.gov/foundation/analysis/analysis#>
PREFIX mission: <http://imce.jpl.nasa.gov/foundation/mission/mission#>
PREFIX pr: <http://purl.org/ontology/prv/core#>
PREFIX pro: <http://purl.org/hpi/patchr#>

SELECT DISTINCT ?subSystem ?workPackage

FROM <urn:x-arq:UnionGraph>
WHERE
{
  ?a a fse:Subsystem ;
     base:hasCanonicalName ?subSystem .
  
  optional {
    ?b a project:WorkPackage ;
       base:hasCanonicalName ?workPackage ;
       project:supplies ?a .
  } 
}
"""
if query_fusekiData(query, endpoint.value).empty:
    print('Error - The pattern matching query returned no results. Verify OWL data.')
    raise SystemExit
else:
    result_table = query_fusekiData(query, endpoint.value)

In [None]:
if 'workPackage.value' not in result_table.columns:
    print('Error - No WorkPackage found. Verify OWL data.')
    raise SystemExit
else:
    obj = result_table[["subSystem.value", "workPackage.value"]]
    obj = obj.rename(columns = lambda col: col.replace(".value", ""))

### a/ Is there subsytems not supplied by any WorkPackage?

In [None]:
sol_a = obj.workPackage.isnull().sum() # select rows based on condition 

print('Number of subsystems not supplied by any WorkPackage: '+str(sol_a))
sol_a = obj[obj['workPackage'].isnull()] 
sol_a = sol_a['subSystem'].tolist()

if not sol_a:
    print('The model complies with the rule. No action needs to be performed.')
else:
    print('\nList of subsystems currently not supplied by any WorkPackage: ')
    for el in sol_a:
        print ("- " +el)
    print("\nSuggestion: Connect each of these subsystems to one WorkPackage.")

### b/ Is there subystems supplied by more than one WorkPackage?

In [None]:
sol_b = obj.groupby(["subSystem"]).size()
sol_b = sol_b.to_frame("workPackage_count")

sol_b = sol_b[sol_b['workPackage_count'] > 1] # select rows based on condition 
print('Number of subsystems supplied by more than one WorkPackage: '+str(sol_b.shape[0]))

if sol_b.empty:
    print('The model complies with the rule. No action needs to be performed.')
else:
    print('\nList of subsystems currently supplied by more than one WorkPackage: ')
    for i in range(sol_b.shape[0]):
        print('- Subsystem ' +list(sol_b.index)[i] +' has relationship(s) with ' +str(list(sol_b.workPackage_count)[i]) +' different WorkPackages: ')
        for j in range((obj[obj['subSystem'] == list(sol_b.index)[i]]).shape[0]):
            print('  o ' +list(obj[obj['subSystem'] == list(sol_b.index)[i]].workPackage)[j])
    print("\nSuggestion: Look for these subsytems and delete the extra relationships.")

# Rule #4: Each Physical Component shall perform at least one Function

# Rule #5: Each Physical Component shall satisfy at least one non functional Requirement

# Rule #6: Each Function shall be performed by at least one Physical Component

# Rule #7: Each Requirement shall specify at least one Physical Component

# Rule #8: Each functional Requirement shall specify how the Physical Components perform the function