In [1]:
from rdflib import Graph
import pandas as pd
import math

### Load Treatment Graphs

In [2]:
# Directory prefix for the G1/G2 treatment graphs. Only the disabled loader
# inside the string literal below refers to it; the active load does not.
path = 'treatments/Graph_Study_II/'
# The triple-quoted string that follows is a bare expression statement, so the
# G1/G2 loading code inside it is NOT executed (it is effectively commented out).
"""
g1 = Graph()
g1.parse(path + "G1treatment.ttl", format="ttl")

g2 = Graph()
g2.parse(path + "G2treatment.ttl", format="ttl")
"""
# Active graph used by the later cells: G3, parsed as Turtle from the relative
# path "G3.ttl" (note: this path is not prefixed with `path`).
g = Graph()
g.parse("G3.ttl", format="ttl")


<Graph identifier=Na69d9dd8d09b4d0cb2aaa0d9888863ec (<class 'rdflib.graph.Graph'>)>

In [3]:
from rdflib.plugins.sparql.processor import SPARQLResult

def sparql_results_to_df(results: SPARQLResult) -> pd.DataFrame:
    """
    Convert the rows of an rdflib SPARQL query result into a `pandas.DataFrame`.

    Every non-``None`` value in a row is converted with its ``toPython()``
    method; ``None`` values are kept as ``None``. Column labels are the string
    form of each entry in ``results.vars``.
    See https://github.com/RDFLib/rdflib/issues/1179.
    """
    column_names = [str(var) for var in results.vars]

    def _as_python(term):
        # Leave None untouched; everything else is asked for its Python value.
        if term is None:
            return None
        return term.toPython()

    rows = [[_as_python(term) for term in row] for row in results]
    return pd.DataFrame(data=rows, columns=column_names)

In [4]:
def cardinality_relation(graph, relation):
    """
    Count the triples in `graph` whose predicate is `relation`.

    Parameters
    ----------
    graph
        Object exposing ``query(sparql_text)`` whose result is accepted by
        `sparql_results_to_df`.
    relation : str
        Predicate IRI without the surrounding angle brackets. It is spliced
        verbatim between ``<`` and ``>`` in the query text, so it must already
        be a valid IRI string.

    Returns
    -------
    The value of the ``?count`` column in the first result row.
    """
    query = """
    select (count(?s) as ?count)
    where {
        ?s <""" + relation + """> ?o
        }
        """
    counts = sparql_results_to_df(graph.query(query))
    # Purely positional lookup. The previous `.iloc[0][0]` indexed a
    # string-labelled row Series with an integer key, which pandas 2.x
    # deprecates (integer keys will be treated as labels).
    return counts.iloc[0, 0]


def cardinality_entity(graph, entity):
    """
    Count how often `entity` occurs in `graph` as an object plus as a subject.

    Two count queries are run and their results added: triples matching
    ``?s ?p <entity>`` and triples matching ``<entity> ?p ?o``. A triple that
    has `entity` in both positions therefore contributes 2.

    Parameters
    ----------
    graph
        Object exposing ``query(sparql_text)`` whose result is accepted by
        `sparql_results_to_df`.
    entity : str
        IRI without the surrounding angle brackets. It is spliced verbatim
        between ``<`` and ``>`` in the query text, so it must already be a
        valid IRI string.

    Returns
    -------
    Sum of the two ``?count`` values.
    """
    query = """
    select (count(?s) as ?count)
    where {
        ?s ?p <""" + entity + """>
        }
        """
    # `.iloc[0, 0]` is a purely positional lookup. The previous `.iloc[0][0]`
    # indexed a string-labelled row Series with an integer key, which
    # pandas 2.x deprecates.
    cardinality = sparql_results_to_df(graph.query(query)).iloc[0, 0]

    query = """
    select (count(?o) as ?count)
    where {
        <""" + entity + """> ?p ?o
        }
        """
    cardinality += sparql_results_to_df(graph.query(query)).iloc[0, 0]
    return cardinality


def probability_relation(graph, r):
    """Return the triple count of relation `r` divided by ``len(graph)``."""
    occurrences = cardinality_relation(graph, r)
    total = len(graph)
    return occurrences / total


def probability_entity(graph, e):
    """Return the occurrence count of entity `e` divided by ``len(graph)``."""
    occurrences = cardinality_entity(graph, e)
    total = len(graph)
    return occurrences / total


def get_triples(graph):
    """
    Run a ``select distinct`` over every ``?s ?predicate ?object`` pattern in
    `graph` and return the result as a DataFrame (via `sparql_results_to_df`)
    with columns ``s``, ``predicate`` and ``object``.
    """
    all_triples_query = """    
    select distinct ?s ?predicate ?object
    where {
        ?s ?predicate ?object
        }
        """
    return sparql_results_to_df(graph.query(all_triples_query))


def list_relation(triples):
    """Return the distinct values of the ``predicate`` column, in order of first appearance, as a list."""
    distinct_predicates = triples["predicate"].unique()
    return list(distinct_predicates)


def list_entity(triples):
    """Return the set of distinct values found in the ``s`` and ``object`` columns."""
    subjects = triples["s"].unique()
    objects = triples["object"].unique()
    return set(subjects).union(objects)


def relation_entropy(graph, relation):
    """
    Sum ``-p * ln(p)`` over the given relations, where ``p`` is
    ``probability_relation(graph, r)``.

    Parameters
    ----------
    graph
        Graph passed through to `probability_relation`.
    relation
        Iterable of relation identifiers to include in the sum.

    Returns
    -------
    The accumulated entropy (natural logarithm); ``0`` when `relation` is empty.
    """
    RE = 0
    for r in relation:
        p_r = probability_relation(graph, r)
        if p_r <= 0:
            # A relation with no occurrences contributes nothing
            # (0 * log 0 is taken as 0); math.log(0) would raise ValueError.
            continue
        RE += -p_r * math.log(p_r)
    return RE


def entity_entropy(graph, entity):
    """
    Sum ``-p * ln(p)`` over the given entities, where ``p`` is
    ``probability_entity(graph, e)``.

    NOTE(review): `probability_entity` divides subject-plus-object occurrences
    by ``len(graph)``, so these ratios presumably do not sum to 1 across all
    entities. Confirm this matches the intended metric definition.

    Parameters
    ----------
    graph
        Graph passed through to `probability_entity`.
    entity
        Iterable of entity identifiers to include in the sum.

    Returns
    -------
    The accumulated entropy (natural logarithm); ``0`` when `entity` is empty.
    """
    EE = 0
    for e in entity:
        p_e = probability_entity(graph, e)
        if p_e <= 0:
            # An entity with no occurrences contributes nothing
            # (0 * log 0 is taken as 0); math.log(0) would raise ValueError.
            continue
        EE += -p_e * math.log(p_e)
    return EE


def relational_density(graph, relation):
    """Return ``len(graph)`` divided by the number of items in `relation`."""
    triple_count = len(graph)
    relation_count = len(relation)
    return triple_count / relation_count


def entity_density(graph, entity):
    """Return twice ``len(graph)`` divided by the number of items in `entity`."""
    doubled_triple_count = 2 * len(graph)
    entity_count = len(entity)
    return doubled_triple_count / entity_count

In [5]:
# Pull every triple of g into a DataFrame once, then derive from that frame the
# list of distinct predicates and the set of distinct subject/object values.
triples = get_triples(g)
relation = list_relation(triples)
entity = list_entity(triples)

In [6]:
# Sanity check: rows in the triples frame, number of distinct predicates, number of distinct entities.
print(triples.shape[0], len(relation), len(entity))

16717 7 4396


### Entropy-based measures (diversity)

In [7]:
# Relation entropy (RE) of g over its distinct predicates.
relation_entropy(g, relation)

1.634645392177751

In [8]:
# Entity entropy (EE) of g over its distinct subject/object values; runs two count queries per entity.
entity_entropy(g, entity)

12.94141739271148

### Sparsity metrics

In [9]:
# Relational density (RD): triples in g per distinct predicate.
RD = relational_density(g, relation)
RD

2388.1428571428573

In [10]:
# Entity density (ED): twice the number of triples in g per distinct entity.
ED = entity_density(g, entity)
ED

7.605550500454959

In [93]:
"""
              G1                             G2                             G3
------------------------------    ---------------------------------    --------------------------------
  RE  |  EE    |  RD   |  ED  |       RE   |  EE    |  RD   |  ED  |       RE   |  EE    |  RD  |  ED  |        
------ -------- --------------    ------- -------------------------    ------- ------------------------
1.564 | 12.606 |2159.57| 6.878|      1.574 | 12.715 |2388.14| 6.971|      1.635 | 12.941 |2388.14| 7.606|

"""