In [51]:
from rdflib import *
import numpy as np

In [15]:
# Load the TriG dataset (named graphs included) into a fresh conjunctive graph.
# The path is relative to the notebook's working directory.
trig_source = 'output_graph.trig'

g = ConjunctiveGraph()
# Kept as the cell's last expression so the parsed graph's repr is displayed.
g.parse(source=trig_source, format='trig')

<Graph identifier=file:///C:/Users/valep/Downloads/wikipedia/output_graph.trig (<class 'rdflib.graph.Graph'>)>

In [6]:
from rdflib import *

# Reuse the graph populated by the parsing cell when it has already run, so that
# executing the notebook top to bottom does not throw the loaded data away.
# A new empty graph is only created when this cell runs first on a fresh kernel.
try:
    g
except NameError:
    g = ConjunctiveGraph()

# Project namespaces: instance data and the ontology that types the opinions.
kb = Namespace("http://example.org/data/")
g.bind("kb", kb)

sebi = Namespace("http://example.org/ontology/")
g.bind("sebi", sebi)

# External vocabularies used by the completeness checks below.
hico = Namespace("https://w3id.org/hico#")
g.bind("hico", hico)

dct = Namespace("http://purl.org/dc/terms/")
g.bind("dct", dct)

# NOTE(review): this IRI is the OWL-Time specification page; the vocabulary
# namespace is usually <http://www.w3.org/2006/time#> - confirm against the data.
time = Namespace("https://www.w3.org/TR/owl-time/")
# `g.bind` must be *called*; assigning a tuple to it replaced the method and
# left the prefix unbound.
g.bind("time", time)

prov = Namespace("http://www.w3.org/ns/prov#")
g.bind("prov", prov)

wd = Namespace("http://www.wikidata.org/entity/")
g.bind("wd", wd)

# IRI of the graph holding factual (non-opinion) statements.
factual_data = URIRef("http://example.org/factual_data")

In [74]:
# Every named graph that types some document as Authentic, Forgery or Suspicious
# is treated as one opinion. The query has no PREFIX header, so `sebi:` must
# already be bound on `g`.
opinions_query = """
SELECT DISTINCT ?g
WHERE {
  GRAPH ?g {
    VALUES ?opinion { sebi:Authentic sebi:Forgery sebi:Suspicious }
    ?doc a ?opinion
    }
}
"""
opinions = g.query(opinions_query)

# Number of distinct opinion graphs.
opinion_graph_total = len(opinions)
print(opinion_graph_total)

278


In [75]:
# Distinct documents that received at least one Authentic / Forgery / Suspicious
# typing in any named graph. Relies on the `sebi:` prefix being bound on `g`.
documents_query = """
SELECT DISTINCT ?doc
WHERE {
  VALUES ?opinion { sebi:Authentic sebi:Forgery sebi:Suspicious }

  GRAPH ?g {
    ?doc a ?opinion
    }
}
"""
documents = g.query(documents_query)

# Number of distinct documents with an opinion.
document_total = len(documents)
print(document_total)

51


In [18]:
def content_completeness_queries(uri, predicate, completeness_score, graph=None):
    """Add one point to a score when a named graph uses ``predicate`` at least once.

    Runs a SPARQL ``ASK`` restricted to ``GRAPH <uri>`` that checks for any
    triple whose predicate is ``predicate``.

    Args:
        uri: IRI of the named graph to inspect; interpolated between ``<`` and
            ``>`` in the query text, so it must be a plain IRI string/URIRef.
        predicate: IRI of the predicate to look for (interpolated the same way).
        completeness_score: Running score to update.
        graph: Object exposing ``query(sparql)`` whose result has an
            ``askAnswer`` attribute. Defaults to the notebook-level ``g``, which
            keeps existing three-argument calls working while removing the hard
            dependency on whichever object ``g`` currently points to.

    Returns:
        ``completeness_score + 1`` when the ASK answer is truthy, otherwise
        ``completeness_score`` unchanged.
    """
    target = g if graph is None else graph

    query = """
        ASK {
            GRAPH <%s> {
                ?doc <%s> ?object .
            }
        }
    """ % (
        uri,
        predicate,
    )

    has_element = target.query(query)
    if has_element.askAnswer:
        completeness_score += 1

    return completeness_score


def context_completeness_queries(uri, predicate, completeness_score, inverse, graph=None):
    """Add one point to a score when ``uri`` is linked through ``predicate``.

    Runs a SPARQL ``ASK`` with no ``GRAPH`` clause, so the pattern is matched
    against whatever the queried graph exposes as its default graph.

    Args:
        uri: IRI of the resource being checked; interpolated between ``<`` and
            ``>`` in the query text.
        predicate: IRI of the linking predicate (interpolated the same way).
        completeness_score: Running score to update.
        inverse: When truthy, ``uri`` is matched in object position
            (``?object <predicate> <uri>``); otherwise in subject position
            (``<uri> <predicate> ?object``).
        graph: Object exposing ``query(sparql)`` whose result has an
            ``askAnswer`` attribute. Defaults to the notebook-level ``g``, which
            keeps existing four-argument calls working.

    Returns:
        ``completeness_score + 1`` when the ASK answer is truthy, otherwise
        ``completeness_score`` unchanged.
    """
    target = g if graph is None else graph

    # Only the triple pattern differs between the two directions.
    if inverse:
        pattern = "?object <%s> <%s> ." % (predicate, uri)
    else:
        pattern = "<%s> <%s> ?object ." % (uri, predicate)

    query = """
          ASK {
              %s
          }
    """ % pattern

    has_element = target.query(query)
    if has_element.askAnswer:
        completeness_score += 1

    return completeness_score


# Per-opinion scores, one entry per named graph returned in `opinions`.
completeness_metrics = []
content_completeness_metrics = []
context_completeness_metrics = []

# Predicates looked up inside the opinion's own named graph (content part).
content_predicates = (RDF.type, dct.creator, dct.date, dct.coverage, sebi.intention)
# (predicate, inverse) pairs looked up outside the named graph (context part).
context_checks = (
    (sebi.support, True),
    (hico.hasInterpretationCriterion, False),
)


def _max_min_mean(values):
    """Return (max, min, arithmetic mean) of a non-empty list of numbers."""
    return max(values), min(values), sum(values) / len(values)


for row in opinions:
    uri = URIRef(row[0])

    content_completeness_score = 0
    for predicate in content_predicates:
        content_completeness_score = content_completeness_queries(
            uri, predicate, content_completeness_score
        )

    context_completeness_score = 0
    for predicate, is_inverse in context_checks:
        context_completeness_score = context_completeness_queries(
            uri, predicate, context_completeness_score, inverse=is_inverse
        )

    # Normalise each part by the number of checks it contains (5 and 2).
    content_completeness_score_normalized = content_completeness_score / 5.0
    context_completeness_score_normalized = context_completeness_score / 2.0

    # The two normalised parts sum to at most 2, hence the division by 2.
    completeness_score = (
        content_completeness_score_normalized + context_completeness_score_normalized
    )
    completeness_final_score = round(completeness_score / 2, 2)
    completeness_metrics.append(completeness_final_score)

    content_completeness_score = round(content_completeness_score_normalized, 2)
    content_completeness_metrics.append(content_completeness_score)

    context_completeness_score = round(context_completeness_score_normalized, 2)
    context_completeness_metrics.append(context_completeness_score)

(
    completeness_max_value,
    completeness_min_value,
    completeness_avg_value,
) = _max_min_mean(completeness_metrics)

(
    content_completeness_max_value,
    content_completeness_min_value,
    content_completeness_avg_value,
) = _max_min_mean(content_completeness_metrics)

(
    context_completeness_max_value,
    context_completeness_min_value,
    context_completeness_avg_value,
) = _max_min_mean(context_completeness_metrics)

# One line per metric: max, min, mean.
print(completeness_max_value, completeness_min_value, completeness_avg_value)
print(
    content_completeness_max_value,
    content_completeness_min_value,
    content_completeness_avg_value,
)
print(
    context_completeness_max_value,
    context_completeness_min_value,
    context_completeness_avg_value,
)

0.9 0.1 0.5573476702508937
0.8 0.2 0.27240143369175734
1.0 0.0 0.8422939068100358


In [63]:
# For each document, count the distinct named graphs that type it as
# Authentic / Forgery / Suspicious - i.e. how many opinions it attracted.
most_debated_query = """
PREFIX hico: <https://w3id.org/hico#>
PREFIX sebi: <http://example.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix kb: <http://example.org/data/>
select distinct ?doc (count(distinct ?g) as ?n)

where {
    VALUES ?opinion { sebi:Authentic sebi:Forgery sebi:Suspicious }
    GRAPH ?g {?doc a ?opinion}
    }

group by ?doc
"""
most_debated = g.query(most_debated_query)

# Second column of each result row is the opinion count (?n).
debatability = [int(result_row[1]) for result_row in most_debated]

deb_max_value = max(debatability)
deb_min_value = min(debatability)
deb_avg_value = sum(debatability) / len(debatability)

print(deb_max_value, deb_min_value, deb_avg_value)

28 1 5.450980392156863


In [78]:
# Per-document stance score in [-1, 1]: each Authentic claim counts +1, each
# Forgery claim -1 and each suspicious claim 0, averaged over the number of
# distinct opinion graphs about the document.
controversial_counts = []

# NOTE(review): this query filters on sebi:SuspiciousDocument while the other
# queries in this notebook use sebi:Suspicious - confirm which class the data
# actually uses; a mismatch would silently drop suspicious opinions here.
most_controversial_query = """
PREFIX hico: <https://w3id.org/hico#>
PREFIX sebi: <http://example.org/ontology/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX kb: <http://example.org/data/>

SELECT ?doc 
        (SUM(IF(?opinion = sebi:Forgery, 1, 0)) AS ?forgeryCount)
       (SUM(IF(?opinion = sebi:Authentic, 1, 0)) AS ?authenticCount)
       (SUM(IF(?opinion = sebi:SuspiciousDocument, 1, 0)) AS ?suspiciousCount)
       (COUNT(distinct ?g) AS ?totalClaims)
    WHERE {
    GRAPH ?g {
        ?doc a ?opinion .
        FILTER (?opinion IN (sebi:Forgery, sebi:Authentic, sebi:SuspiciousDocument))
    }
}
GROUP BY ?doc
"""
most_controversial = g.query(most_controversial_query)

for row in most_controversial:
    # Column order follows the SELECT clause: doc, forgery, authentic,
    # suspicious, total.
    forgery_count = int(row[1])
    authentic_count = int(row[2])
    suspicious_count = int(row[3])
    total_claims = int(row[4])

    # Weighted sum of stances; the zero weight keeps suspicious claims neutral
    # while they still enlarge the denominator.
    weighted_sum = authentic_count * 1 + suspicious_count * 0 + forgery_count * -1

    total_count = weighted_sum / total_claims
    controversial_counts.append(total_count)

# Population standard deviation of the per-document scores. The original read
# from `total_counts`, a name never defined in this notebook, so the cell only
# worked with leftover kernel state.
std = np.std(controversial_counts, ddof=0)
con_max_value = max(controversial_counts)
con_min_value = min(controversial_counts)
con_avg_value = sum(controversial_counts) / len(controversial_counts)

print(con_max_value, con_min_value, con_avg_value)

print(f"Standard deviation of total counts: {std}")

1.0 -1.0 -0.3780693229672821
Standard deviation of total counts: 0.5954605932039252


In [None]:
# Repeats the opinion-graph query from earlier in the notebook and rebinds
# `opinions` to a fresh result. Relies on the `sebi:` prefix being bound on `g`.
opinions_query = """
SELECT DISTINCT ?g
WHERE {
  GRAPH ?g {
    VALUES ?opinion { sebi:Authentic sebi:Forgery sebi:Suspicious }
    ?doc a ?opinion
    }
}
"""
opinions = g.query(opinions_query)

# Number of distinct opinion graphs.
opinion_graph_total = len(opinions)
print(opinion_graph_total)