In [1]:
from rdflib import Graph
from rdflib.term import URIRef, Literal, BNode
from rdflib.namespace import RDF, RDFS, OWL, XSD, SKOS
import os
from tqdm.auto import tqdm

In [2]:



def page_rank(g, ents, num_iterations=10, damping_factor=0.85):
    """Compute PageRank scores for the entities ``ents`` of graph ``g``.

    An entity ``s`` links to an entity ``e`` when the graph holds at least one
    triple ``(s, p, e)``. Starting from a uniform score of ``1 / N``, every
    iteration applies

        PR(e) = (1 - d) / N + d * sum(PR(s) / out(s) for s linking to e)

    where ``N = len(ents)``, ``d = damping_factor`` and ``out(s)`` is the
    number of distinct objects appearing in triples whose subject is ``s``.

    Args:
        g: Graph-like object exposing ``triples((s, p, o))`` in which ``None``
            acts as a wildcard (e.g. an ``rdflib.Graph``).
        ents: Sized collection of hashable entities to rank.
        num_iterations: Number of update rounds to run.
        damping_factor: Weight ``d`` given to rank received through links;
            the remaining ``1 - d`` is spread uniformly over ``ents``.

    Returns:
        dict mapping every entity in ``ents`` to its score. Empty when
        ``ents`` is empty.
    """
    n = len(ents)
    if n == 0:
        return {}

    members = set(ents)
    # The link structure does not change between iterations, so it is built
    # once, in a single pass over the graph.
    linked_from = {e: set() for e in ents}  # entity -> ranked subjects pointing at it
    objects_of = {e: set() for e in ents}   # entity -> distinct objects it points at
    for s, _p, o in g.triples((None, None, None)):
        if s not in members:
            # A subject outside ``ents`` has no score of its own to pass on.
            continue
        objects_of[s].add(o)
        if o in members:
            linked_from[o].add(s)

    # Any subject found in ``linked_from`` has at least one object, so the
    # division below never sees a zero out-degree.
    out_degree = {e: len(objs) for e, objs in objects_of.items()}

    # The teleport share is added once per node, not once per incoming link.
    base = (1 - damping_factor) / n
    pr = {e: 1 / n for e in ents}
    for _ in range(num_iterations):
        pr = {
            e: base + damping_factor * sum(pr[s] / out_degree[s] for s in linked_from[e])
            for e in ents
        }

    return pr

def find_incoming_links(e, g):
    """Return the set of subjects of every triple in ``g`` whose object is ``e``.

    ``g`` must expose ``triples((s, p, o))`` with ``None`` as a wildcard.
    """
    # Subject and predicate are left as wildcards; only the object is fixed.
    return {subject for subject, _pred, _obj in g.triples((None, None, e))}

def count_outgoing_links(e, g):
    """Return how many distinct objects appear in triples of ``g`` with subject ``e``.

    ``g`` must expose ``triples((s, p, o))`` with ``None`` as a wildcard.
    """
    # Collecting into a set means an object reached through several
    # predicates is counted only once.
    distinct_targets = {target for _subj, _pred, target in g.triples((e, None, None))}
    return len(distinct_targets)



In [3]:
# Dataset name -> root folder that os.walk scans (recursively) for ontology files.
paths = {
    'conference': '/home/guilherme/Documents/kg/complex/conference/ont',
    'populated_conference': '/home/guilherme/Documents/kg/complex/conference_100/ont',
    'geolink': '/home/guilherme/Documents/kg/complex/geolink',
    'hydrography': '/home/guilherme/Documents/kg/complex/hydrography_ontology/ontology',
    'taxon': '/home/guilherme/Documents/kg/complex/taxon/ont'
}
nb = 0  # not read in this cell; kept in case a later cell relies on it

# For every .owl/.rdf file: rank its subjects with page_rank, keep the
# entities whose score is more than 40% of the best score (at most the first
# 30 of them), and write one SPARQL query file per kept entity that has an
# rdf:type.
for ont_name, ont_root in paths.items():

    for dirpath, _dirnames, filenames in os.walk(ont_root):
        for fs in tqdm(filenames):
            if not fs.endswith(('.owl', '.rdf')):
                continue

            g = Graph().parse(os.path.join(dirpath, fs))

            ents = set(g.subjects())
            ranks = page_rank(g, ents, num_iterations=10, damping_factor=0.8)

            values = sorted(ranks.items(), key=lambda x: x[1], reverse=True)

            # A graph without subjects has nothing to rank, and a top score
            # of zero would make the relative threshold below divide by zero.
            if not values or values[0][1] <= 0:
                continue
            bv = values[0][1]

            fv = [item for item in values if item[1] / bv > 0.4]

            out_dir = f'/home/guilherme/Documents/kg/complex/prcqas/{ont_name}/{fs.split(".")[0]}'
            for k, _score in fv[:30]:
                kv = g.value(k, RDF.type)
                if kv is None:
                    continue

                # File-name stem: the part of the IRI after the last '#',
                # lower-cased, then the part after the last '/'.
                cqn = k.split('#')[-1].lower().split("/")[-1]

                if kv == OWL.Class:
                    prefix = 'c'
                    query = f'SELECT DISTINCT ?x WHERE {{?x a <{k}>.}}'
                elif 'property' in kv.lower():
                    prefix = 'p'
                    query = f'SELECT DISTINCT ?x ?y WHERE {{?x <{k}> ?y.}}'
                else:
                    prefix = 'nc'
                    query = f'SELECT DISTINCT ?x WHERE {{?x a <{k}>.}}'

                # The directory is created only when there is something to write.
                os.makedirs(out_dir, exist_ok=True)
                # A distinct handle name keeps the os.walk file list from
                # being rebound while it is iterated.
                with open(f'{out_dir}/{prefix}-{cqn}.sparql', 'w', encoding='utf-8') as query_file:
                    query_file.write(query)
                    
    
                    
                

  0%|          | 0/16 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/6 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]