In [62]:
import json
import os
import re

import networkx as nx

In [7]:
# This function takes the name of a text file of triples as input and
# returns a dictionary where each key is the subject of a triple and the value is the list of all
# the "property object" strings found for that subject.


def getDic(filename):
    """Read a space-separated triples file into a subject-keyed dictionary.

    Each non-blank line is split at its first space: the text before it
    (stripped of surrounding whitespace) is the subject, and everything after
    it is stored verbatim as one "property object ..." string.

    Parameters
    ----------
    filename : str or path-like
        Path to a UTF-8 text file with one triple per line.

    Returns
    -------
    dict
        Maps each subject to a list with one entry per distinct line for that
        subject, in order of first appearance in the file.
    """
    with open(filename, encoding="utf-8") as f:
        # Drop the line terminator *before* de-duplicating; otherwise a final
        # line without a trailing newline is treated as different from an
        # identical earlier line and survives as a duplicate.
        lines = [line.rstrip('\n') for line in f]

    ddic = {}
    # dict.fromkeys removes duplicates while keeping file order, so the result
    # is reproducible across runs (a set would iterate in arbitrary order).
    for line in dict.fromkeys(lines):
        if not line.strip():
            # Blank lines carry no triple; skipping them avoids a bogus '' subject.
            continue
        subject, _, remainder = line.partition(' ')
        ddic.setdefault(subject.strip(), []).append(remainder)
    return ddic


In [43]:
# This function takes a list of tuples as input. Each tuple is in the order
# (subj, subj class, obj, obj class, property connecting subj and obj). Using the input, a graph is generated with the
# required labels (attributes) on its nodes and edges.


def addNodes(lst):
    """Build an undirected graph from (subj, subj class, obj, obj class, property) records.

    Parameters
    ----------
    lst : iterable
        Records indexable at positions 0-4: subject, subject class, object,
        object class and the property linking subject to object.

    Returns
    -------
    networkx.Graph
        Graph whose nodes carry a ``Class`` attribute and whose edges carry a
        ``Label`` attribute holding the property.
    """
    graph = nx.Graph()
    for record in lst:
        # Read all five fields up front so a short record fails before the
        # graph is touched.
        source, source_cls, target, target_cls, relation = (
            record[pos] for pos in range(5)
        )
        graph.add_node(source, Class=source_cls)
        graph.add_node(target, Class=target_cls)
        graph.add_edge(source, target, Label=relation)

    return graph

In [39]:
# This function is used to extract quotes within strings. This is helpful in case of extracting labels of entities 
# so that they are human readable 

def getMatch(text):
    """Return every non-empty double-quoted substring of ``text``, comma-joined.

    The surrounding quotes are dropped. An empty string is returned when
    ``text`` contains no quoted substring.
    """
    quoted = (found.group(1) for found in re.finditer(r'"(.+?)"', text))
    return ",".join(quoted)

In [40]:
# This function takes a dictionary and a property (given together with its subject and object classes) and extracts all the
# relevant triples of that property. It transforms the instances into tuples (to avoid repetition, if any) and finally
# saves them all in a list. This list is then passed to the 'addNodes' function to generate a graph.

def genSubGraph(ddic, l):
    """Build the graph of all triples in ``ddic`` that use one property.

    Parameters
    ----------
    ddic : dict
        Maps a subject to a list of "property object ..." strings.
    l : sequence
        ``[subject class, object class, property]``; only strings whose first
        space-separated token equals ``l[2]`` are considered.

    Returns
    -------
    The graph returned by ``addNodes`` for the de-duplicated
    (subject, subject class, object, object class, property) tuples.
    """
    triples = set()
    for subject, statements in ddic.items():
        for statement in statements:
            if l[2] != statement.split(' ')[0]:
                continue

            # Special case for the location-created property. Per the original
            # author this exists only for the NISV dataset, which has yet to
            # properly define locations: the object is taken from the quoted
            # label (its first word) instead of a 'gtaa:' identifier.
            if l[2] == 'sdo:locationCreated':
                place = getMatch(statement).split(' ')[0]
                triples.add((subject, l[0], place, l[1], l[2]))
                continue

            # Every token containing 'gtaa:' becomes an object of this subject.
            triples.update(
                (subject, l[0], token, l[1], l[2])
                for token in statement.split(' ')
                if 'gtaa:' in token
            )

    return addNodes(list(triples))




In [35]:
# Load the triples file into a {subject: ["property object ..." strings]} dictionary.
ddic = getDic('items2.txt')

In [55]:
# This function takes a list of properties (with their classes defined) and returns, for each property
# in the list, a [subgraph, property] pair.

def genGraph(ddic, prop):
    """Build one subgraph per property specification.

    Parameters
    ----------
    ddic : dict
        Subject-keyed dictionary passed through to ``genSubGraph``.
    prop : iterable
        Property specifications; element 2 of each is used as the label of
        the resulting pair.

    Returns
    -------
    list
        One ``[subgraph, property]`` pair per specification, in input order.
    """
    return [[genSubGraph(ddic, spec), spec[2]] for spec in prop]

In [56]:
# Each entry is [subject class, object class, property]; genGraph builds one subgraph per entry.
# Alternative property selections, kept for reference:
# ls = [['Creative Work','Person','sdo:byArtist'],['Creative Work','Organization','sdo:provider'],['Creative Work','Thing','sdo:genre'],['Creative Work','Person','sdo:creator'],['Creative Work','Person','sdo:mentions'],['Creative Work','Location','sdo:locationCreated'],['Creative Work','Organization','sdo:productionCompany']]
# ls = [['Creative Work','Person','sdo:byArtist'],['Creative Work','Organization','sdo:provider'],['Creative Work','Thing','sdo:genre'],['Creative Work','Location','sdo:locationCreated'],['Creative Work','Organization','sdo:productionCompany'],['Creative Work','Person','sdo:creator']]
ls = [['Creative Work','Person','sdo:byArtist'],['Creative Work','Person','sdo:creator'],['Creative Work','Person','sdo:mentions']]
# Note: G is a list of [subgraph, property] pairs, not a single graph.
G=genGraph(ddic,ls)

[[<networkx.classes.graph.Graph at 0x242597c3e10>, 'sdo:byArtist'],
 [<networkx.classes.graph.Graph at 0x24265df1310>, 'sdo:creator'],
 [<networkx.classes.graph.Graph at 0x242e9947c50>, 'sdo:mentions']]

In [59]:
def getScore(subg):
    """Compute eigenvector centrality for every subgraph.

    Parameters
    ----------
    subg : iterable
        ``[graph, label]`` pairs.

    Returns
    -------
    list
        One ``[label, centrality]`` pair per input pair, in input order, where
        ``centrality`` is the result of ``nx.eigenvector_centrality`` run with
        ``max_iter=1500`` and ``tol=1.0e-5``.
    """
    def _score(pair):
        # Centrality is computed before the label is read, one pair at a time.
        centrality = nx.eigenvector_centrality(pair[0], max_iter=1500, tol=1.0e-5)
        return [pair[1], centrality]

    return [_score(pair) for pair in subg]

In [60]:
# Eigenvector centrality for each subgraph in G: a list of [property, scores] pairs.
s = getScore(G)

In [None]:
# Scratch cleanup cell: remove the placeholder directory only when it exists,
# so a fresh "Restart & Run All" does not stop here with FileNotFoundError.
# os.rmdir still raises OSError if the directory is not empty.
if os.path.isdir('empty_dir_path'):
    os.rmdir('empty_dir_path')

In [61]:
def saveScores(s, filename='result2.json'):
    """Write all centrality scores to a single JSON file.

    Parameters
    ----------
    s : iterable
        ``[property, scores]`` pairs in the shape returned by ``getScore``;
        every element must be JSON-serialisable.
    filename : str, optional
        Destination path. Defaults to ``'result2.json'``.

    Notes
    -----
    The file is opened once and the complete list is dumped in one call.
    Re-opening it in ``'w'`` mode for every entry would truncate it each time
    and leave only the last entry on disk.
    """
    with open(filename, 'w', encoding='utf-8') as fp:
        # list() lets any iterable (e.g. a generator) be serialised.
        json.dump(list(s), fp)
            


[['sdo:byArtist',
  {'<http://data.beeldengeluid.nl/id/program/2101608140130037231>': 6.60742657400393e-12,
   'gtaa:51564': 6.063572459231975e-10,
   '<http://data.beeldengeluid.nl/id/scene/2101702280772103924>': 9.138253194652927e-06,
   'gtaa:1244019': 6.641890924925888e-07,
   '<http://data.beeldengeluid.nl/id/scene/2102108200306006224>': 8.738260454217105e-08,
   'gtaa:1713773': 1.3051496124581693e-09,
   '<http://data.beeldengeluid.nl/id/scene/2101702280766256924>': 4.859247500790916e-06,
   'gtaa:79642': 0.00044595391416665954,
   '<http://data.beeldengeluid.nl/id/scene/2101702280771351024>': 6.484344301344274e-07,
   'gtaa:1641422': 2.905561827558703e-07,
   '<http://data.beeldengeluid.nl/id/program/2101608140128853731>': 0.0012725945178474766,
   'gtaa:55158': 0.11679074591391224,
   '<http://data.beeldengeluid.nl/id/scene/2101702280773395024>': 4.567164490566981e-09,
   'gtaa:140995': 4.190434348533099e-07,
   '<http://data.beeldengeluid.nl/id/program/2101608140127275531>': 0