In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON
import os
import sys
sys.path.insert(0, os.path.abspath('..'))
import  parser.matcher as matcher

In [3]:
# SPARQL endpoint of the "hogeraad" namespace on the Blazegraph server at localhost:9999.
SPARQL_ENDPOINT = "http://localhost:9999/blazegraph/namespace/hogeraad/sparql"
sparql = SPARQLWrapper(SPARQL_ENDPOINT)

In [18]:
import pandas as pd
# Semicolon-separated CSV whose first row is a header; it is read as two
# columns that are renamed to 'id' and 'reference'.
# NOTE(review): presumably each row is one citation from `id` to `reference` - confirm.
filepath_csv = '/media/sf_VBox_Shared/CaseLaw/graphs/test3 wg aanspr.csv'
raw_links = pd.read_csv(filepath_csv, sep=';', header=0)
raw_links.columns = ['id', 'reference']
links_wgaans = raw_links.drop_duplicates()

# Every distinct identifier that occurs in either column, in order of first appearance.
all_ids = pd.concat([links_wgaans['id'], links_wgaans['reference']])
ids_wgaans = list(all_ids.unique())

In [19]:
def build_ecli_query(ecli):
    """Return the SPARQL query that fetches everything stored for one case id.

    The query is a UNION of three branches, each tagging its rows via ?type:
      * "node"       - the case itself (must have dcterm:type uitspraak), with
                       optional creator, abstract, subject, date and title;
      * "link"       - one row per dcterm:references target (?to);
      * "vindplaats" - one row per dcterm:hasVersion value (?hasVersion).

    Parameters
    ----------
    ecli : str
        Identifier appended to the deeplink.rechtspraak.nl URI prefix.

    Raises
    ------
    ValueError
        If ``ecli`` is not a string, or contains a character that is not
        allowed inside a SPARQL IRI reference. Such a value would otherwise
        be spliced into the query unchecked and corrupt (or alter) it.
    """
    # Characters excluded by the SPARQL IRIREF production, plus anything <= U+0020.
    forbidden = '<>"{}|^`\\'
    if not isinstance(ecli, str) or any(ch in forbidden or ch <= ' ' for ch in ecli):
        raise ValueError('Cannot build a query for invalid identifier: %r' % (ecli,))

    uri = '<http://deeplink.rechtspraak.nl/uitspraak?id=' + ecli + '>'
    return '''
            prefix dcterm: <http://purl.org/dc/terms/>
            select ?type ?id ?to ?title ?creator ?date ?subject ?abstract ?hasVersion
            where {
                {
                BIND("node" AS ?type).
                ?id dcterm:type	<http://psi.rechtspraak.nl/uitspraak>.
                filter (?id = ''' + uri + ''').
                optional { ?id dcterm:creator ?creator}.
                   optional { ?id dcterm:abstract ?abstract}.
                   optional { ?id dcterm:subject ?subject}.
                   optional { ?id dcterm:date ?date}.
                   optional { ?id dcterm:title ?title}
                }
                
                union
                {
                BIND("link" AS ?type).
                ?id dcterm:references ?to.
                filter (?id = ''' + uri + ''').
                }
                union
                {
                BIND("vindplaats" AS ?type).
                ?id dcterm:hasVersion ?hasVersion
                filter (?id = ''' + uri + ''').
                }
              }
            '''


# The return format does not depend on the id, so set it once up front.
sparql.setReturnFormat(JSON)

# One converted JSON response per id in ids_wgaans, in the same order.
node_list = []
for ecli in ids_wgaans:
    querystring = build_ecli_query(ecli)
    sparql.setQuery(querystring)
    ret = sparql.query()
    node_list.append(ret.convert())

In [20]:
# Split the result bindings of all responses by their ?type tag:
# "node" rows go to `nodes`, "link" rows go to `links`.
nodes = []
links = []
for response in node_list:
    for binding in response['results']['bindings']:
        kind = binding['type']['value']
        if kind == 'node':
            nodes.append(binding)
        if kind == 'link':
            links.append(binding)

In [21]:
# Spot-check: show every link whose source id contains 'AF5891'.
# `in` also matches at position 0, which the former `.find(...) > 0` test skipped.
[link for link in links if 'AF5891' in link['id']['value']]

[{'id': {'type': 'uri',
   'value': 'http://deeplink.rechtspraak.nl/uitspraak?id=ECLI:NL:HR:2003:AF5891'},
  'to': {'type': 'uri',
   'value': 'http://deeplink.rechtspraak.nl/uitspraak?id=ECLI:NL:HR:1999:ZC2912'},
  'type': {'type': 'literal', 'value': 'link'}}]

In [22]:
# Collect the bindings tagged "vindplaats" (the rows carrying a ?hasVersion value).
vindplaatsen = []
for response in node_list:
    for binding in response['results']['bindings']:
        if binding['type']['value'] == 'vindplaats':
            vindplaatsen.append(binding)

In [23]:
# Total number of "vindplaats" bindings collected over all queried ids.
len(vindplaatsen)

812

In [24]:
# Inspect the first binding to see which keys a "vindplaats" row carries.
vindplaatsen[0]

{'hasVersion': {'type': 'literal', 'value': 'JA 2012/125'},
 'id': {'type': 'uri',
  'value': 'http://deeplink.rechtspraak.nl/uitspraak?id=ECLI:NL:HR:2012:BV1295'},
 'type': {'type': 'literal', 'value': 'vindplaats'}}

In [25]:
def nodes_to_json(nodes_in, variables):
    """Flatten SPARQL bindings into plain dicts, keeping one entry per id.

    Parameters
    ----------
    nodes_in : iterable of dict
        Bindings of the form ``{var: {'value': ...}, ...}``. Every binding
        must contain ``'id'``; any other variable may be absent.
    variables : sequence of str
        Variable names to copy into each output dict. A variable missing
        from a binding is stored as the empty string.

    Returns
    -------
    tuple (list of dict, list of str)
        ``nodes_json`` holds one flat dict per distinct id, taken from the
        first binding in which that id occurs. ``unique_ids`` holds those
        ids in the same order.
    """
    nodes_json = []
    unique_ids = []
    # A set gives constant-time duplicate checks; the list above preserves
    # first-seen order for the caller.
    seen_ids = set()
    for d in nodes_in:
        node_id = d['id']['value']
        if node_id in seen_ids:
            continue
        seen_ids.add(node_id)
        unique_ids.append(node_id)
        nodes_json.append({var: d.get(var, {'value': ''})['value'] for var in variables})
    return nodes_json, unique_ids

In [26]:
# Use the query's projected variables as node attributes, leaving out the ones
# that only serve to tag rows or describe links.
excluded_vars = ('type', 'from', 'to')
variables = [name for name in node_list[0]['head']['vars'] if name not in excluded_vars]
nodes_json, unique_ids = nodes_to_json(nodes, variables)

In [27]:
# Enrich every node with an 'articles' mapping derived from its abstract.
# matcher.get_articles returns a mapping keyed by (article, book) pairs; the
# two parts are joined with a space to form a single string key.
# NOTE(review): the values are presumably occurrence counts - confirm in parser.matcher.
for node in nodes_json:
    article_counts = matcher.get_articles(node['abstract'])
    labelled = {}
    for (art, book), cnt in article_counts.items():
        labelled[art + ' ' + book] = cnt
    node['articles'] = labelled

In [28]:
# Per id: how many "vindplaats" entries exist, and how many of those mention
# 'met annotatie' (compared case-insensitively).
count_version = {}
count_annotation = {}
for entry in vindplaatsen:
    uri = entry['id']['value']
    version_text = entry['hasVersion']['value']
    count_version.setdefault(uri, 0)
    count_version[uri] += 1
    if 'met annotatie' in version_text.lower():
        count_annotation.setdefault(uri, 0)
        count_annotation[uri] += 1

In [29]:
# Copy both counters onto each node; ids without any entry get 0.
for node in nodes_json:
    node_id = node['id']
    node.update(
        count_version=count_version.get(node_id, 0),
        count_annotation=count_annotation.get(node_id, 0),
    )

In [30]:
# Sanity check: links returned by the triple store vs. rows read from the CSV.
n_graph_links = len(links)
n_csv_links = len(links_wgaans)
print(n_graph_links, n_csv_links)

196 177


In [31]:
def links_to_json(links_in, node_ids):
    """Convert "link" bindings into edge dicts restricted to known nodes.

    Parameters
    ----------
    links_in : iterable of dict
        Bindings with ``d['id']['value']`` (source) and ``d['to']['value']``
        (target).
    node_ids : iterable of str
        Ids of the nodes present in the graph. Links whose target is not in
        this collection are dropped; the source is not checked.

    Returns
    -------
    list of dict
        One ``{'source', 'target', 'id'}`` dict per retained link, in input
        order. ``id`` is the string ``source + '_' + target``.
    """
    # Set membership keeps the filter linear in the number of links.
    known_ids = set(node_ids)
    links_json = []
    for d in links_in:
        source = d['id']['value']
        target = d['to']['value']
        if target not in known_ids:
            continue
        # The id used to be the edge dict itself, a circular reference that
        # cannot be JSON-serialised; use the same string id the export assigns.
        links_json.append({'source': source,
                           'target': target,
                           'id': source + '_' + target})
    return links_json

In [32]:
# Edges of the graph: only links whose target is one of the collected node ids.
references = links_to_json(links, unique_ids)

In [33]:
import json

# NOTE(review): the doubled '.json.json' extension looks accidental - confirm before renaming.
filename = '/media/sf_VBox_Shared/CaseLaw/graphs/wgaans_sigma_v2.json.json'

# Sigma needs an id on every edge; build it from the two endpoints.
for edge in references:
    edge['id'] = '_'.join((edge['source'], edge['target']))

# Write nodes and edges together as a single JSON document.
graph = {'nodes': nodes_json, 'edges': references}
with open(filename, 'w') as outfile:
    json.dump(graph, outfile)