In [16]:
import sqlite3
import pandas as pd
from pygraphml import Graph
from pygraphml import GraphMLParser

In [69]:
# Open the SQLite database file 'rechtspraak.db' and create a cursor that the
# query cells below use.
conn = sqlite3.connect('rechtspraak.db')
c = conn.cursor()

In [3]:
# List the names of all tables defined in the database.
conn.execute('''SELECT name FROM sqlite_master WHERE type='table';''').fetchall()

[('uitspraken_meta',), ('uitspraken',), ('uitspraken_links',)]

In [70]:
# Get the links to other HR cases: keep only the (id, reference) rows of
# uitspraken_links whose reference also occurs as an id in uitspraken_meta.
links = pd.DataFrame(c.execute('''select l.id, l.reference
            from uitspraken_links l join 
                ( select id from uitspraken_meta ) s
                on s.id = l.reference
        ''').fetchall(), columns=['id', 'reference'])

In [14]:
# Get descriptions of the cases: for every identifier that appears in
# uitspraken_links (either as id or as reference), look up its summary in
# uitspraken_meta. The summary is stored under the column name 'description'.
descriptions = pd.DataFrame(c.execute('''select l.id, r.summary
            from ( 
                select distinct id from uitspraken_links 
                union
                select distinct reference as id from uitspraken_links 
                ) l 
            join uitspraken_meta r
                on l.id = r.id
        ''').fetchall(), columns=['id', 'description'])

In [20]:
# Index the descriptions by case identifier so rows can be looked up by id.
# Note: this rebinds `descriptions`; re-running the cell fails because 'id'
# is no longer a column afterwards.
descriptions = descriptions.set_index('id')

In [75]:
# Close the database connection; the query cells above must run before this.
conn.close()

In [115]:

def year_from_id(text):
    """Return the fourth colon-separated field of ``text`` as a string.

    For an identifier such as ``'ECLI:NL:HR:2013:1036'`` this is the year
    part, ``'2013'``.

    Raises:
        IndexError: if ``text`` contains fewer than four colon-separated
            fields.
    """
    fields = text.split(':')
    return fields[3]


def make_graph(descriptions, links):
    """Build a pygraphml ``Graph`` with one node per case and one edge per link.

    Args:
        descriptions: DataFrame indexed by case identifier with a
            ``'description'`` column. Each index entry becomes a node labelled
            with the identifier, carrying a ``'description'`` attribute and an
            integer ``'year'`` attribute parsed with ``year_from_id``.
        links: DataFrame with ``'id'`` and ``'reference'`` columns. Each row
            becomes a directed edge from the ``id`` node to the ``reference``
            node.

    Returns:
        The populated ``Graph``.

    Raises:
        KeyError: if a link mentions an identifier that is not present in
            ``descriptions.index``.
    """
    graph = Graph()
    nodes_by_id = {}

    # One node per case, remembered by identifier for the edge pass below.
    for case_id in descriptions.index:
        node = graph.add_node(case_id)
        node['description'] = descriptions.loc[case_id, 'description']
        node['year'] = int(year_from_id(case_id))
        nodes_by_id[case_id] = node

    # One directed edge per link row.
    for row_label in links.index:
        source_id = links.loc[row_label, 'id']
        target_id = links.loc[row_label, 'reference']
        graph.add_edge(nodes_by_id[source_id], nodes_by_id[target_id], directed=True)

    return graph

def save_graph(graph, outputpath):
    """Write ``graph`` to ``outputpath`` using a new ``GraphMLParser``.

    Args:
        graph: the graph object to serialise.
        outputpath: destination passed to ``GraphMLParser.write``.
    """
    GraphMLParser().write(graph, outputpath)

In [89]:
# Build the graph from all descriptions and links and write it out as GraphML.
# NOTE(review): the output path is absolute and machine-specific.
outputpath = '/media/sf_VBox_Shared/CaseLaw/graphs/eclilinks.graphml'
g = make_graph(descriptions, links)
save_graph(g, outputpath)

In [67]:
# Smaller graph: keep only the cases whose description mentions
# 'aansprakelijkheid' (Dutch for "liability"), compared case-insensitively.
# na=False treats a missing description as "no match" instead of putting NaN in
# the mask, which boolean indexing would reject; regex=False makes the search a
# plain substring match.
lower = descriptions['description'].str.lower()
descriptions_small = descriptions[lower.str.contains('aansprakelijkheid', na=False, regex=False)]

In [79]:
# Preview the first rows of the links table.
links.head()

Unnamed: 0,id,reference
0,ECLI:NL:HR:2013:1036,ECLI:NL:HR:2007:BA1522
1,ECLI:NL:HR:2013:1040,ECLI:NL:HR:2011:BP2314
2,ECLI:NL:HR:2013:1042,ECLI:NL:HR:2013:BZ3640
3,ECLI:NL:HR:2013:1047,ECLI:NL:HR:2012:BV0229
4,ECLI:NL:HR:2013:1048,ECLI:NL:HR:2012:BV0258


In [91]:
# Restrict the links to those whose citing case ('id') is in descriptions_small
# by merging on its index, then keep only the two link columns.
links_small = links.merge(descriptions_small, left_on='id', right_index=True)[['id', 'reference']]

In [104]:
# Show the first ten identifiers of the small graph.
# NOTE(review): ids_small is assigned in a cell further down (In [110]), so
# this cell only works after that cell has been run.
ids_small.tolist()[:10]

['ECLI:NL:HR:2013:1079',
 'ECLI:NL:HR:2013:BZ7189',
 'ECLI:NL:HR:2013:BZ7396',
 'ECLI:NL:HR:2013:BZ7397',
 'ECLI:NL:HR:2014:1204',
 'ECLI:NL:HR:2014:1525',
 'ECLI:NL:HR:2014:1542',
 'ECLI:NL:HR:2014:2149',
 'ECLI:NL:HR:2014:2150',
 'ECLI:NL:HR:2014:2627']

In [110]:
# Collect every identifier that occurs in the reduced links, as citing case or
# as reference, and select the matching description rows. This rebinds
# descriptions_small so that it also covers the cited cases.
ids_small = pd.concat([links_small['id'], links_small['reference']]).unique()
descriptions_small = descriptions.loc[ids_small]

In [116]:
# Build the reduced graph and write it out as GraphML.
# NOTE(review): the output path is absolute and machine-specific.
outputpath = '/media/sf_VBox_Shared/CaseLaw/graphs/eclilinks_small.graphml'
g2 = make_graph(descriptions_small, links_small)
save_graph(g2, outputpath)

In [113]:
# Size of the reduced descriptions table as (rows, columns).
descriptions_small.shape

(112, 1)