In [1]:
import requests 
import json

import os
import pandas as pd
import numpy as np
import math
import re
import time

In [2]:
#legal intelligence credentials
CLIENT_ID = 'marion.meyers@student.maastrichtuniversity.nl'
CLIENT_SECRET = '8bf03e23-1faf-430d-a5a5-ab907b5af436'

# Methods needed for using the LI API

In [3]:
def get_access_token():
    data = {
     "grant_type": "client_credentials",
     "client_id": CLIENT_ID,
     "client_secret": CLIENT_SECRET
    }

    headers = {
     "Content-Type": "application/x-www-form-urlencoded",
     "X-SubSiteCode": "LI",
     "cache-control": "no-cache"
    }

    request = requests.post('https://api.legalintelligence.com/token', data=data, headers=headers)
    #print('Auth status code: %s' %request.status_code)

    response = request.json()
    #print('Auth access code: %s' %response['access_token'])
    
    return response['access_token']

In [4]:
def get_search_query(query, filters=[]):
    headers = {    
        "x-subsitecode": "LI",    
        "authorization": "Bearer %s" %get_access_token(),    
        "accept": "application/json"  
    }
    params = {
        "start" :  0,
        "rows" : 40
    }
   
    link = 'https://api.legalintelligence.com/search?q=%s'%query
    for filter in filters:
        link += '&fq=%s' %filter
#     link += '&fq='.join(filters)
    
    request = requests.get(link, headers=headers, params = params)
    
    
    #total number of cases retrieved by the given query
    count = request.json()["Count"]
    print("case count : "+str(count))
    
    #because we are using 40 as the number of cases retrieved at a time by Legal Intelligence (see params), 
    #this is the total number of iterations we will have tot loop through in order to retrieve all cases
    nb_pages = math.ceil(count/40)
    print('number of pages : '+str(nb_pages))
    
    page_index = 1
    pages = list()

    #append the first request to the list of request dictionaries
    pages.append(request.json())
    
    
    #go through all pages, and add each dictionary request to the list until no more pages
    while page_index < nb_pages: 
        print('page index : '+str(page_index))
        params = {
            "start" :  page_index,
            "rows" : 40
        }
        #we put the computer to sleep every 50 requests, to avoid the API limit to be exceeded
        if (page_index/50).is_integer():
            print('put computer to sleep, it has been 50 request')
            time.sleep(70)
            request = requests.get(link, headers=headers, params = params)
            pages.append(request.json())
            page_index = page_index + 1
        else:
            request = requests.get(link, headers=headers, params = params)
            #here I am unsure what the error exactly is... so for now I just excluse it as an exception
            try:
                pages.append(request.json())
            except:
                print('weird error')
                #print(request.json())
            page_index = page_index + 1  

  
    total_search_results = pages[0]
    counter = 0 
    for page in pages[1:]:
        print('page count : '+str(counter))
        counter = counter +1
        print('page type : '+str(type(page)))
        if isinstance(page, str):
            print('ERROR : page is a string, and not a dictionary')
            print(page)
        else:
            #only take the Documents sections of each page, and add it to a general Documents section
            #such that it looks like a big list of a cases and not list of json files
            total_search_results['Documents'] = total_search_results['Documents'] + page['Documents']
        
    return total_search_results

Now, in order to be able to merge informations from Rechtspraak and Legal Intelligence, we need to match the corresponding cases. To do this, we will use the ECLI number. However, this number is not present in the json format output by Legal Intelligence, but it is present in the request.text format that we can also have from our query. 

In [5]:
#retrieve the document based on its id
def get_document(id):
    headers = {    
        "x-subsitecode": "LI",    
        "authorization": "Bearer %s" %get_access_token(),    
        "accept": "application/json"  
    }

    request = requests.get('https://api.legalintelligence.com/documents/%s' %id, headers=headers)
    
    return request.text

In [6]:
#this method takes the case text as an argument, searches for teh ecli number and outputs it. 
def getECLInumber(caseText):
    ecliStart = caseText.find('ECLI')
    #since there are maximum 25 characers in an ECLI number, we can just take that whole chunk. It makes things faster
    #than if we were to split the whole text
    caseText = caseText[ecliStart:ecliStart+25]
    #print('print 25 after ECLI : '+str(caseText))
    ecliNumber = caseText.split('<')[0]
    ecliNumber = ecliNumber.split(' ')[0]
    ecliNumber = ecliNumber.split(',')[0]
    ecliNumber = ecliNumber.replace(':','_')
    
    return ecliNumber 

In [7]:
def saveTextToArchive(year, ecliNumber, caseText, save_path):
    print('saving case as html file')
    
    #replace with where the location of the Rechspraak Archive
    save_path_html = save_path+"/"+ecliNumber+".html"
    
    file = open(save_path_html, "w", encoding="utf-8")
    file.write(caseText)
    file.close()

In [8]:
def saveJsonToArchive(year, ecliNumber, json_file):
    print('saving case as a json file')
    
    #replace with where the location of the Rechspraak Archive
    save_path_json = "C:/Users/mario/Documents/LawTechLab/Comenius/Archive/"+str(year)+"/"+ecliNumber+".json"
    path_xml = "C:/Users/mario/Documents/LawTechLab/Comenius/Archive/"+str(year)+"/"+ecliNumber+".xml"

    #see if LI case is new or already in the archive
    if os.path.isfile(path_xml):
        print("LI case existed in archive")
        with open(save_path_json, 'w') as fp:
            json.dump(json_file, fp)
        return True
    else: 
        print('new case found in LI')
        return False
    

# Create the LI Dataframe

In [29]:
#This method takes a certain year, the path where the LI cases are located as json files as parameters. 
#It outputs a pandas dataframe of all LI cases of a certain year
#It also saves an html file for each case in order to later be able to retrieve the full text from it

def createLIDataframe(year_dump, save_path, id_list, json_files): 
    #save_path = "C:/Users/mario/Documents/LawTechLab/legal-intelligence-api/notebooks/legalIntelDumpsSmall/"
    #get the id list of the case files of all LI cases found for that year
    #id_list, json_files = getIdListAndJsonFromYearDump(save_path+year+'.json')
    
    LI_df = pd.DataFrame()
    count_id = 0 
    #go through each of the cases
    for index, case_id in enumerate(id_list):
        print("index : "+str(index))
        count_id = count_id+1
        #put the computer to sleep every 50 cases to avoid exceeding the API limit
        #if (count_id/15).is_integer():
         #   print('put computer to sleep, it has been 15 request')
          #  time.sleep(1)
        
        print("case_id : "+str(case_id))
        try: 
            casetext = get_document(case_id)
       
        except Exception as e:
            print(e)
            print("weird error")
            time.sleep(20)
            casetext = get_document(case_id)
            
    #get the json file for the case  
        json_file = json_files[index]
        #get the year number
        year = year_dump[0:4]

        #get the ECLI number from the case, and check if it is valid
        ecliNumber = getECLInumber(casetext)
        regex = re.compile('[@!#$%^&*()<>?/\|}{~:]') 
        #if the ecli number doesn't contain any of the weird characters above, and is not empty, then it is valid. 
        if(regex.search(ecliNumber) == None and ecliNumber !=''): 
            #print("valid ecli number : "+str(ecliNumber)) 

            #convert json file to a panda dataframe
            law_area_list = [json_file['LawArea']]
            #print('law area list size : '+str(len(law_area_list)))

            #print(str(type(json_file['LawArea'])))
            del json_file['LawArea']
            #print('json file type : '+str(type(json_file)))
            db = pd.DataFrame(json_file)
            db = db.rename(columns={"PublicationDate": "date", "CaseNumber": "case_number", "Summary":"abstract", "EnactmentDate":"lodge_date", "IssuingInstitution":"authority"})
            #print('database size : '+str(db.shape))
            if db.shape[0] >1:
                db= db.drop(db.index[1:])
            #print('database columns : '+str(db.columns))
            #print(db.head())
            db['LawArea'] = law_area_list
            db['ecli'] = ecliNumber
            #merge this dataframe with the overall one
            LI_df = pd.concat([LI_df, db], axis=0, ignore_index=True)

            #save the case as html file such that we can retrieve the full text from it later on 
            saveTextToArchive(year, ecliNumber, casetext, save_path+'/'+str(year))


        else: 
            print("ERROR : not a correct ECLI number : we don't add the document for now "+str(ecliNumber))
          
    #re-arrange dataframe such that the ecli number is at the front
    cols = LI_df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    LI_df = LI_df[cols] 
    
    print(LI_df.head())
    return LI_df


# Main Method

In [30]:
#list of years we want to retrieve LI cases for
years = list(range(1972, 2001))
#where we want to store the csv files as well as the individual html docs
save_path = "C:/Users/mario/Documents/LawTechLab/legal-intelligence-api/LI_archive"

for year in years:
    #time.sleep(70)
    print(year)
    
    #get all cases from LI for that year
    search_results = get_search_query(str(year),['Jurisdiction_HF%3A1%7C010_Nederland%7C010_Rechtspraak'])
    
    #code to save them as json files
    with open(save_path+'/'+str(year)+'.json', 'w') as fp:
        json.dump(search_results, fp)
   
    #get ids and case_json from the dump
    ids = [document["Id"] for document in search_results["Documents"]]
    json_files = search_results["Documents"]
    
    #create dataframe
    os.mkdir(save_path+'/'+str(year)+'/')
    LI_df = createLIDataframe(str(year), save_path, ids, json_files)
    LI_df.to_csv(save_path+'/'+str(year)+'/'+str(year)+'.csv')
    

1972
case count : 1723
number of pages : 44
page index : 1
page index : 2
page index : 3
page index : 4
page index : 5
page index : 6
page index : 7
page index : 8
page index : 9
page index : 10
page index : 11
page index : 12
page index : 13
page index : 14
page index : 15
page index : 16
page index : 17
page index : 18
page index : 19
page index : 20
page index : 21
page index : 22
page index : 23
page index : 24
page index : 25
page index : 26
page index : 27
page index : 28
page index : 29
page index : 30
page index : 31
page index : 32
page index : 33
page index : 34
page index : 35
page index : 36
page index : 37
page index : 38
page index : 39
page index : 40
page index : 41
page index : 42
page index : 43
page count : 0
page type : <class 'dict'>
page count : 1
page type : <class 'dict'>
page count : 2
page type : <class 'dict'>
page count : 3
page type : <class 'dict'>
page count : 4
page type : <class 'dict'>
page count : 5
page type : <class 'dict'>
page count : 6
page type 

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




saving case as html file
index : 7
case_id : 3786246
saving case as html file
index : 8
case_id : 3786245
saving case as html file
index : 9
case_id : 3786254
saving case as html file
index : 10
case_id : 3786250
saving case as html file
index : 11
case_id : 3768381
saving case as html file
index : 12
case_id : 3801459
saving case as html file
index : 13
case_id : 4598997
saving case as html file
index : 14
case_id : 4752072
saving case as html file
index : 15
case_id : 3768356
saving case as html file
index : 16
case_id : 3768366
saving case as html file
index : 17
case_id : 4751953
saving case as html file
index : 18
case_id : 4752406
saving case as html file
index : 19
case_id : 4241013
saving case as html file
index : 20
case_id : 4241004
saving case as html file
index : 21
case_id : 4598940
saving case as html file
index : 22
case_id : 3786266
saving case as html file
index : 23
case_id : 4752187
saving case as html file
index : 24
case_id : 3786253
saving case as html file
index 

saving case as html file
index : 154
case_id : 4752572
saving case as html file
index : 155
case_id : 4752550
saving case as html file
index : 156
case_id : 4752395
saving case as html file
index : 157
case_id : 4240997
saving case as html file
index : 158
case_id : 3786222
saving case as html file
index : 159
case_id : 3786240
saving case as html file
index : 160
case_id : 3806119
saving case as html file
index : 161
case_id : 4598989
saving case as html file
index : 162
case_id : 3768377
saving case as html file
index : 163
case_id : 3786246
saving case as html file
index : 164
case_id : 3786245
saving case as html file
index : 165
case_id : 3786254
saving case as html file
index : 166
case_id : 3786250
saving case as html file
index : 167
case_id : 3768381
saving case as html file
index : 168
case_id : 3801459
saving case as html file
index : 169
case_id : 4598997
saving case as html file
index : 170
case_id : 4752072
saving case as html file
index : 171
case_id : 3768356
saving cas

saving case as html file
index : 302
case_id : 4240995
saving case as html file
index : 303
case_id : 3806048
saving case as html file
index : 304
case_id : 4598950
saving case as html file
index : 305
case_id : 4752067
saving case as html file
index : 306
case_id : 3804782
saving case as html file
index : 307
case_id : 4752226
saving case as html file
index : 308
case_id : 4598912
saving case as html file
index : 309
case_id : 4752496
saving case as html file
index : 310
case_id : 4752572
saving case as html file
index : 311
case_id : 4752550
saving case as html file
index : 312
case_id : 4752395
saving case as html file
index : 313
case_id : 4240997
saving case as html file
index : 314
case_id : 3786222
saving case as html file
index : 315
case_id : 3786240
saving case as html file
index : 316
case_id : 3806953
saving case as html file
index : 317
case_id : 3768348
saving case as html file
index : 318
case_id : 4752660
saving case as html file
index : 319
case_id : 3802376
saving cas

saving case as html file
index : 448
case_id : 4241013
saving case as html file
index : 449
case_id : 4241004
saving case as html file
index : 450
case_id : 4598940
saving case as html file
index : 451
case_id : 3786266
saving case as html file
index : 452
case_id : 4752187
saving case as html file
index : 453
case_id : 3786253
saving case as html file
index : 454
case_id : 3768347
saving case as html file
index : 455
case_id : 3768367
saving case as html file
index : 456
case_id : 3768346
saving case as html file
index : 457
case_id : 4599016
saving case as html file
index : 458
case_id : 4240995
saving case as html file
index : 459
case_id : 3806048
saving case as html file
index : 460
case_id : 4598950
saving case as html file
index : 461
case_id : 4752067
saving case as html file
index : 462
case_id : 3804782
saving case as html file
index : 463
case_id : 4752226
saving case as html file
index : 464
case_id : 4598912
saving case as html file
index : 465
case_id : 4752496
saving cas

saving case as html file
index : 594
case_id : 4752478
saving case as html file
index : 595
case_id : 3768330
saving case as html file
index : 596
case_id : 3768353
saving case as html file
index : 597
case_id : 3768354
saving case as html file
index : 598
case_id : 3806118
saving case as html file
index : 599
case_id : 4752297
saving case as html file
index : 600
case_id : 3768356
saving case as html file
index : 601
case_id : 3768366
saving case as html file
index : 602
case_id : 4751953
saving case as html file
index : 603
case_id : 4752406
saving case as html file
index : 604
case_id : 4241013
saving case as html file
index : 605
case_id : 4241004
saving case as html file
index : 606
case_id : 4598940
saving case as html file
index : 607
case_id : 3786266
saving case as html file
index : 608
case_id : 4752187
saving case as html file
index : 609
case_id : 3786253
saving case as html file
index : 610
case_id : 3768347
saving case as html file
index : 611
case_id : 3768367
saving cas

saving case as html file
index : 740
case_id : 4752550
saving case as html file
index : 741
case_id : 4752395
saving case as html file
index : 742
case_id : 4240997
saving case as html file
index : 743
case_id : 3786222
saving case as html file
index : 744
case_id : 3786240
saving case as html file
index : 745
case_id : 3806953
saving case as html file
index : 746
case_id : 3768348
saving case as html file
index : 747
case_id : 4752660
saving case as html file
index : 748
case_id : 3802376
saving case as html file
index : 749
case_id : 4752144
saving case as html file
index : 750
case_id : 4752478
saving case as html file
index : 751
case_id : 3768330
saving case as html file
index : 752
case_id : 3768353
saving case as html file
index : 753
case_id : 3768354
saving case as html file
index : 754
case_id : 3806118
saving case as html file
index : 755
case_id : 4752297
saving case as html file
index : 756
case_id : 4752541
saving case as html file
index : 757
case_id : 3768328
saving cas

saving case as html file
index : 886
case_id : 4599016
saving case as html file
index : 887
case_id : 4240995
saving case as html file
index : 888
case_id : 3806048
saving case as html file
index : 889
case_id : 4598950
saving case as html file
index : 890
case_id : 4752067
saving case as html file
index : 891
case_id : 3804782
saving case as html file
index : 892
case_id : 4752226
saving case as html file
index : 893
case_id : 4598912
saving case as html file
index : 894
case_id : 4752496
saving case as html file
index : 895
case_id : 4752572
saving case as html file
index : 896
case_id : 4752550
saving case as html file
index : 897
case_id : 4752395
saving case as html file
index : 898
case_id : 4240997
saving case as html file
index : 899
case_id : 3786222
saving case as html file
index : 900
case_id : 3786240
saving case as html file
index : 901
case_id : 3806953
saving case as html file
index : 902
case_id : 3768348
saving case as html file
index : 903
case_id : 4752660
saving cas

saving case as html file
index : 1033
case_id : 4598807
saving case as html file
index : 1034
case_id : 4752594
saving case as html file
index : 1035
case_id : 3768322
saving case as html file
index : 1036
case_id : 4244222
saving case as html file
index : 1037
case_id : 4598919
saving case as html file
index : 1038
case_id : 4752229
saving case as html file
index : 1039
case_id : 3768334
saving case as html file
index : 1040
case_id : 3768367
saving case as html file
index : 1041
case_id : 3768346
saving case as html file
index : 1042
case_id : 4599016
saving case as html file
index : 1043
case_id : 4240995
saving case as html file
index : 1044
case_id : 3806048
saving case as html file
index : 1045
case_id : 4598950
saving case as html file
index : 1046
case_id : 4752067
saving case as html file
index : 1047
case_id : 3804782
saving case as html file
index : 1048
case_id : 4752226
saving case as html file
index : 1049
case_id : 4598912
saving case as html file
index : 1050
case_id : 

index : 1179
case_id : 4752478
saving case as html file
index : 1180
case_id : 3768330
saving case as html file
index : 1181
case_id : 3768353
saving case as html file
index : 1182
case_id : 3768354
saving case as html file
index : 1183
case_id : 3806118
saving case as html file
index : 1184
case_id : 4752297
saving case as html file
index : 1185
case_id : 4752541
saving case as html file
index : 1186
case_id : 3768328
saving case as html file
index : 1187
case_id : 3768319
saving case as html file
index : 1188
case_id : 3802993
saving case as html file
index : 1189
case_id : 4598807
saving case as html file
index : 1190
case_id : 4752594
saving case as html file
index : 1191
case_id : 3768322
saving case as html file
index : 1192
case_id : 4244222
saving case as html file
index : 1193
case_id : 4598919
saving case as html file
index : 1194
case_id : 4752229
saving case as html file
index : 1195
case_id : 3768334
saving case as html file
index : 1196
case_id : 3768335
saving case as ht

saving case as html file
index : 1324
case_id : 4752572
saving case as html file
index : 1325
case_id : 4752550
saving case as html file
index : 1326
case_id : 4752395
saving case as html file
index : 1327
case_id : 4240997
saving case as html file
index : 1328
case_id : 3786222
saving case as html file
index : 1329
case_id : 3786240
saving case as html file
index : 1330
case_id : 3806953
saving case as html file
index : 1331
case_id : 3768348
saving case as html file
index : 1332
case_id : 4752660
saving case as html file
index : 1333
case_id : 3802376
saving case as html file
index : 1334
case_id : 4752144
saving case as html file
index : 1335
case_id : 4752478
saving case as html file
index : 1336
case_id : 3768330
saving case as html file
index : 1337
case_id : 3768353
saving case as html file
index : 1338
case_id : 3768354
saving case as html file
index : 1339
case_id : 3806118
saving case as html file
index : 1340
case_id : 4752297
saving case as html file
index : 1341
case_id : 

saving case as html file
index : 1466
case_id : 4598919
saving case as html file
index : 1467
case_id : 4752229
saving case as html file
index : 1468
case_id : 3768334
saving case as html file
index : 1469
case_id : 3768335
saving case as html file
index : 1470
case_id : 4751956
saving case as html file
index : 1471
case_id : 4752569
saving case as html file
index : 1472
case_id : 4598901
saving case as html file
index : 1473
case_id : 3768331
saving case as html file
index : 1474
case_id : 4752247
saving case as html file
index : 1475
case_id : 4752360
saving case as html file
index : 1476
case_id : 3786235
saving case as html file
index : 1477
case_id : 3768329
saving case as html file
index : 1478
case_id : 4598874
saving case as html file
index : 1479
case_id : 3777578
saving case as html file
index : 1480
case_id : 4752572
saving case as html file
index : 1481
case_id : 4752550
saving case as html file
index : 1482
case_id : 4752395
saving case as html file
index : 1483
case_id : 

saving case as html file
index : 1610
case_id : 3768353
saving case as html file
index : 1611
case_id : 3768354
saving case as html file
index : 1612
case_id : 3806118
saving case as html file
index : 1613
case_id : 4752297
saving case as html file
index : 1614
case_id : 4752541
saving case as html file
index : 1615
case_id : 3768328
saving case as html file
index : 1616
case_id : 3768319
saving case as html file
index : 1617
case_id : 3802993
saving case as html file
index : 1618
case_id : 4598807
saving case as html file
index : 1619
case_id : 4752594
saving case as html file
index : 1620
case_id : 3768322
saving case as html file
index : 1621
case_id : 4244222
saving case as html file
index : 1622
case_id : 4598919
saving case as html file
index : 1623
case_id : 4752229
saving case as html file
index : 1624
case_id : 3768334
saving case as html file
index : 1625
case_id : 3768335
saving case as html file
index : 1626
case_id : 4751956
saving case as html file
index : 1627
case_id : 

saving case as html file
index : 1752
case_id : 3777578
saving case as html file
index : 1753
case_id : 3768326
saving case as html file
index : 1754
case_id : 4752096
saving case as html file
index : 1755
case_id : 4240987
saving case as html file
index : 1756
case_id : 3767056
saving case as html file
index : 1757
case_id : 4598921
saving case as html file
index : 1758
case_id : 3786204
saving case as html file
index : 1759
case_id : 3768333
saving case as html file
   lodge_date  DateAdded                                    DisplaySubtitle  \
0    19721025   20110520  Al mogen de inkomsten uit hier te lande gelege...   
1    19721024   20101230              Bewijs van rijden onder drankinvloed.   
2    19721011   20110520  Het in art. 14, lid 5, Vermogensbelasting '64 ...   
3    19720817   20100722  Aangezien er in werkelijkheid geen sprake is v...   
4    19720711   20100722  Twijfel omtrent jurisdictie van de Kantonrecht...   

                                        DisplayTitle

saving case as html file
index : 12
case_id : 4752243
saving case as html file
index : 13
case_id : 4752462
saving case as html file
index : 14
case_id : 3768488
saving case as html file
index : 15
case_id : 4752198
saving case as html file
index : 16
case_id : 4752793
saving case as html file
index : 17
case_id : 4752394
saving case as html file
index : 18
case_id : 4241071
saving case as html file
index : 19
case_id : 3786358
saving case as html file
index : 20
case_id : 3768464
saving case as html file
index : 21
case_id : 3768446
saving case as html file
index : 22
case_id : 3768462
saving case as html file
index : 23
case_id : 4752511
saving case as html file
index : 24
case_id : 4752596
saving case as html file
index : 25
case_id : 4752362
saving case as html file
index : 26
case_id : 3786323
saving case as html file
index : 27
case_id : 4752485
saving case as html file
index : 28
case_id : 3768460
saving case as html file
index : 29
case_id : 4598153
Expecting value: line 1 colu

saving case as html file
index : 159
case_id : 3768461
saving case as html file
index : 160
case_id : 4752095
saving case as html file
index : 161
case_id : 4752446
saving case as html file
index : 162
case_id : 3768479
saving case as html file
index : 163
case_id : 3768486
saving case as html file
index : 164
case_id : 3768474
saving case as html file
index : 165
case_id : 3768493
saving case as html file
index : 166
case_id : 4752473
saving case as html file
index : 167
case_id : 4752570
saving case as html file
index : 168
case_id : 4752243
saving case as html file
index : 169
case_id : 4752462
saving case as html file
index : 170
case_id : 3768488
saving case as html file
index : 171
case_id : 4752198
saving case as html file
index : 172
case_id : 4752793
saving case as html file
index : 173
case_id : 4752394
saving case as html file
index : 174
case_id : 4241071
saving case as html file
index : 175
case_id : 3786358
saving case as html file
index : 176
case_id : 3768464
saving cas

saving case as html file
index : 306
case_id : 4752286
saving case as html file
index : 307
case_id : 3768452
saving case as html file
index : 308
case_id : 3786328
saving case as html file
index : 309
case_id : 3768466
saving case as html file
index : 310
case_id : 4752371
saving case as html file
index : 311
case_id : 3806746
saving case as html file
index : 312
case_id : 3786322
saving case as html file
index : 313
case_id : 3786321
saving case as html file
index : 314
case_id : 3768467
saving case as html file
index : 315
case_id : 3768461
saving case as html file
index : 316
case_id : 3768444
saving case as html file
index : 317
case_id : 4752804
saving case as html file
index : 318
case_id : 4752216
saving case as html file
index : 319
case_id : 3786327
saving case as html file
index : 320
case_id : 3768474
saving case as html file
index : 321
case_id : 3768493
saving case as html file
index : 322
case_id : 4752473
saving case as html file
index : 323
case_id : 4752570
saving cas

saving case as html file
index : 452
case_id : 4752511
saving case as html file
index : 453
case_id : 4752596
saving case as html file
index : 454
case_id : 4752362
saving case as html file
index : 455
case_id : 3786323
saving case as html file
index : 456
case_id : 4752485
saving case as html file
index : 457
case_id : 3768460
saving case as html file
index : 458
case_id : 4598153
saving case as html file
index : 459
case_id : 3786330
saving case as html file
index : 460
case_id : 4241054
saving case as html file
index : 461
case_id : 4752228
saving case as html file
index : 462
case_id : 4752286
saving case as html file
index : 463
case_id : 3768452
saving case as html file
index : 464
case_id : 3786328
saving case as html file
index : 465
case_id : 3768466
saving case as html file
index : 466
case_id : 4752371
saving case as html file
index : 467
case_id : 3806746
saving case as html file
index : 468
case_id : 3786322
saving case as html file
index : 469
case_id : 3786321
saving cas

saving case as html file
index : 599
case_id : 3805528
saving case as html file
index : 600
case_id : 4752198
saving case as html file
index : 601
case_id : 4752793
saving case as html file
index : 602
case_id : 4752394
saving case as html file
index : 603
case_id : 4241071
saving case as html file
index : 604
case_id : 3786358
saving case as html file
index : 605
case_id : 3768464
saving case as html file
index : 606
case_id : 3768446
saving case as html file
index : 607
case_id : 3768462
saving case as html file
index : 608
case_id : 4752511
saving case as html file
index : 609
case_id : 4752596
saving case as html file
index : 610
case_id : 4752362
saving case as html file
index : 611
case_id : 3786323
saving case as html file
index : 612
case_id : 4752485
saving case as html file
index : 613
case_id : 3768460
saving case as html file
index : 614
case_id : 4598153
saving case as html file
index : 615
case_id : 3786330
saving case as html file
index : 616
case_id : 4241054
saving cas

saving case as html file
index : 746
case_id : 4752804
saving case as html file
index : 747
case_id : 4752216
saving case as html file
index : 748
case_id : 3786327
saving case as html file
index : 749
case_id : 3768455
saving case as html file
index : 750
case_id : 4752425
saving case as html file
index : 751
case_id : 4752479
saving case as html file
index : 752
case_id : 4752708
saving case as html file
index : 753
case_id : 3801852
saving case as html file
index : 754
case_id : 3786335
saving case as html file
index : 755
case_id : 3805528
saving case as html file
index : 756
case_id : 4598046
saving case as html file
index : 757
case_id : 4752781
saving case as html file
index : 758
case_id : 4752066
saving case as html file
index : 759
case_id : 3786319
saving case as html file
index : 760
case_id : 3786358
saving case as html file
index : 761
case_id : 3768464
saving case as html file
index : 762
case_id : 3768446
saving case as html file
index : 763
case_id : 3768462
saving cas

saving case as html file
index : 893
case_id : 3786328
saving case as html file
index : 894
case_id : 3768466
saving case as html file
index : 895
case_id : 4752371
saving case as html file
index : 896
case_id : 3806746
ERROR : not a correct ECLI number : we don't add the document for now 
index : 897
case_id : 3786322
Expecting value: line 1 column 1 (char 0)
weird error
saving case as html file
index : 898
case_id : 3786321
saving case as html file
index : 899
case_id : 3768467
saving case as html file
index : 900
case_id : 3768461
saving case as html file
index : 901
case_id : 3768444
saving case as html file
index : 902
case_id : 4752804
saving case as html file
index : 903
case_id : 4752216
saving case as html file
index : 904
case_id : 3786327
saving case as html file
index : 905
case_id : 3768455
saving case as html file
index : 906
case_id : 4752425
saving case as html file
index : 907
case_id : 4752479
saving case as html file
index : 908
case_id : 4752708
saving case as html 

saving case as html file
index : 1038
case_id : 4598147
saving case as html file
index : 1039
case_id : 3768468
saving case as html file
index : 1040
case_id : 3786323
saving case as html file
index : 1041
case_id : 4752485
saving case as html file
index : 1042
case_id : 3768460
saving case as html file
index : 1043
case_id : 4598153
saving case as html file
index : 1044
case_id : 3786330
saving case as html file
index : 1045
case_id : 4241054
saving case as html file
index : 1046
case_id : 4752228
saving case as html file
index : 1047
case_id : 4752286
saving case as html file
index : 1048
case_id : 3768452
saving case as html file
index : 1049
case_id : 3786328
saving case as html file
index : 1050
case_id : 3768466
saving case as html file
index : 1051
case_id : 4752371
saving case as html file
index : 1052
case_id : 3806746
saving case as html file
index : 1053
case_id : 3786322
saving case as html file
index : 1054
case_id : 3786321
ERROR : not a correct ECLI number : we don't add

saving case as html file
index : 1181
case_id : 4752708
saving case as html file
index : 1182
case_id : 3801852
saving case as html file
index : 1183
case_id : 3786335
saving case as html file
index : 1184
case_id : 3805528
saving case as html file
index : 1185
case_id : 4598046
saving case as html file
index : 1186
case_id : 4752781
saving case as html file
index : 1187
case_id : 4752066
saving case as html file
index : 1188
case_id : 3786319
saving case as html file
index : 1189
case_id : 4598129
saving case as html file
index : 1190
case_id : 3780862
saving case as html file
index : 1191
case_id : 4752798
saving case as html file
index : 1192
case_id : 4752554
saving case as html file
index : 1193
case_id : 3786313
saving case as html file
index : 1194
case_id : 4598147
saving case as html file
index : 1195
case_id : 3768468
saving case as html file
index : 1196
case_id : 3768457
saving case as html file
index : 1197
case_id : 4752047
saving case as html file
index : 1198
case_id : 

saving case as html file
index : 1326
case_id : 3786322
saving case as html file
index : 1327
case_id : 3786321
saving case as html file
index : 1328
case_id : 3768467
saving case as html file
index : 1329
case_id : 3768461
saving case as html file
index : 1330
case_id : 3768444
saving case as html file
index : 1331
case_id : 4752804
saving case as html file
index : 1332
case_id : 4752216
saving case as html file
index : 1333
case_id : 3786327
saving case as html file
index : 1334
case_id : 3768455
saving case as html file
index : 1335
case_id : 4752425
saving case as html file
index : 1336
case_id : 4752479
saving case as html file
index : 1337
case_id : 4752708
saving case as html file
index : 1338
case_id : 3801852
saving case as html file
index : 1339
case_id : 3786335
saving case as html file
index : 1340
case_id : 3805528
saving case as html file
index : 1341
case_id : 4598046
saving case as html file
index : 1342
case_id : 4752781
Expecting value: line 1 column 1 (char 0)
weird 

saving case as html file
index : 1470
case_id : 4752047
saving case as html file
index : 1471
case_id : 3768430
saving case as html file
index : 1472
case_id : 3768443
saving case as html file
index : 1473
case_id : 4752035
saving case as html file
index : 1474
case_id : 4752192
saving case as html file
index : 1475
case_id : 4752277
saving case as html file
index : 1476
case_id : 4752459
saving case as html file
index : 1477
case_id : 4752079
saving case as html file
index : 1478
case_id : 4241032
saving case as html file
index : 1479
case_id : 3786305
saving case as html file
index : 1480
case_id : 4752371
saving case as html file
index : 1481
case_id : 3806746
saving case as html file
index : 1482
case_id : 3786322
saving case as html file
index : 1483
case_id : 3786321
saving case as html file
index : 1484
case_id : 3768467
saving case as html file
index : 1485
case_id : 3768461
saving case as html file
index : 1486
case_id : 3768444
saving case as html file
index : 1487
case_id : 

saving case as html file
index : 1615
case_id : 4752781
saving case as html file
index : 1616
case_id : 4752066
saving case as html file
index : 1617
case_id : 3786319
saving case as html file
index : 1618
case_id : 4598129
saving case as html file
index : 1619
case_id : 3780862
saving case as html file
index : 1620
case_id : 4752798
saving case as html file
index : 1621
case_id : 4752554
saving case as html file
index : 1622
case_id : 3786313
saving case as html file
index : 1623
case_id : 4598147
saving case as html file
index : 1624
case_id : 3768468
saving case as html file
index : 1625
case_id : 3768457
saving case as html file
index : 1626
case_id : 4752047
saving case as html file
index : 1627
case_id : 3768430
saving case as html file
index : 1628
case_id : 3768443
ERROR : not a correct ECLI number : we don't add the document for now 
index : 1629
case_id : 4752035
Expecting value: line 1 column 1 (char 0)
weird error
saving case as html file
index : 1630
case_id : 4752192
savi

saving case as html file
index : 1759
case_id : 4752267
saving case as html file
   lodge_date  DateAdded                                    DisplaySubtitle  \
0    19731031   20110520  Belangh. koopt in 1960 grond; op een deel hier...   
1    19731024   20110520  Belangh., Italiaanse n.v. verkocht haar produc...   
2    19730704   20100722  Nadat een fabrieksbrand was geblust, voldoet h...   
3    19730627   20110520  Belangh., werkzaam in dienstbetrekking, teelt ...   
4    19730627   20110520  De zoon van belangh. studeerde in 1967 en 1968...   

                                        DisplayTitle DocumentType       Id  \
0  BNB 1973/264 - Met noot - Hofstra, H.J. - Hoge...  Rechtspraak  4752168   
1  BNB 1973/262 - Met noot - Boer den, P. - Hoge ...  Rechtspraak  4752717   
2  Prg. 1973, 847 - Sector kanton Rechtbank Den H...  Rechtspraak  3806744   
3  BNB 1973/194 - Met noot - Dijck van, J.E.A.M. ...  Rechtspraak  4752507   
4  BNB 1973/214 - Met noot - Duijn van, IJ.D.C. -...  

saving case as html file
index : 28
case_id : 4753300
saving case as html file
index : 29
case_id : 4752896
saving case as html file
index : 30
case_id : 4752721
saving case as html file
index : 31
case_id : 4753337
saving case as html file
index : 32
case_id : 4241110
saving case as html file
index : 33
case_id : 3786442
saving case as html file
index : 34
case_id : 4241125
saving case as html file
index : 35
case_id : 3768587
saving case as html file
index : 36
case_id : 3786419
saving case as html file
index : 37
case_id : 4752706
saving case as html file
index : 38
case_id : 4753059
saving case as html file
index : 39
case_id : 4753116
saving case as html file
index : 40
case_id : 4753419
saving case as html file
index : 41
case_id : 3786460
saving case as html file
index : 42
case_id : 3768602
saving case as html file
index : 43
case_id : 3768618
saving case as html file
index : 44
case_id : 4599201
saving case as html file
index : 45
case_id : 3768614
saving case as html file
ind

saving case as html file
index : 175
case_id : 4599220
saving case as html file
index : 176
case_id : 3768609
saving case as html file
index : 177
case_id : 3786420
saving case as html file
index : 178
case_id : 3768616
saving case as html file
index : 179
case_id : 4753032
saving case as html file
index : 180
case_id : 4752899
saving case as html file
index : 181
case_id : 4753148
saving case as html file
index : 182
case_id : 4752780
saving case as html file
index : 183
case_id : 4753160
saving case as html file
index : 184
case_id : 4753300
saving case as html file
index : 185
case_id : 4752896
saving case as html file
index : 186
case_id : 4752721
saving case as html file
index : 187
case_id : 4753337
saving case as html file
index : 188
case_id : 4241110
saving case as html file
index : 189
case_id : 3786442
saving case as html file
index : 190
case_id : 4241125
saving case as html file
index : 191
case_id : 3768587
saving case as html file
index : 192
case_id : 3786419
saving cas

saving case as html file
index : 322
case_id : 4753000
saving case as html file
index : 323
case_id : 4752636
saving case as html file
index : 324
case_id : 4752604
saving case as html file
index : 325
case_id : 4752940
saving case as html file
index : 326
case_id : 3786456
saving case as html file
index : 327
case_id : 3786464
saving case as html file
index : 328
case_id : 3763290
saving case as html file
index : 329
case_id : 4599215
saving case as html file
index : 330
case_id : 3768608
saving case as html file
index : 331
case_id : 4599220
saving case as html file
index : 332
case_id : 3768609
saving case as html file
index : 333
case_id : 3786420
saving case as html file
index : 334
case_id : 3768616
saving case as html file
index : 335
case_id : 4753032
saving case as html file
index : 336
case_id : 4752899
saving case as html file
index : 337
case_id : 4753148
saving case as html file
index : 338
case_id : 4752780
saving case as html file
index : 339
case_id : 4753160
saving cas

saving case as html file
index : 470
case_id : 4752908
saving case as html file
index : 471
case_id : 4752730
saving case as html file
index : 472
case_id : 3786431
saving case as html file
index : 473
case_id : 3801055
saving case as html file
index : 474
case_id : 3801456
saving case as html file
index : 475
case_id : 3803882
saving case as html file
index : 476
case_id : 4752854
saving case as html file
index : 477
case_id : 3786428
saving case as html file
index : 478
case_id : 3786441
saving case as html file
index : 479
case_id : 3786432
saving case as html file
index : 480
case_id : 4752604
saving case as html file
index : 481
case_id : 4752940
saving case as html file
index : 482
case_id : 3786456
saving case as html file
index : 483
case_id : 3786464
saving case as html file
index : 484
case_id : 3763290
saving case as html file
index : 485
case_id : 4599215
saving case as html file
index : 486
case_id : 3768608
saving case as html file
index : 487
case_id : 4599220
saving cas

saving case as html file
index : 617
case_id : 4241110
ERROR : not a correct ECLI number : we don't add the document for now 
index : 618
case_id : 3786442
Expecting value: line 1 column 1 (char 0)
weird error
saving case as html file
index : 619
case_id : 4241125
saving case as html file
index : 620
case_id : 3768587
saving case as html file
index : 621
case_id : 3786419
saving case as html file
index : 622
case_id : 4752706
saving case as html file
index : 623
case_id : 4753059
saving case as html file
index : 624
case_id : 4753116
saving case as html file
index : 625
case_id : 3803022
saving case as html file
index : 626
case_id : 4752908
saving case as html file
index : 627
case_id : 4752730
saving case as html file
index : 628
case_id : 3786431
saving case as html file
index : 629
case_id : 3801055
saving case as html file
index : 630
case_id : 3801456
saving case as html file
index : 631
case_id : 3803882
saving case as html file
index : 632
case_id : 4752854
saving case as html 

KeyboardInterrupt: 

In [23]:
get_document(4598940)

'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html xmlns="http://www.w3.org/1999/xhtml">\n<head>\n<META http-equiv="Content-Type" content="text/html; charset=UTF-8">\n<title>BR 1972, p. 574: Hof Arnhem, 06-06-1972</title>\n<meta name="viewport" content="user-scalable=no, width=device-width">\n<meta name="format-detection" content="telephone=no">\n</head>\n<body class="body">\n<div id="innerbody">\n<div id="tocArea"></div>\n<div id="contentArea">\n<div class="document kluwer" documentId="4598940">\n<div class="document-option"></div>\n<div class="meta-content">\n<div class="meta-content-header">BR 1972, p. 574: Hof Arnhem, 06-06-1972</div>\n<div class="meta-content-item">\n<span class="meta-title">Instantie: </span>Hof Arnhem</div>\n<div class="meta-content-item">\n<span class="meta-title">Datum: </span>1972-06-06</div>\n<div class="meta-content-item">\n<span class="meta-title">Magistraten: </span>Schaepman, Vr