In [1]:
import pandas as pd
import numpy as np
from tqdm import notebook
import ast
import re
from fuzzywuzzy import fuzz
from datetime import datetime, timedelta
import spacy

In [2]:
def translate_rank(rank, translator):
    if rank in translator['(all occupations)'].tolist():
        return translator.index[translator['(all occupations)'] == rank][0]
    else:
        return rank

In [3]:
notebook.tqdm.pandas()
clean = pd.read_csv('../../clean_data.csv')
voc_df = pd.read_csv('../vocop-clustered-new.csv', sep='	')
uuid = []
name = []
for y, z in notebook.tqdm(clean.iterrows()):
    for x in ast.literal_eval(z.namen):
        if x['tussenvoegsel'] != None:
            name.append(x['voornaam'] + " " + x['tussenvoegsel'] + " " + x['achternaam'])
            uuid.append(z.uuid)
        elif x['voornaam'] and x['achternaam'] != None:
            name.append(x['voornaam'] + " " + x['achternaam'])
            uuid.append(z.uuid)
name_list = pd.DataFrame(data={'uuid':uuid, 'name':name}, columns=['uuid', 'name'])
name_df = clean.merge(name_list)
rangen = pd.read_excel('../../vocop_rangen.xlsx', index_col=0)
voc_df['dutch_rank'] = [translate_rank(x, rangen) for x in notebook.tqdm(voc_df['rank'].tolist())]

  from pandas import Panel
  interactivity=interactivity, compiler=compiler, result=result)


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




HBox(children=(IntProgress(value=0, max=774200), HTML(value='')))




In [4]:
def fuzzy_search(name, voc, distance):
    names = np.where((voc.fullNameNormalized.apply(fuzz.ratio, args=[name]) >= 90) | 
                     (voc.fullNameOriginal.dropna().apply(fuzz.ratio, args=[name]) >= 90))
    return names[0]

def find_matches(names, voc, distance):
    name_list = {}
    final = []
    for x in notebook.tqdm(names):
        if x in name_list:
            final.append((name_list[x]))
        else:
            result = fuzzy_search(x, voc, distance)
            name_list[x] = result
            final.append(result)
    return final

def get_voc_data(matches, voc):
    if len(matches) != 0:
        data = []
        for index in matches:
            holder = {}
            holder['index'] = index
            holder['name_original'] = voc.iloc[index].fullNameOriginal
            holder['name_normalized'] = voc.iloc[index].fullNameNormalized
            holder['date_out'] = voc.iloc[index].date_begin_service_complete
            holder['date_return'] = voc.iloc[index].date_end_service_complete
            holder['ship_out'] = voc.iloc[index].shipOutward
            holder['ship_return'] = voc.iloc[index].shipReturn
            holder['rank'] = voc.iloc[index]['dutch_rank']
            holder['place_of_origin'] = voc.iloc[index].placeOfOrigin
            data.append(holder)
        return data
    else:
        return 0

def get_notary_matches(row, nlp):
    holder = {}
    if row.data_matches != 0:
        for x in row.data_matches:
            ship_out, ship_return = str(x['ship_out']), str(x['ship_return']) 
            rank, place_of_origin = str(x['rank']), str(x['place_of_origin'])
            detected_ships = []
            detected_ranks = []
            detected_location = []
            
            # Find Ships
            if fuzz.partial_ratio(ship_out.lower(), str(row.beschrijving).lower()) >= 80 or fuzz.partial_ratio(ship_out.lower(), row.text.lower()) >= 80:
                    detected_ships.append(ship_out)
            if fuzz.partial_ratio(ship_return.lower(), str(row.beschrijving).lower()) >= 80 or fuzz.partial_ratio(ship_return.lower(), row.text.lower()) >= 80:
                    detected_ships.append(ship_return)

            # Find Rank
            if fuzz.partial_ratio(rank.lower(), str(row.beschrijving).lower()) >= 80 or fuzz.partial_ratio(rank.lower(), row.text.lower()) >= 80:
                detected_ranks.append(rank)

            # Find Place of Origin
            if fuzz.partial_ratio(place_of_origin.lower(), str(row.beschrijving).lower()) >= 80 or fuzz.partial_ratio(place_of_origin.lower(), row.text.lower()) >= 80:
                detected_location.append(place_of_origin)
            else:
                for ent in nlp(row.text).ents:
                    if ent.label_ == 'LOC':
                        if fuzz.ratio(place_of_origin, ent.text) >= 85:
                            detected_location.append(place_of_origin)
            holder[x['index']] = {'ships':detected_ships, 'rank':detected_ranks, 'location':detected_location}
        return holder
    else:
        return 0

def create_annotation_subset(notary, voc):
    nlp = spacy.load('nl_core_news_sm')
    return_df = notary.copy()
    return_df['index_matches'] = find_matches(return_df.name, voc, 80)
    return_df['data_matches'] = return_df.index_matches.progress_apply(get_voc_data, args=[voc])
    return_df = return_df.drop('index_matches', axis=1)
    return_df['data_entry'] = return_df.progress_apply(get_notary_matches, args=[nlp], axis=1)
    return return_df

# def annotate(row):
#     notary_date = datetime.strptime(row.datering, '%Y-%m-%d')
#     if row.data_matches != '0':
#         #print(row.data_matches)
#         #for person in row.data_matches:
#         for person in row.data_matches:
#             try:
#                 out_date = datetime.strptime(person['date_out'], '%Y-%m-%d')
#             except:
#                 out_date = datetime(year=1, month=1, day =1 )
#             try:
#                 return_date = datetime.strptime(person['date_return'], '%Y-%m-%d')
#             except:
#                 return_date = datetime(year=1, month=1, day =1 )
#             if (notary_date - out_date).days not in range(0, -91, -1) and (notary_date - return_date).days not in range(0, 91):
#                 #print('Skipped match')
#                 continue
                
#             else:
#                 print('{:10} | {:30} | {}'.format(' ', "Notary Information " + str(row.name), 'VOC Information ' + str(person['index'])))
#                 print('-' * 108)
#                 print('{:10} | {:30} | {} / {}'.format('Name', row['name'], person['name_original'], person['name_normalized']))
#                 print('{:10} | {:30} | {} / {}'.format('Dates', row.datering, person['date_out'], person['date_return']))
#                 print('{:10} | {:30} | {} / {}'.format('Ships', ' / '.join(row['data_entry'][str(person['index'])]['ships']), person['ship_out'], person['ship_return']))
#                 print('{:10} | {:30} | {}'.format('Rank', ' / '.join(row['data_entry'][str(person['index'])]['rank']), person['rank']))
#                 print('{:10} | {:30} | {}'.format('Locations', ' / '.join(row['data_entry'][str(person['index'])]['location']), person['place_of_origin']))
#                 check = False
#                 print('Are these persons the same? y/n:')
#                 while check != True:
#                     answer = input()
#                     if answer == 'y':
#                         return (person['name_original'], person['index'])
#                         check = True
#                     elif answer == 'n':
#                         check = True
#                     elif answer == 'text':
#                         print(row.text)
#                     else:
#                         print("Invalid input please enter 'y', 'n', or 'text' without the quotes.")
#         return None
                
#     else:
#         return None

In [5]:
def annotate(df, prev=None):
    if prev is not None:
        final = prev.copy()
        start = len(final)
    else:
        final = pd.DataFrame(columns = df.columns)
        start = 0
    holder = []
    #stop = False
    #while stop != True:
    for row in df[start:].itertuples():
        notary_date = datetime.strptime(row.datering, '%Y-%m-%d')
        if row.data_matches != '0':
            #print(row.data_matches)
            #for person in row.data_matches:
            for person in row.data_matches:
                try:
                    out_date = datetime.strptime(person['date_out'], '%Y-%m-%d')
                except:
                    out_date = datetime(year=1, month=1, day =1 )
                try:
                    return_date = datetime.strptime(person['date_return'], '%Y-%m-%d')
                except:
                    return_date = datetime(year=1, month=1, day =1 )
                if (notary_date - out_date).days not in range(0, -91, -1) and (notary_date - return_date).days not in range(0, 91):
                    #print('Skipped match')
                    continue

                else:
                    print('{:10} | {:30} | {}'.format(' ', "Notary Information " + str(row.name), 'VOC Information ' + str(person['index'])))
                    print('-' * 108)
                    print('{:10} | {:30} | {} / {}'.format('Name', row.name, person['name_original'], person['name_normalized']))
                    print('{:10} | {:30} | {} / {}'.format('Dates', row.datering, person['date_out'], person['date_return']))
                    print('{:10} | {:30} | {} / {}'.format('Ships', ' / '.join(row.data_entry[str(person['index'])]['ships']), person['ship_out'], person['ship_return']))
                    print('{:10} | {:30} | {}'.format('Rank', ' / '.join(row.data_entry[str(person['index'])]['rank']), person['rank']))
                    print('{:10} | {:30} | {}'.format('Locations', ' / '.join(row.data_entry[str(person['index'])]['location']), person['place_of_origin']))
                    check = False
                    print('Are these persons the same? y/n:')
                    while check != True:
                        answer = input()
                        if answer == 'y':
                            holder.append((person['name_original'], person['index']))
                            final = final.append(df.loc[row.Index])
                            check = True
                        elif answer == 'n':
                            check = True
                        elif answer == 'text':
                            print(row.text)
                        elif answer == 'stop':
                            final['vocop_match'][start:] = holder
                            return final
                        else:
                            print("Invalid input please enter 'y', 'n', 'stop', or 'text' without the quotes.")
                    if answer == 'y':
                        break
                    else:
                        continue
            if answer == 'y':
                continue
            else:
                holder.append(0)
                final = final.append(df.loc[row.Index])
        else:
            holder.append(0)
            final = final.append(df.loc[row.Index])
    final['vocop_match'][start:] = holder
    return final

In [9]:
#henk1 = create_annotation_subset(name_df[0:10].copy(), voc_df)
henk1 = create_annotation_subset(name_df[0:5000].copy(), voc_df)
#henk1 = create_annotation_subset(name_df[54465:54467].copy(), voc_df)

HBox(children=(IntProgress(value=0, max=5000), HTML(value='')))




HBox(children=(IntProgress(value=0, max=5000), HTML(value='')))




HBox(children=(IntProgress(value=0, max=5000), HTML(value='')))




In [11]:
henk1.to_json('subset1.json')
#subset = pd.read_json('result.json')

In [6]:
t5t10 = create_annotation_subset(name_df[5000:10000].copy(), voc_df)

HBox(children=(IntProgress(value=0, max=5000), HTML(value='')))




HBox(children=(IntProgress(value=0, max=5000), HTML(value='')))




HBox(children=(IntProgress(value=0, max=5000), HTML(value='')))




In [7]:
t5t10.to_json('subset2.json')

In [74]:
subset

Unnamed: 0,uuid,rubriek,notaris,inventarisNr,akteNr,akteType,datering,taal,beschrijving,namen,urls,text,name,data_matches,data_entry,vocop_match
0,17599c0c-3305-165c-aae3-eddbb497e4b4,358,JAN VERLEIJ,11885,107542,Testament,1741-03-16,nederlands,,"[{'voornaam': 'Trijntje', 'tussenvoegsel': Non...","['KLAB05372000012.JPG', 'KLAB05372000013.JPG',...",N: 4 1e de Testateuren hebben verklaerd te Sam...,Trijntje Lansink,0,0,0
1,17599c0c-3305-165c-aae3-eddbb497e4b4,358,JAN VERLEIJ,11885,107542,Testament,1741-03-16,nederlands,,"[{'voornaam': 'Trijntje', 'tussenvoegsel': Non...","['KLAB05372000012.JPG', 'KLAB05372000013.JPG',...",N: 4 1e de Testateuren hebben verklaerd te Sam...,Martinus Ciprianus,0,0,0
2,17599c0c-3305-165c-aae3-eddbb497e4b4,358,JAN VERLEIJ,11885,107542,Testament,1741-03-16,nederlands,,"[{'voornaam': 'Trijntje', 'tussenvoegsel': Non...","['KLAB05372000012.JPG', 'KLAB05372000013.JPG',...",N: 4 1e de Testateuren hebben verklaerd te Sam...,Simon van Zuijderwijk,0,0,0
3,17599c0c-3305-165c-aae3-eddbb497e4b4,358,JAN VERLEIJ,11885,107542,Testament,1741-03-16,nederlands,,"[{'voornaam': 'Trijntje', 'tussenvoegsel': Non...","['KLAB05372000012.JPG', 'KLAB05372000013.JPG',...",N: 4 1e de Testateuren hebben verklaerd te Sam...,Margareta van Zuijdewijk,0,0,0
4,1b037028-bea5-0fa8-e6c5-68155f5f2b21,358,JAN VERLEIJ,11885,108046,Attestatie,1741-07-26,nederlands,\nordentelijke vrijage\n,"[{'voornaam': 'Gerard', 'tussenvoegsel': None,...","['KLAB05372000193.JPG', 'KLAB05372000194.JPG',...",Verklaing gepasseert den 26 Julij 1741No: 60- ...,Gerard Plaatman,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0bbad819-c607-7723-4b87-6260d37fb23a,358,JAN VERLEIJ,11886,11093,Attestatie,1742-01-24,nederlands,\nkennen verwantschap overledene\n,"[{'voornaam': 'Willem', 'tussenvoegsel': None,...","['KLAB05356000031.JPG', 'KLAB05356000032.JPG',...",: 16l:L: Verklaring gepasseerd den 29 Januarij...,Aaffje Lubberts de Ruijter,0,0,0
96,0bbad819-c607-7723-4b87-6260d37fb23a,358,JAN VERLEIJ,11886,11093,Attestatie,1742-01-24,nederlands,\nkennen verwantschap overledene\n,"[{'voornaam': 'Willem', 'tussenvoegsel': None,...","['KLAB05356000031.JPG', 'KLAB05356000032.JPG',...",: 16l:L: Verklaring gepasseerd den 29 Januarij...,Aaffje de Jong,0,0,0
97,0bbad819-c607-7723-4b87-6260d37fb23a,358,JAN VERLEIJ,11886,11093,Attestatie,1742-01-24,nederlands,\nkennen verwantschap overledene\n,"[{'voornaam': 'Willem', 'tussenvoegsel': None,...","['KLAB05356000031.JPG', 'KLAB05356000032.JPG',...",: 16l:L: Verklaring gepasseerd den 29 Januarij...,Paulus de Jong,"[{'index': 7600, 'name_original': 'Paulus de J...","{'7600': {'ships': [], 'rank': [], 'location':...",0
98,0bbad819-c607-7723-4b87-6260d37fb23a,358,JAN VERLEIJ,11886,11093,Attestatie,1742-01-24,nederlands,\nkennen verwantschap overledene\n,"[{'voornaam': 'Willem', 'tussenvoegsel': None,...","['KLAB05356000031.JPG', 'KLAB05356000032.JPG',...",: 16l:L: Verklaring gepasseerd den 29 Januarij...,Jan Paulusz de Jong,0,0,0


In [63]:
for x in henk1.data_matches:
    print(x)

0
0
0
0
0
0
0
[{'index': 288686, 'name_original': 'Cornelis van Someren', 'name_normalized': 'Kornelis van Somer', 'date_out': '1738-12-27', 'date_return': '1739-09-26', 'ship_out': 'NIEUWSTAD', 'ship_return': nan, 'rank': 'Metselaar', 'place_of_origin': 'Swijndregt'}, {'index': 310196, 'name_original': 'Cornelis van Someren', 'name_normalized': 'Kornelis van Somer', 'date_out': '1737-12-11', 'date_return': '1745-12-13', 'ship_out': 'Nieuwstad', 'ship_return': nan, 'rank': 'Metselaar', 'place_of_origin': 'Swijndregt'}, {'index': 321893, 'name_original': 'Cornelis van Ammelen', 'name_normalized': 'Kornelis van Ammel', 'date_out': '1727-11-02', 'date_return': '1728-04-29', 'ship_out': 'Den Dam', 'ship_return': nan, 'rank': 'Matroos', 'place_of_origin': 'Delft'}, {'index': 397388, 'name_original': 'Cornelis van Overen', 'name_normalized': 'Kornelis van Over', 'date_out': '1731-01-01', 'date_return': '1732-09-03', 'ship_out': 'Hillegonda', 'ship_return': nan, 'rank': 'Konstabel', 'place_of

In [170]:
subset

Unnamed: 0,uuid,rubriek,notaris,inventarisNr,akteNr,akteType,datering,taal,beschrijving,namen,urls,text,name,data_matches,data_entry,vocop_match
54465,da6f9044-1588-3bf2-1e2f-72b7b374e3db,358,JAN VERLEIJ,11905,18207,Machtiging,1748-09-19,nederlands,"\nschip 't Huys te Spijk, VOC kamer Zeeland; s...","[{'voornaam': 'Cornelis', 'tussenvoegsel': Non...","['KLAB05559000102.JPG', 'KLAB05559000103.JPG']",No: 259 Procuratie gepoassseert den 19e: Septe...,Cornelis Vis,"[{'index': 33017, 'name_original': 'Cornelis V...","{'33017': {'ships': [], 'rank': [], 'location'...","(Cornelis Vis, 66931)"
54466,da6f9044-1588-3bf2-1e2f-72b7b374e3db,358,JAN VERLEIJ,11905,18207,Machtiging,1748-09-19,nederlands,"\nschip 't Huys te Spijk, VOC kamer Zeeland; s...","[{'voornaam': 'Cornelis', 'tussenvoegsel': Non...","['KLAB05559000102.JPG', 'KLAB05559000103.JPG']",No: 259 Procuratie gepoassseert den 19e: Septe...,Jan Govertsz,"[{'index': 21034, 'name_original': 'Jan Govers...","{'21034': {'ships': [], 'rank': [], 'location'...","(Jan Govertsz., 66979)"


In [125]:
testje = pd.DataFrame(columns = subset.columns)

In [142]:
testje

Unnamed: 0,uuid,rubriek,notaris,inventarisNr,akteNr,akteType,datering,taal,beschrijving,namen,urls,text,name,data_matches,data_entry,vocop_match
54465,da6f9044-1588-3bf2-1e2f-72b7b374e3db,358,JAN VERLEIJ,11905,18207,Machtiging,1748-09-19,nederlands,"\nschip 't Huys te Spijk, VOC kamer Zeeland; s...","[{'voornaam': 'Cornelis', 'tussenvoegsel': Non...","['KLAB05559000102.JPG', 'KLAB05559000103.JPG']",No: 259 Procuratie gepoassseert den 19e: Septe...,Cornelis Vis,"[{'index': 33017, 'name_original': 'Cornelis V...","{'33017': {'ships': [], 'rank': [], 'location'...","(Cornelis Vis, 66931)"
54466,da6f9044-1588-3bf2-1e2f-72b7b374e3db,358,JAN VERLEIJ,11905,18207,Machtiging,1748-09-19,nederlands,"\nschip 't Huys te Spijk, VOC kamer Zeeland; s...","[{'voornaam': 'Cornelis', 'tussenvoegsel': Non...","['KLAB05559000102.JPG', 'KLAB05559000103.JPG']",No: 259 Procuratie gepoassseert den 19e: Septe...,Jan Govertsz,"[{'index': 21034, 'name_original': 'Jan Govers...","{'21034': {'ships': [], 'rank': [], 'location'...","(Jan Govertsz., 66979)"


In [139]:
subset

Unnamed: 0,uuid,rubriek,notaris,inventarisNr,akteNr,akteType,datering,taal,beschrijving,namen,urls,text,name,data_matches,data_entry,vocop_match
54465,da6f9044-1588-3bf2-1e2f-72b7b374e3db,358,JAN VERLEIJ,11905,18207,Machtiging,1748-09-19,nederlands,"\nschip 't Huys te Spijk, VOC kamer Zeeland; s...","[{'voornaam': 'Cornelis', 'tussenvoegsel': Non...","['KLAB05559000102.JPG', 'KLAB05559000103.JPG']",No: 259 Procuratie gepoassseert den 19e: Septe...,Cornelis Vis,"[{'index': 33017, 'name_original': 'Cornelis V...","{'33017': {'ships': [], 'rank': [], 'location'...","(Cornelis Vis, 66931)"
54466,da6f9044-1588-3bf2-1e2f-72b7b374e3db,358,JAN VERLEIJ,11905,18207,Machtiging,1748-09-19,nederlands,"\nschip 't Huys te Spijk, VOC kamer Zeeland; s...","[{'voornaam': 'Cornelis', 'tussenvoegsel': Non...","['KLAB05559000102.JPG', 'KLAB05559000103.JPG']",No: 259 Procuratie gepoassseert den 19e: Septe...,Jan Govertsz,"[{'index': 21034, 'name_original': 'Jan Govers...","{'21034': {'ships': [], 'rank': [], 'location'...","(Jan Govertsz., 66979)"


In [31]:
sub = pd.read_json('subset1.json')
sub2 = pd.read_json('subset2.json')

In [41]:
sub2[sub2.data_matches != 0][0:350]

Unnamed: 0,uuid,rubriek,notaris,inventarisNr,akteNr,akteType,datering,taal,beschrijving,namen,urls,text,name,data_matches,data_entry
5000,6a8c2759-a8a2-739c-7c88-66cc2b3074b9,358,JAN VERLEIJ,11945,26354,Testament,1761-06-24,nederlands,,"[{'voornaam': 'Magteld', 'tussenvoegsel': None...","['KLAB06150000131.JPG', 'KLAB06150000132.JPG',...",160 Mgendes Dit Testament van Man No: 7160 en ...,Hendrik Verhoeve,"[{'index': 83119, 'name_original': 'Hendrikus ...","{'83119': {'ships': ['nan'], 'rank': [], 'loca..."
5001,6a8c2759-a8a2-739c-7c88-66cc2b3074b9,358,JAN VERLEIJ,11945,26354,Testament,1761-06-24,nederlands,,"[{'voornaam': 'Magteld', 'tussenvoegsel': None...","['KLAB06150000131.JPG', 'KLAB06150000132.JPG',...",160 Mgendes Dit Testament van Man No: 7160 en ...,Jan Gielink,"[{'index': 69002, 'name_original': 'Jan Geleij...","{'69002': {'ships': [], 'rank': [], 'location'..."
5004,7cbe357d-8773-4529-dafd-8aced1e59ed8,358,JAN VERLEIJ,11945,27395,Machtiging,1761-05-20,nederlands,\nSchip de Jonge Catharina\n,"[{'voornaam': 'Sicke', 'tussenvoegsel': None, ...","['KLAB06150000050.JPG', 'KLAB06150000051.JPG',...",Aatijs N �134 Procuratie Gepasseert den 20 Mai...,Abraham Mijlius,"[{'index': 298118, 'name_original': 'Abraham M...","{'298118': {'ships': [], 'rank': [], 'location..."
5008,82b84e18-7c79-e4b0-4a2f-6a7243e71221,358,JAN VERLEIJ,11945,27736,Machtiging,1761-07-13,nederlands,,"[{'voornaam': 'Elsje', 'tussenvoegsel': None, ...","['KLAB06150000181.JPG', 'KLAB06150000182.JPG',...",No: 176 Procuratie gepasseert den 13e: alij 17...,Elsje Pieters,"[{'index': 407589, 'name_original': 'Elsje Pie...","{'407589': {'ships': ['nan'], 'rank': [], 'loc..."
5009,82b84e18-7c79-e4b0-4a2f-6a7243e71221,358,JAN VERLEIJ,11945,27736,Machtiging,1761-07-13,nederlands,,"[{'voornaam': 'Elsje', 'tussenvoegsel': None, ...","['KLAB06150000181.JPG', 'KLAB06150000182.JPG',...",No: 176 Procuratie gepasseert den 13e: alij 17...,Hendrik Wigman,"[{'index': 13368, 'name_original': 'Hendrik We...","{'13368': {'ships': [], 'rank': [], 'location'..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5602,da7bcac6-eebe-a0b5-b863-9b5f020e2754,358,JAN VERLEIJ,11953,32645,Testament,1764-02-14,nederlands,"\nHaarlemmerdijk, langstlevende testament\n","[{'voornaam': 'Jan', 'tussenvoegsel': None, 'a...","['KLAB06159000161.JPG', 'KLAB06159000162.JPG',...",No: 95 Testament gepasseert den 14e: febr: 176...,Jan Duijff,"[{'index': 11478, 'name_original': 'Jan Kuijff...","{'11478': {'ships': [], 'rank': [], 'location'..."
5604,ec86eb9e-7c9b-a20e-0ba9-31dc4e2d05eb,358,JAN VERLEIJ,11953,33691,Kwitantie,1764-01-19,nederlands,"\nSchip De Bregitta Antonia, crediteuren van o...","[{'voornaam': 'Andries', 'tussenvoegsel': None...","['KLAB06159000055.JPG', 'KLAB06159000056.JPG',...",No: 17 Quitantie gepasseert den 19e: Januarij ...,Andries Boijer,"[{'index': 55553, 'name_original': 'Andries Bo...","{'55553': {'ships': [], 'rank': [], 'location'..."
5605,ec86eb9e-7c9b-a20e-0ba9-31dc4e2d05eb,358,JAN VERLEIJ,11953,33691,Kwitantie,1764-01-19,nederlands,"\nSchip De Bregitta Antonia, crediteuren van o...","[{'voornaam': 'Andries', 'tussenvoegsel': None...","['KLAB06159000055.JPG', 'KLAB06159000056.JPG',...",No: 17 Quitantie gepasseert den 19e: Januarij ...,Roeloff Roeloffsz,"[{'index': 1511, 'name_original': 'Roeloff Roe...","{'1511': {'ships': [], 'rank': [], 'location':..."
5607,ec86eb9e-7c9b-a20e-0ba9-31dc4e2d05eb,358,JAN VERLEIJ,11953,33691,Kwitantie,1764-01-19,nederlands,"\nSchip De Bregitta Antonia, crediteuren van o...","[{'voornaam': 'Andries', 'tussenvoegsel': None...","['KLAB06159000055.JPG', 'KLAB06159000056.JPG',...",No: 17 Quitantie gepasseert den 19e: Januarij ...,Jacob Fredrik Stijnman,"[{'index': 261563, 'name_original': 'Jacob Fre...","{'261563': {'ships': [], 'rank': [], 'location..."


In [20]:
sub[0:661].to_json('batch1_patrick.json')
sub[661:1350].to_json('batch1_thom.json')
sub[1350:1924].to_json('batch1_chiel.json')

In [25]:
sub[1924:2529].to_json('batch2_patrick.json')
sub[2529:3134].to_json('batch2_thom.json')
sub[3134:3797].to_json('batch2_chiel.json')

In [45]:
sub[3793:4416].to_json('batch3_patrick.json')
sub[4416:5000].to_json('batch3_thom.json')
sub2[0:611].to_json('batch3_chiel.json')

In [44]:
sub2[0:611]

Unnamed: 0,uuid,rubriek,notaris,inventarisNr,akteNr,akteType,datering,taal,beschrijving,namen,urls,text,name,data_matches,data_entry
5000,6a8c2759-a8a2-739c-7c88-66cc2b3074b9,358,JAN VERLEIJ,11945,26354,Testament,1761-06-24,nederlands,,"[{'voornaam': 'Magteld', 'tussenvoegsel': None...","['KLAB06150000131.JPG', 'KLAB06150000132.JPG',...",160 Mgendes Dit Testament van Man No: 7160 en ...,Hendrik Verhoeve,"[{'index': 83119, 'name_original': 'Hendrikus ...","{'83119': {'ships': ['nan'], 'rank': [], 'loca..."
5001,6a8c2759-a8a2-739c-7c88-66cc2b3074b9,358,JAN VERLEIJ,11945,26354,Testament,1761-06-24,nederlands,,"[{'voornaam': 'Magteld', 'tussenvoegsel': None...","['KLAB06150000131.JPG', 'KLAB06150000132.JPG',...",160 Mgendes Dit Testament van Man No: 7160 en ...,Jan Gielink,"[{'index': 69002, 'name_original': 'Jan Geleij...","{'69002': {'ships': [], 'rank': [], 'location'..."
5002,6a8c2759-a8a2-739c-7c88-66cc2b3074b9,358,JAN VERLEIJ,11945,26354,Testament,1761-06-24,nederlands,,"[{'voornaam': 'Magteld', 'tussenvoegsel': None...","['KLAB06150000131.JPG', 'KLAB06150000132.JPG',...",160 Mgendes Dit Testament van Man No: 7160 en ...,Harmijntje ten Bosch,0,0
5003,7cbe357d-8773-4529-dafd-8aced1e59ed8,358,JAN VERLEIJ,11945,27395,Machtiging,1761-05-20,nederlands,\nSchip de Jonge Catharina\n,"[{'voornaam': 'Sicke', 'tussenvoegsel': None, ...","['KLAB06150000050.JPG', 'KLAB06150000051.JPG',...",Aatijs N �134 Procuratie Gepasseert den 20 Mai...,Sicke Jentjes,0,0
5004,7cbe357d-8773-4529-dafd-8aced1e59ed8,358,JAN VERLEIJ,11945,27395,Machtiging,1761-05-20,nederlands,\nSchip de Jonge Catharina\n,"[{'voornaam': 'Sicke', 'tussenvoegsel': None, ...","['KLAB06150000050.JPG', 'KLAB06150000051.JPG',...",Aatijs N �134 Procuratie Gepasseert den 20 Mai...,Abraham Mijlius,"[{'index': 298118, 'name_original': 'Abraham M...","{'298118': {'ships': [], 'rank': [], 'location..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5606,ec86eb9e-7c9b-a20e-0ba9-31dc4e2d05eb,358,JAN VERLEIJ,11953,33691,Kwitantie,1764-01-19,nederlands,"\nSchip De Bregitta Antonia, crediteuren van o...","[{'voornaam': 'Andries', 'tussenvoegsel': None...","['KLAB06159000055.JPG', 'KLAB06159000056.JPG',...",No: 17 Quitantie gepasseert den 19e: Januarij ...,Maurits Kier,0,0
5607,ec86eb9e-7c9b-a20e-0ba9-31dc4e2d05eb,358,JAN VERLEIJ,11953,33691,Kwitantie,1764-01-19,nederlands,"\nSchip De Bregitta Antonia, crediteuren van o...","[{'voornaam': 'Andries', 'tussenvoegsel': None...","['KLAB06159000055.JPG', 'KLAB06159000056.JPG',...",No: 17 Quitantie gepasseert den 19e: Januarij ...,Jacob Fredrik Stijnman,"[{'index': 261563, 'name_original': 'Jacob Fre...","{'261563': {'ships': [], 'rank': [], 'location..."
5608,ec86eb9e-7c9b-a20e-0ba9-31dc4e2d05eb,358,JAN VERLEIJ,11953,33691,Kwitantie,1764-01-19,nederlands,"\nSchip De Bregitta Antonia, crediteuren van o...","[{'voornaam': 'Andries', 'tussenvoegsel': None...","['KLAB06159000055.JPG', 'KLAB06159000056.JPG',...",No: 17 Quitantie gepasseert den 19e: Januarij ...,Hendrik Kannegieter Jansz,0,0
5609,ec86eb9e-7c9b-a20e-0ba9-31dc4e2d05eb,358,JAN VERLEIJ,11953,33691,Kwitantie,1764-01-19,nederlands,"\nSchip De Bregitta Antonia, crediteuren van o...","[{'voornaam': 'Andries', 'tussenvoegsel': None...","['KLAB06159000055.JPG', 'KLAB06159000056.JPG',...",No: 17 Quitantie gepasseert den 19e: Januarij ...,Racheltje Teunis,0,0


In [36]:
sub[4416:5000]

Unnamed: 0,uuid,rubriek,notaris,inventarisNr,akteNr,akteType,datering,taal,beschrijving,namen,urls,text,name,data_matches,data_entry
4416,ba75d123-80d8-f682-5f80-37297b458a60,358,JAN VERLEIJ,11935,30868,Scheepsverklaring,1758-05-16,nederlands,"\nSchip 't Raadhuijs van Dockem, Engels kapers...","[{'voornaam': 'Sijmen', 'tussenvoegsel': None,...","['KLAB05923000069.JPG', 'KLAB05923000070.JPG',...",1: 2e:No: 163 Verklaring gepasseerd den 16 Mai...,Jurriaan Meijer,"[{'index': 25324, 'name_original': 'Jurrien Me...","{'25324': {'ships': [], 'rank': [], 'location'..."
4417,ba75d123-80d8-f682-5f80-37297b458a60,358,JAN VERLEIJ,11935,30868,Scheepsverklaring,1758-05-16,nederlands,"\nSchip 't Raadhuijs van Dockem, Engels kapers...","[{'voornaam': 'Sijmen', 'tussenvoegsel': None,...","['KLAB05923000069.JPG', 'KLAB05923000070.JPG',...",1: 2e:No: 163 Verklaring gepasseerd den 16 Mai...,Roeloff Janssen,"[{'index': 1513, 'name_original': 'Roeloff Jan...","{'1513': {'ships': [], 'rank': [], 'location':..."
4418,e1299186-f8ed-b460-9214-6c2a767fb092,358,JAN VERLEIJ,11935,33057,Kwitantie,1758-08-13,nederlands,,"[{'voornaam': 'Elsje', 'tussenvoegsel': None, ...","['KLAB05923000404.JPG', 'KLAB05923000405.JPG',...",No: 287 Quitantie gepasseert den 3e: ang: 1758...,Elsje Everts,0,0
4419,e1299186-f8ed-b460-9214-6c2a767fb092,358,JAN VERLEIJ,11935,33057,Kwitantie,1758-08-13,nederlands,,"[{'voornaam': 'Elsje', 'tussenvoegsel': None, ...","['KLAB05923000404.JPG', 'KLAB05923000405.JPG',...",No: 287 Quitantie gepasseert den 3e: ang: 1758...,Tjerk Haijen Cramer,0,0
4420,e1299186-f8ed-b460-9214-6c2a767fb092,358,JAN VERLEIJ,11935,33057,Kwitantie,1758-08-13,nederlands,,"[{'voornaam': 'Elsje', 'tussenvoegsel': None, ...","['KLAB05923000404.JPG', 'KLAB05923000405.JPG',...",No: 287 Quitantie gepasseert den 3e: ang: 1758...,Jan Focking,"[{'index': 95332, 'name_original': 'Jan Hockin...","{'95332': {'ships': ['nan'], 'rank': [], 'loca..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,50566635-f3c7-0eb6-d2c7-64c12ec94030,358,JAN VERLEIJ,11945,24916,Machtiging,1761-07-14,nederlands,\nSchip de Zeeplaeg.\n,"[{'voornaam': 'Jan Theodore', 'tussenvoegsel':...","['KLAB06150000186.JPG', 'KLAB06150000187.JPG',...",No: 178 Procuratie gepasseert den 14e: Julij 1...,Tjerran Hoowers Holm,0,0
4996,50566635-f3c7-0eb6-d2c7-64c12ec94030,358,JAN VERLEIJ,11945,24916,Machtiging,1761-07-14,nederlands,\nSchip de Zeeplaeg.\n,"[{'voornaam': 'Jan Theodore', 'tussenvoegsel':...","['KLAB06150000186.JPG', 'KLAB06150000187.JPG',...",No: 178 Procuratie gepasseert den 14e: Julij 1...,Hendrik Edelhoff,"[{'index': 29972, 'name_original': 'Hendrik Ee...","{'29972': {'ships': [], 'rank': [], 'location'..."
4997,50566635-f3c7-0eb6-d2c7-64c12ec94030,358,JAN VERLEIJ,11945,24916,Machtiging,1761-07-14,nederlands,\nSchip de Zeeplaeg.\n,"[{'voornaam': 'Jan Theodore', 'tussenvoegsel':...","['KLAB06150000186.JPG', 'KLAB06150000187.JPG',...",No: 178 Procuratie gepasseert den 14e: Julij 1...,Pieter Theodoor van Teijlingen,0,0
4998,6a8c2759-a8a2-739c-7c88-66cc2b3074b9,358,JAN VERLEIJ,11945,26354,Testament,1761-06-24,nederlands,,"[{'voornaam': 'Magteld', 'tussenvoegsel': None...","['KLAB06150000131.JPG', 'KLAB06150000132.JPG',...",160 Mgendes Dit Testament van Man No: 7160 en ...,Magteld Tiethoff,0,0
