In [75]:
# Import necessary modules
import pandas as pd
import numpy as np
import difflib

def within_range(checknumber, range_start, range_end):
    """" 
    Function to check if a BGB booking year falls within the timespan
    in DAS referred to in the introduction to this script
    
    """
    if (checknumber >= range_start) & (checknumber <= (range_end + 20)):
        return True
    else:
        return False

In [29]:
# First, load the entire Zeemonsterrollen database
gzm = pd.ExcelFile('zeemonsterrollen.xlsx')
bgbmatch = pd.ExcelFile('Matching_results.xlsx')

In [30]:
# Parse the Excel sheets we will be using
zeemons = gzm.parse('Database GZM (MvR 2014)')
zeemons = zeemons.set_index('ID')

bgbnomatch = bgbmatch.parse('No match')

In [18]:
# Loop over all entries without a corresponding DAS Ship ID

gzm_ships = []

for entry in zeemons[zeemons['DAS SHIP ID'].isna()].index:
    jaar = zeemons.loc[entry, 'JAAR']
    shipname = zeemons.loc[entry, 'NAAM SCHIP (GESTANDAARDISEERD)']
    this_entry = [jaar, shipname]
    gzm_ships.append(this_entry)


fulldata = pd.DataFrame.from_records(gzm_ships, columns=['Year', 'Shipname'])

In [19]:
fulldata

Unnamed: 0,Year,Shipname
0,1691,SINT NICOLAAS
1,1691,GRIJPVOGEL
2,1691,BATAVIA
3,1691,STANDVASTIGHEID
4,1691,WIJK OP ZEE
...,...,...
1707,1790,TRITON
1708,1790,AFRIKAAN
1709,1791,ORANJEBOOM
1710,1791,CORNELIA ADRIANA


In [43]:
summary = fulldata.groupby('Shipname')['Year'].agg({'min', 'max'}).reset_index()
summary['Shipname'] = summary['Shipname'].str.lower()
summary

Unnamed: 0,Shipname,min,max
0,aardenhout,1696,1706
1,achilles,1714,1719
2,adam,1712,1719
3,adam en eva,1718,1718
4,adriana,1736,1754
...,...,...,...
398,zwaardvis,1697,1708
399,zwarte arend,1696,1696
400,zwerver,1732,1741
401,[eiland edam],1703,1703


In [64]:
gzm_shiplist = list(summary['Shipname'].unique())
gzm_shiplist.remove('[onbekend]')

In [52]:
bgbnomatch['BGB ship name'] = bgbnomatch['BGB ship name'].str.lower()

In [53]:
bgbnomatch

Unnamed: 0.1,Unnamed: 0,BGB Shipvoyage ID,BGB Voyage ID,BGB ship ID,BGB ship name,BGB Booking year
0,0,139,99448,3150,kleine pallas,1790
1,1,145,99454,3153,langmoedigheid,1790
2,2,149,99458,3154,vredelief,1790
3,3,151,99460,3155,wilhelmina,1790
4,4,153,99462,3156,jonge wilhelmina,1790
...,...,...,...,...,...,...
3983,3983,20338,116787,3412,onbekend,1731
3984,3984,20339,117012,3412,onbekend,1723
3985,3985,20340,117815,3412,onbekend,1729
3986,3986,20341,117832,3412,onbekend,1729


In [83]:
counter = 0

for entry in bgbnomatch.index:
    bgbyear = int(bgbnomatch.loc[entry, 'BGB Booking year'])
    bgb_shipname = bgbnomatch.loc[entry, 'BGB ship name']
    checking = difflib.get_close_matches(bgb_shipname, gzm_shiplist, n=3, cutoff=0.85)
    
    if checking:
        try:
            minyear_gzb = int(summary.loc[summary['Shipname'] == bgb_shipname]['min'])
        except:
            minyear_gzb = 0
        
        try:
            maxyear_gzb = int(summary.loc[summary['Shipname'] == bgb_shipname]['max'])
        except:
            maxyear_gzb = 0

        
        if within_range(bgbyear, minyear_gzb, maxyear_gzb):
            counter += 1
            print(counter)
            print(bgb_shipname)
            print([checking[0]])
            print(bgbyear)
            print(minyear_gzb, maxyear_gzb)
            print('-------------')

1
kleine pallas
['kleine pallas']
1790
1779 1790
-------------
2
cornelia adriana
['cornelia adriana']
1780
1778 1791
-------------
3
cornelia adriana
['cornelia adriana']
1780
1778 1791
-------------
4
kleine pallas
['kleine pallas']
1780
1779 1790
-------------
5
kleine pallas
['kleine pallas']
1790
1779 1790
-------------
6
rembang
['rembang']
1790
1714 1789
-------------
7
kleine pallas
['kleine pallas']
1787
1779 1790
-------------
8
kleine pallas
['kleine pallas']
1787
1779 1790
-------------
9
kleine pallas
['kleine pallas']
1787
1779 1790
-------------
10
charlotta christina
['charlotte christina']
0
0 0
-------------
11
cornelia adriana
['cornelia adriana']
1780
1778 1791
-------------
12
kleine pallas
['kleine pallas']
1780
1779 1790
-------------
13
rembang
['rembang']
1787
1714 1789
-------------
14
cornelia adriana
['cornelia adriana']
1787
1778 1791
-------------
15
kleine pallas
['kleine pallas']
1787
1779 1790
-------------
16
kleine pallas
['kleine pallas']
1787
1779 1

1746 1752
-------------
158
ongehoorzaamheid
['ongehoorzaamheid']
1743
1742 1744
-------------
159
lamatjang
['lamatjang']
1743
1739 1739
-------------
160
anthonia
['anthonia']
1743
1725 1744
-------------
161
anthonia
['anthonia']
1743
1725 1744
-------------
162
adriana
['adriana']
1751
1736 1754
-------------
163
geertruida maria
['geertruida maria']
1743
1731 1752
-------------
164
verlangen
['verlangen']
1743
1735 1736
-------------
165
verlangen
['verlangen']
1743
1735 1736
-------------
166
wereld
['wereld']
1757
1751 1758
-------------
167
wereld
['wereld']
1757
1751 1758
-------------
168
wereld
['wereld']
1757
1751 1758
-------------
169
nederlandse maagd
['nederlandse maagd']
1751
1747 1761
-------------
170
herderin
['herderin']
1757
1698 1755
-------------
171
aurora
['aurora']
1757
1756 1756
-------------
172
anthonia dorothea
['anthonia dorothea']
1757
1745 1746
-------------
173
wereld
['wereld']
1751
1751 1758
-------------
174
wereld
['wereld']
1759
1751 1758
-------

340
anthonia
['anthonia']
1739
1725 1744
-------------
341
colombo
['colombo']
1739
1717 1762
-------------
342
jacob willem
['jacob willem']
1738
1736 1741
-------------
343
colombo
['colombo']
1738
1717 1762
-------------
344
hottentot
['hottentot']
1738
1719 1726
-------------
345
lijdzaamheid
['lijdzaamheid']
1738
1734 1753
-------------
346
lijdzaamheid
['lijdzaamheid']
1738
1734 1753
-------------
347
colombo
['colombo']
1738
1717 1762
-------------
348
olijftak
['olijftak']
1738
1728 1729
-------------
349
olijftak
['olijftak']
1738
1728 1729
-------------
350
jacob willem
['jacob willem']
1738
1736 1741
-------------
351
beschermer
['beschermer']
1732
1709 1734
-------------
352
suikermaalder
['suikermaalder']
1732
1711 1754
-------------
353
beschermer
['beschermer']
1732
1709 1734
-------------
354
noodhulp
['noodhulp']
1732
1712 1718
-------------
355
dwars in de weg
['dwars in de weg']
1732
1709 1713
-------------
356
rotte
['rotte']
1739
1736 1741
-------------
357
olijfta

1697 1714
-------------
491
beschermer
['beschermer']
1713
1709 1734
-------------
492
jambij
['jambij']
1713
1712 1719
-------------
493
leervis
['leenvis']
0
0 0
-------------
494
jambij
['jambij']
1715
1712 1719
-------------
495
dijkveld
['dijkveld']
1707
1705 1705
-------------
496
dijkveld
['dijkveld']
1707
1705 1705
-------------
497
dijkveld
['dijkveld']
1715
1705 1705
-------------
498
dijkveld
['dijkveld']
1707
1705 1705
-------------
499
dijkveld
['dijkveld']
1707
1705 1705
-------------
500
dijkveld
['dijkveld']
1707
1705 1705
-------------
501
herderin
['herderin']
1707
1698 1755
-------------
502
oostvoorn
['oostvoorn']
1707
1699 1708
-------------
503
herderin
['herderin']
1707
1698 1755
-------------
504
oostvoorn
['oostvoorn']
1707
1699 1708
-------------
505
langerak
['langerak']
1713
1713 1735
-------------
506
lijnbaan
['lijnbaan']
1715
1714 1719
-------------
507
lijnbaan
['lijnbaan']
1715
1714 1719
-------------
508
neira
['neira']
1715
1701 1711
-------------
509

659
kleine pallas
['kleine pallas']
1785
1779 1790
-------------
660
leguaan
['leguaan']
1763
1762 1762
-------------
661
suikermaalder
['suikermaalder']
1723
1711 1754
-------------
662
haai
['haai']
1723
1722 1722
-------------
663
ontong java
['ontong java']
1723
1721 1722
-------------
664
haai
['haai']
1727
1722 1722
-------------
665
haai
['haai']
1727
1722 1722
-------------
666
hitoe
['hitoe']
1701
1701 1712
-------------


In [65]:
gzm_shiplist

['aardenhout',
 'achilles',
 'adam',
 'adam en eva',
 'adriana',
 'afrika',
 'afrikaan',
 'agnieta',
 'aletta adriana',
 'allerlande',
 'ameij',
 'amstelstroom',
 'andromeda',
 'anna',
 'anthonia',
 'anthonia dorothea',
 'appelboom',
 'arend',
 'aurora',
 'awers',
 'bagger',
 'bagger; platvis; steenbrasem',
 'bakker',
 'baloebaroe',
 'banda',
 'banka',
 'bantam',
 'barcas',
 'baros',
 'basra',
 'bastaard ',
 'batavia',
 'batavier',
 'beieren',
 'bennebroek',
 'bergwerker',
 'beschermer',
 'beverwaart',
 'beverwijk',
 'biliton',
 'blauwe berg',
 'blik',
 'bliksem',
 'boekenrode',
 'bombardier',
 'bombaria',
 'bonij',
 'boot',
 'bouro',
 'brak',
 'brandenburg',
 'breedte',
 'bruinvis',
 'buis',
 'burg',
 'cacop',
 'cacop; klipvis; witvis',
 'cadirij',
 'calpetij',
 'cananoor',
 'casuaris',
 'catharina',
 'ceilon',
 'ceram',
 'charlotte christina',
 'cheribon',
 'cicero',
 'cochin',
 'colombo',
 'constantia',
 'cornelia',
 'cornelia adriana',
 'cornelia hillegonda',
 'cornelia jacoba',
 '