In [None]:
import folium
from folium import plugins
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import os
from lxml import etree
import pandas as pd

# Geocoder client shared by all coordinate lookups in this notebook;
# the user_agent string is what this application reports to the Nominatim service.
geolocator = Nominatim(user_agent="maps1")


### static variables and functions

In [None]:
# Month number -> every spelling accepted for that month when parsing a date
# (abbreviations, full names and spelling variants).
months_to_numbers = {
    1: ["jan.", "januari"],
    2: ["febr.", "februari", "februrai"],
    3: ["maa.", "maart"],
    4: ["apr.", "april"],
    5: ["mei"],
    6: ["jun.", "juni"],
    7: ["jul.", "juli"],
    8: ["aug.", "augustus"],
    9: ["sept.", "september"],
    10: ["oct.", "okt.", "oktober"],
    11: ["nov.", "november"],
    12: ["dec.", "december"],
}

In [None]:

def addInfoToPoints(location, file):
    """Append a map point for `location`, as mentioned in `file`, to the global `points` list.

    The point is only added when both lookups succeed: `dct_dates` must hold a
    date for `file` and `modern_loc` must hold coordinates for `location`.
    When either is missing the call does nothing.

    NOTE(review): this reads the global `modern_loc`, whereas the geocoding
    cell of this notebook fills `modern_loc1` -- confirm where `modern_loc`
    is supposed to be defined.

    Parameters
    ----------
    location : key looked up in `modern_loc`; stored as the point's 'name'.
    file : key looked up in `dct_dates`; stored as the point's 'file'.
    """
    date = dct_dates.get(file)
    coord = modern_loc.get(location)

    # Identity checks: only a missing entry (None) suppresses the point.
    if date is None or coord is None:
        return

    points.append({
        'name': location,
        'time': date,
        'file': file,
        'coord': coord,
    })
            

    
def checkfornext(df, index, column):
    """Tell whether the row at position `index` carries the tag "I-LOC".

    Parameters
    ----------
    df : pandas.DataFrame to inspect.
    index : integer row position.
    column : integer column position of the tag (previously ignored in
        favour of a hard-coded 2; existing callers pass 2).

    Returns
    -------
    int
        1 if the cell equals "I-LOC", otherwise 0 -- also when the position
        lies outside the frame.
    """
    try:
        tag = df.iat[index, column]
    except IndexError:
        # Ran past the last row (or column): there is no following token.
        return 0
    return 1 if tag == "I-LOC" else 0

### extracting dates and pages from xml-files

In [None]:
# Folder with the xml files, expected inside the current working directory.
directory = os.path.join(os.getcwd(), "gm-xml")

# file name without ".xml" -> date string "<year>-<month>-<day>"
dct_dates = {}

# file name without ".xml" -> [volume text, page text]
dct_volume_page = {}

for filename in os.listdir(directory):
    if not filename.endswith(".xml"):
        continue

    with open(os.path.join(directory, filename), "r", encoding="utf-8") as infile:
        text = infile.read()
    root = etree.fromstring(text)

    # get the date (read from a fixed position in the document tree)
    date = root[0][0][1][0][0].text

    # Some files have no date mentioned, and will thus be ignored;
    # other files have weird text in the date-entry, and will also be ignored.
    try:
        words_date = date.split()
        if words_date[2].endswith("."):
            words_date[2] = words_date[2].strip(".")
        # The year is stored shifted by 1000 (matching the placeholder date
        # "2610-12-31" used for the first map point).
        # NOTE(review): the reason for the shift is not visible here.
        new_year = int(words_date[2]) + 1000
    except (AttributeError, IndexError, ValueError):
        # AttributeError: no date text at all; IndexError: fewer than three
        # words; ValueError: the third word is not a number.
        continue

    if words_date[1] == "en":
        # correcting for outlier: this entry gets a hard-coded date
        words_date[0] = "9"
        new_month = 3
        new_year = 2683
    else:
        new_month = "default"
        for number, spellings in months_to_numbers.items():
            if words_date[1] in spellings:
                new_month = number
                break

        # testing for mistakes: report month words missing from the table
        if new_month == "default":
            print(words_date[1])

    # add new dates together
    new_date = str(new_year) + "-" + str(new_month) + "-" + words_date[0]
    # put in dictionary without the extension .xml
    dct_dates[filename[:-4]] = new_date

    # extracting missive number and page number; the first 12 characters of
    # the missive text are dropped, the remainder is later parsed as the
    # volume number by findLink
    missive = root[0][0][1][0][1].text
    page = root[0][0][3][0][0][0][0].text
    dct_volume_page[filename[:-4]] = [missive[12:], page]
            
            

### reading in the annotated locations

In [None]:
# Annotated locations: tab-separated, no header row. Further down, column 0 is
# used as the dictionary key and column 2 as the name sent to the geocoder.
# (os.getcwd is used explicitly: only the `os` module itself is imported.)
df = pd.read_csv(os.path.join(os.getcwd(), "locaties1.tsv"), encoding="utf-8", sep="\t", header=None)
# Empty cells become 0 so the lookup loop can recognise them.
df1 = df.fillna(0)

df1

### for each location, look up the coordinates

This code can take a while to run: every location is looked up through the Nominatim web service, and each request is given up to 20 seconds to answer before it is reported as a timeout.

In [None]:
def lookupCoord(location):
    """Geocode `location` with the global `geolocator`.

    Parameters
    ----------
    location : the query (place name) to send to the geocoder.

    Returns
    -------
    list or None
        [longitude, latitude] of the match, or None when the geocoder
        returns no result.
    """
    # Query the argument itself (not the loop variable `row` of another cell).
    match = geolocator.geocode(location)
    if match is None:
        return None
    return [match.longitude, match.latitude]

In [None]:
# value of column 0 -> [longitude, latitude] of the place named in column 2
modern_loc1 = {}

for index, row in df1.iterrows():
    query = row[2]

    # "O" marks rows for which nothing has to be looked up.
    if query == "O":
        continue

    # An empty cell (filled with 0 by fillna) ends the whole lookup run.
    # This is checked up front so that no request is sent for it.
    if query == "0" or query == 0:
        print(f"failed to lookup the following location: {row[0]}; {query}")
        break

    try:
        # NOTE(review): timeout is the maximum wait for one answer, not a
        # pause between requests -- requests are sent back to back.
        location = geolocator.geocode(query, timeout=20)
    except GeocoderTimedOut:
        print("the following location couldn't be looked up due to a timeout: " + str(row))
        continue

    if location is None:
        # Nothing found: report it and skip the row, so that the coordinates
        # of the previous row are not stored under this name.
        print(f"failed to lookup the following location: {row[0]}; {query}")
        continue

    modern_loc1[row[0]] = [location.longitude, location.latitude]
    
    

### saving the coordinates

In [None]:
# One line per location: "<name><TAB><[longitude, latitude]>".
# utf-8 is set explicitly, like for every other file in this notebook, so
# names with non-ASCII characters are written the same way on every platform.
with open("locatiescoord1.tsv", "w", encoding="utf-8") as outfile:
    for key, value in modern_loc1.items():
        # f-string formatting also copes with keys that are not strings
        outfile.write(f"{key}\t{value}\n")

### for the found coordinates, add them to the list "points"

In [None]:
#directory to the system output (os.getcwd: only the `os` module itself is imported)
directory = os.path.join(os.getcwd(), "system_output")

#first default value on the map: a placeholder entry whose coordinates
#[-1000, -1000] are not a valid longitude/latitude pair
points = [{
    'name': "Begindate",
    'time': "2610-12-31",
    'file': [""],
    'coord': [-1000, -1000],
}]

In [None]:
# Kept for the (currently disabled) frequency count of location names:
# translating locations to modern names is done manually, so only the most
# common names were handled.
dct_locations = {}

for filename in os.listdir(directory):
    if not filename.endswith(".txt"):
        continue

    #save filename without its first 7 characters (the "System" prefix) and the extension
    file = filename[7:-4]

    #read in all files; a file that cannot be parsed is reported and skipped
    try:
        df = pd.read_csv(os.path.join(directory, filename), encoding="utf-8", sep="\t", header=None)
    except Exception as err:
        print(f"skipping {filename}: {err}")
        continue

    for index, row in df.iterrows():
        if row[2] != "B-LOC":
            continue

        # A location starts at a "B-LOC" row; every directly following
        # "I-LOC" row adds one more word to its name.
        location = row[1]
        i = index + 1
        while checkfornext(df, i, 2) == 1:
            location += " " + df.iat[i, 1]
            i += 1

        addInfoToPoints(location, file)
            

### Adding the points as features to the map


In [None]:
#difference between the page number used in the link and the real page number,
#one entry per volume (index 0 belongs to volume 1); these were found manually
pagedifference = [23, 13, 13, 15, 15, 15, 11, 11, 13, 11, 11, 11, 11]

def findLink(file):
    """Build the link to the scanned page that belongs to `file`.

    Parameters
    ----------
    file : key into `dct_volume_page`, whose values are
        [volume text, page text].

    Returns
    -------
    str or None
        The URL, or None when `file` is unknown, its volume/page cannot be
        read as integers, or the volume has no entry in `pagedifference`.
    """
    try:
        entry = dct_volume_page.get(file)
        volume = int(entry[0])
        temp_page = int(entry[1])
    except (TypeError, ValueError, IndexError):
        # TypeError: unknown or unhashable key, or missing text;
        # ValueError: text that is not a number; IndexError: incomplete entry.
        return None

    # Guard the lookup explicitly: volume 0 or a negative volume would
    # otherwise wrap around to the end of the list.
    if not 1 <= volume <= len(pagedifference):
        return None

    page = temp_page + pagedifference[volume - 1]

    link = f"http://resources.huygens.knaw.nl/retroboeken/generalemissiven/#page={page}&accessor=toc&source={volume}&view=imagePane&size=1219"
    return link

In [None]:
#this code looks for entries of the same year that share the same coordinates but come
#from different files, so they can be added together into one entry of new_points

new_points = []

# Bucket every real point by (year, coordinates); points[0] is the placeholder
# entry and never takes part. Grouping in a single pass replaces the earlier
# positional scan, which lost track of its position once entries had been
# removed from `points` and could therefore miss matches.
groups = {}
for point in points[1:]:
    # assumes coord is a flat [longitude, latitude] list -- TODO confirm
    key = (point.get("time")[0:4], tuple(point.get("coord")))
    groups.setdefault(key, []).append(point)

# id() of every point that ended up in a merged entry; identity is used
# because two separate points can be equal as dictionaries.
merged_ids = set()

for group in groups.values():
    anchor = group[0]
    # Only mentions from a different file than the first one are merged;
    # further mentions from the anchor's own file stay in `points`.
    others = [p for p in group[1:] if p.get("file") != anchor.get("file")]
    if not others:
        continue

    # Same order as before: the merged mentions first, the anchor last.
    members = others + [anchor]
    new_points.append({
        'name': [p.get("name") for p in members],
        'time': [p.get("time") for p in members],
        'file': [p.get("file") for p in members],
        'coord': anchor.get("coord"),
    })
    merged_ids.update(id(p) for p in members)

# Whatever was merged leaves `points`; the rest keeps its original order.
points[:] = [p for p in points if id(p) not in merged_ids]
    
 

In [None]:
features = []

# Coordinates that are left out of the single-entry markers.
# NOTE(review): presumably a known wrong geocoding match -- confirm; entries in
# new_points are not filtered on it.
SKIPPED_COORD = [4.1905396615047525, 48.3201921]


def _popup_entry(link, name, time):
    """Return the popup HTML for one mention: a link when a URL is known, plain text otherwise."""
    label = f' {name} {time} '
    if link is None:
        # findLink found no page: show the text without a dead "None" link
        return label
    return f'<a href="{link}" target="_blank">{label}</a>'


def _make_feature(coord, popup, time):
    """Build the GeoJSON feature (a green circle with a popup) for one marker.

    The coordinate and the time are both listed twice, as a two-element
    MultiPoint with a matching two-element 'times' list.
    """
    return {
        'type': 'Feature',
        'geometry': {
            'type': 'MultiPoint',
            'coordinates': [coord, coord],
        },
        'properties': {
            'popup': popup,
            'times': [time, time],
            'icon': 'circle',
            'iconstyle': {
                'fillColor': 'green',
                'fillOpacity': 0.6,
                # NOTE(review): 'false' is a string, not a boolean -- confirm
                # whether the outline is meant to be switched off.
                'stroke': 'false',
                'radius': 13
            },
            'style': {'weight': 0},
            'id': 'man'
        }
    }


#this adds the "singletons" to the features
for point in points:
    if point["coord"] == SKIPPED_COORD:
        continue

    popup = _popup_entry(findLink(point["file"]), point["name"], point["time"])
    features.append(_make_feature(point["coord"], popup, point["time"]))

#this adds the waypoints with more than 1 entry to the features
for point in new_points:
    links = [findLink(file) for file in point.get("file")]
    entries = [
        _popup_entry(link, name, time)
        for link, name, time in zip(links, point.get("name"), point.get("time"))
    ]
    # <br> puts every mention on its own line; a bare newline is not shown as
    # a line break in HTML.
    popup = "<br>\n".join(entries)

    # the marker is shown at the time of the first listed mention
    features.append(_make_feature(point.get("coord"), popup, point.get("time")[0]))

#this makes the map: a base map plus one time-slider layer holding all features
maps = folium.Map(
    location=[52.3727598, 4.8936041],
    zoom_start=2,
    tiles="cartodbpositron",
)

feature_collection = {
    'type': 'FeatureCollection',
    'features': features,
}

time_layer = plugins.TimestampedGeoJson(
    feature_collection,
    period='P1Y',
    auto_play=False,
    loop=False,
    max_speed=1,
    loop_button=True,
    date_options='YYYY/MM/DD',
    time_slider_drag_update=True,
    duration='P1Y',
)
time_layer.add_to(maps)

In [None]:
#show the map: as the last expression of the cell it is rendered as the cell's output
maps

In [None]:
#write the map to the file "map.html" (relative path, so it ends up in the current working directory)
maps.save("map.html")