# Geomapping Biblical locations

In [1]:
import os
import csv, re, collections

In [2]:
from tf.fabric import Fabric
from tf.extra.bhsa import Bhsa
BHSA = f'etcbc/bhsa/tf/c'
TF = Fabric(modules = BHSA)

This is Text-Fabric 5.5.23
Api reference : https://dans-labs.github.io/text-fabric/Api/General/
Tutorial      : https://github.com/Dans-labs/text-fabric/blob/master/docs/tutorial.ipynb
Example data  : https://github.com/Dans-labs/text-fabric-data

114 features found and 0 ignored


In [3]:
api = TF.load('''
    lex gloss nametype sp
''')
api.makeAvailableIn(globals())

  0.00s loading features ...
   |     0.15s B lex                  from C:/Users/Ejer/text-fabric-data/etcbc/bhsa/tf/c
   |     0.01s B gloss                from C:/Users/Ejer/text-fabric-data/etcbc/bhsa/tf/c
   |     0.03s B nametype             from C:/Users/Ejer/text-fabric-data/etcbc/bhsa/tf/c
   |     0.15s B sp                   from C:/Users/Ejer/text-fabric-data/etcbc/bhsa/tf/c
  5.43s All features loaded/computed - for details use loadLog()


## Production

#### Lexemes marked for topos in the ETCBC database:

In [4]:
topo_dict = {}

for n in F.otype.s('lex'):
    if F.nametype.v(n) == 'topo':
        topo_dict[n] = F.gloss.v(n)

len(topo_dict)

841

In [6]:
topo_dict

{1437699: 'Eden',
 1437713: 'Pishon',
 1437716: 'Havilah',
 1437722: 'Gihon',
 1437723: 'Cush',
 1437724: 'Tigris',
 1437727: 'Asshur',
 1437728: 'Euphrates',
 1437866: 'Nod',
 1437869: 'Enoch',
 1437994: 'Ararat',
 1438052: 'Magog',
 1438060: 'Togarmah',
 1438061: 'Elishah',
 1438062: 'Tarshish',
 1438068: 'Egypt',
 1438075: 'Sheba',
 1438076: 'Dedan',
 1438080: 'Babel',
 1438081: 'Erech',
 1438082: 'Akkad',
 1438083: 'Calneh',
 1438084: 'Shinar',
 1438085: 'Nineveh',
 1438086: 'Rehoboth Ir',
 1438087: 'Calah',
 1438088: 'Resen',
 1438097: 'Sidon',
 1438111: 'Gerar',
 1438112: 'Gaza',
 1438113: 'Sodom',
 1438114: 'Gomorrah',
 1438115: 'Admah',
 1438116: 'Zeboiim',
 1438117: 'Lasha',
 1438122: 'Aram',
 1438124: 'Hul',
 1438132: 'Sheleph',
 1438133: 'Hazarmaveth',
 1438140: '<land of gold>',
 1438143: 'Mesha',
 1438144: 'Sephar',
 1438172: 'Ur',
 1438180: '<town>',
 1438185: 'Shechem',
 1438187: 'Moreh',
 1438189: 'Bethel',
 1438191: 'Ai',
 1438227: 'Jordan',
 1438229: 'Zoar',
 1438234:

In [5]:
collections.Counter(topo_dict.values()).most_common()

[('<town>', 5),
 ('Damascus', 3),
 ('<locality>', 3),
 ('<mountain>', 3),
 ('Eden', 2),
 ('Egypt', 2),
 ('Sheba', 2),
 ('Shiloh', 2),
 ('Pi Hahiroth', 2),
 ('Sin', 2),
 ('Aroer', 2),
 ('Baal Meon', 2),
 ('Hor Haggidgad', 2),
 ('Hazar Enan', 2),
 ('Riblah', 2),
 ('Jerusalem', 2),
 ('Debir', 2),
 ('Dor', 2),
 ('Aphek', 2),
 ('Ephron', 2),
 ('Janoah', 2),
 ('Beth Marcaboth', 2),
 ('Abdon', 2),
 ('Baalath', 2),
 ('Micmash', 2),
 ('Samaria', 2),
 ('Kir Hareseth', 2),
 ('Memphis', 2),
 ('Thebes', 2),
 ('Addon', 2),
 ('Pishon', 1),
 ('Havilah', 1),
 ('Gihon', 1),
 ('Cush', 1),
 ('Tigris', 1),
 ('Asshur', 1),
 ('Euphrates', 1),
 ('Nod', 1),
 ('Enoch', 1),
 ('Ararat', 1),
 ('Magog', 1),
 ('Togarmah', 1),
 ('Elishah', 1),
 ('Tarshish', 1),
 ('Dedan', 1),
 ('Babel', 1),
 ('Erech', 1),
 ('Akkad', 1),
 ('Calneh', 1),
 ('Shinar', 1),
 ('Nineveh', 1),
 ('Rehoboth Ir', 1),
 ('Calah', 1),
 ('Resen', 1),
 ('Sidon', 1),
 ('Gerar', 1),
 ('Gaza', 1),
 ('Sodom', 1),
 ('Gomorrah', 1),
 ('Admah', 1),
 ('Zeboi

#### Importing and mapping the OpenBible data set:

Characteristics of the [OpenBible geocoding](https://www.openbible.info/geo/) data set:

Gps-coordinates:
* ? means questionable
* ~ means approximate
* '>' means the location surronds the given point
* '<' means the location is inside the city given

In this notebook gps-coordinates marked with either ? or ~ are always treated as questionable and thus given the comment '?'. All characters except for numbers are then stripped for the coordinates in order to visualize the data.

In [7]:
file = 'openbible_data.txt'

openbible_dict = {}

with open(file) as f:
    next(f)
    next(f)
    reader = csv.reader(f, delimiter='\t')
    for r in reader:
        name = r[0]
        root = r[1]
        gps = (r[2], r[3])
        openbible_dict[name] = [root, gps]
        
len(openbible_dict)

1274

In [8]:
#openbible_dict

Finding out how many cooccurences we have between the ETCBC database and the OpenBible data set in terms of shared glosses:

In [9]:
n = 0

for topo in topo_dict:
    if topo_dict[topo] in openbible_dict:
        n += 1

print(f'Shared glosses: {n}')

Shared glosses: 524


In [10]:
def cleanGPS(c):
    uncertain = ''
    comment = ''
    if c in ['?', '-']:
        uncertain = '?'
        c = ''
    if '?' in c:
        uncertain = '?'
        c = c.rstrip('?')
    if '~' in c:
        uncertain = '?'
        c = re.sub('~', '', c)
    if '<' in c:
        comment += '<'
        c = re.sub('<', '', c)
    if '>' in c:
        comment += '>'
        c = re.sub('>', '', c)
    return [c, uncertain, comment]

In [75]:
geo_coding = []

for topo in topo_dict:
    dic = {}
    if topo_dict[topo] in openbible_dict:
        name = topo_dict[topo]
        modern_name = ''
        gps = openbible_dict[topo_dict[topo]][1]

        gps_lat = cleanGPS(gps[0])
        gps_lon = cleanGPS(gps[1])
        
        #Uncertain:
        if gps_lat[1] != '':
            uncertain = gps_lat[1]
        elif gps_lon[1] != '':
            uncertain = gps_lon[1]
        else:
            uncertain = ''
            
        #Comment:
        if gps_lat[2] != '':
            comment = gps_lat[2]
        else:
            comment = ''
        
        gps = (gps_lat[0],gps_lon[0])
        if gps != ('',''):
            gps = float(gps[0]), float(gps[1])
            dic['name'] = name
            dic['modern name'] = modern_name
            dic['gps'] = gps
            dic['uncertain'] = uncertain
            dic['comment'] = comment
            geo_coding.append(dic)
            
len(geo_coding)

516

For now, I only want to use those locations that are supposedly not ambigous:

In [12]:
count = 0

for l in geo_coding:
    if l['uncertain'] != '?':
        count += 1

print(f'Certain loctions: {count}')

Certain loctions: 354


In [13]:
name_list = []

for l in geo_coding:
    name_list.append(l['name'])
    
collections.Counter(name_list).most_common()

[('Damascus', 3),
 ('Egypt', 2),
 ('Sheba', 2),
 ('Shiloh', 2),
 ('Sin', 2),
 ('Jerusalem', 2),
 ('Dor', 2),
 ('Abdon', 2),
 ('Samaria', 2),
 ('Memphis', 2),
 ('Thebes', 2),
 ('Cush', 1),
 ('Tigris', 1),
 ('Asshur', 1),
 ('Euphrates', 1),
 ('Ararat', 1),
 ('Magog', 1),
 ('Elishah', 1),
 ('Tarshish', 1),
 ('Dedan', 1),
 ('Babel', 1),
 ('Erech', 1),
 ('Calneh', 1),
 ('Shinar', 1),
 ('Nineveh', 1),
 ('Calah', 1),
 ('Resen', 1),
 ('Sidon', 1),
 ('Gerar', 1),
 ('Gaza', 1),
 ('Sodom', 1),
 ('Gomorrah', 1),
 ('Admah', 1),
 ('Zeboiim', 1),
 ('Lasha', 1),
 ('Aram', 1),
 ('Mesha', 1),
 ('Sephar', 1),
 ('Ur', 1),
 ('Shechem', 1),
 ('Jordan', 1),
 ('Zoar', 1),
 ('Mamre', 1),
 ('Hebron', 1),
 ('Ellasar', 1),
 ('Bela', 1),
 ('Seir', 1),
 ('Hobah', 1),
 ('Salem', 1),
 ('Shur', 1),
 ('Bered', 1),
 ('Paran', 1),
 ('Moriah', 1),
 ('Machpelah', 1),
 ('Mesopotamia', 1),
 ('Tema', 1),
 ('Edom', 1),
 ('Esek', 1),
 ('Sitnah', 1),
 ('Shibah', 1),
 ('Gilead', 1),
 ('Galeed', 1),
 ('Mahanaim', 1),
 ('Jabbok', 1

Creating dictionary for export as TF-feature:

In [14]:
c = 0
lat_dict = {}
long_dict = {}

for n in topo_dict:
    gloss = topo_dict[n]
    node = n
    for l in geo_coding:
        if gloss == l['name'] and l['uncertain'] != '?':
            gps = l['gps']
            lat_dict[node] = str(gps[0])
            long_dict[node] = str(gps[1])
            c += 1

print(c)
print(len(lat_dict))

380
354


#### Exporting as TF-feature

In [15]:
if 'SCRIPT' not in locals():
    SCRIPT = False
    FORCE = True
    CORE_NAME = 'bhsa'
    NAME = 'gps'
    VERSION= 'c'
    CORE_MODULE = 'core'

In [16]:
repoBase = os.path.expanduser('~/text-fabric-data/etcbc')
coreRepo = '{}/{}'.format(repoBase, CORE_NAME)
thisRepo = '{}/{}'.format(repoBase, NAME)

coreTf = '{}/tf/{}'.format(coreRepo, VERSION)

thisSource = '{}/source/{}'.format(thisRepo, VERSION)
thisTemp = '{}/_temp/{}'.format(thisRepo, VERSION)
thisTempTf = '{}/tf'.format(thisTemp)

thisTf = '{}/tf/{}'.format(thisRepo, VERSION)
thisNotes = '{}/shebanq/{}'.format(thisRepo, VERSION)

Exporting latitude feature:

In [40]:
nodeFeatures_lat = dict(lat=lat_dict)
metaData_lat = dict(
    lat=dict(
        valueType='str',
        description='Latitude of topological lexemes. The geocoding is retrieved from the data set available at https://www.openbible.info/geo/data/merged.txt',
        coreData='BHSA',
        coreVersion=VERSION
    )
)
TF = Fabric(locations=thisTempTf, silent=True)
TF.save(nodeFeatures=nodeFeatures, edgeFeatures={}, metaData=metaData)

{'gps': [{'name': 'Amam',
   'modern name': '',
   'gps': (31.162327, 35.057114),
   'uncertain': '?',
   'comment': ''},
  {'name': 'Karka',
   'modern name': '',
   'gps': (30.958506, 34.3805),
   'uncertain': '?',
   'comment': ''},
  {'name': 'Hara',
   'modern name': '',
   'gps': (36.344972, 40.789333),
   'uncertain': '?',
   'comment': ''},
  {'name': 'Cuthah',
   'modern name': '',
   'gps': (32.733333, 44.666667),
   'uncertain': '',
   'comment': ''},
  {'name': 'Memphis',
   'modern name': '',
   'gps': (29.849632, 31.253958),
   'uncertain': '',
   'comment': ''},
  {'name': 'Helkath',
   'modern name': '',
   'gps': (32.955448, 35.211971),
   'uncertain': '',
   'comment': ''},
  {'name': 'Arumah',
   'modern name': '',
   'gps': (32.154887, 35.318192),
   'uncertain': '',
   'comment': ''},
  {'name': 'Almon',
   'modern name': '',
   'gps': (31.828406, 35.287637),
   'uncertain': '',
   'comment': ''},
  {'name': 'Shechem',
   'modern name': '',
   'gps': (32.2136912312

Exporting longitude feature:

In [None]:
nodeFeatures = dict(long=long_dict)
metaData = dict(
    long=dict(
        valueType='str',
        description='Longitude of topological lexemes. The geocoding is retrieved from the data set available at https://www.openbible.info/geo/data/merged.txt',
        coreData='BHSA',
        coreVersion=VERSION
    )
)
TF = Fabric(locations=thisTempTf, silent=True)
TF.save(nodeFeatures=nodeFeatures, edgeFeatures={}, metaData=metaData)

#### Retrieving the word nodes for all lexemes annotated for topes in the ETCBC database:

In [121]:
def getRefFromGloss():
    ref_dict = collections.defaultdict(str)

    for n in F.otype.s('word'):
        lex = L.u(n, 'lex')[0]
        if F.nametype.v(lex) == 'topo':
            gloss = F.gloss.v(lex)
            ref_dict[gloss] += f' {str(n)}'
    return ref_dict

In [122]:
ref_dict = getRefFromGloss()

In [200]:
#ref_dict

## Visualizations

In [190]:
def marker_locations(lst):
    return [topo['gps'] for topo in lst]

def marker_info_box(lst):
    global ref_dict
    try:
        ref_dict
    except:
        ref_dict = getRefFromGloss()

    link_dict = {}
    for gloss in ref_dict:
        ref_list = ref_dict[gloss].split()
        ShbLink = 'http://shebanq.ancient-data.org/hebrew/text?'
        link_string = ''
        for r in ref_list:
            bo, ch, ve = T.sectionFromNode(int(r))
            link = f'<a href="{ShbLink}book={bo}&chapter={ch}&verse={ve}">{bo[:3]} {ch}:{ve}</a> '
            link_string += link
        link_dict[gloss] = link_string
    
    place_ref_list = []
    for pl in lst:
        dic = {}
        dic['name'] = pl['name']
        dic['link'] = link_dict[pl['name']]
        place_ref_list.append(dic)

    info_box_template = '''
    <dl>
    <dt>{name}</dt>
    <dd>{link}</dd>
    </dl>
    '''
    info_box = [info_box_template.format(name=gl['name'], link=gl['link']) for gl in place_ref_list]
    return info_box

# marker_info_box(geo_coding)

In [201]:
def geo_coding_places(lst):
    '''
    input: list of dictionaries
    output: list of names in list of dictionaries
    '''
    name_lst = []
    
    for l in lst:
        name_lst.append(l['name'])
        
    return name_lst

def createMap(place):
    '''
    The function creates a sublist of the list of dictionaries. Each element of the sublist
    corresponds to the place names in question.
    '''
    
    sub_lst = []
    
    for pl in place:
        if pl in geo_coding_places(geo_coding):
            for l in geo_coding:
                if l['name'] == pl:
                    sub_lst.append(l)
        else:
            print(f'{pl} does not exist in the dictionary')
            
    symbols = gmaps.symbol_layer(marker_locations(sub_lst), info_box_content=marker_info_box(sub_lst),
                                 fill_color='red', stroke_color='red', scale=3)
    
    m = gmaps.figure(map_type='TERRAIN')
    m.add_layer(symbols)
    return m

def all():
    '''
    Feeds a list of all glosses with gps-coordinates into the function createMap()
    '''
    
    lst = geo_coding_places(geo_coding)
    return geocodingPlace(lst)

In [202]:
place = ['Sinai','Jerusalem','Tarshish']

createMap(place)

Figure(layout=FigureLayout(height='420px'))

In [199]:
all()

Figure(layout=FigureLayout(height='420px'))