In [1]:
import json
import re
import pandas as pd
import roman

In [2]:
#Import entity recognition
entity_file = "../NER/NER_results.txt"
place_entity_dict = dict()
with open(entity_file, 'r', encoding='utf-8') as ef:
    for line in ef.readlines():
        line = line.strip().lstrip('%').strip()
        split_line = line.split('%')
        place = split_line[0]
        entities = split_line[1].split('#')
        
        if place not in place_entity_dict.keys():
            place_entity_dict[place] = entities

print(place_entity_dict)

{'Abate (Corte dell’)  a S. Gregorio.': ['Descrizione della Contrada di S. Gregorio', 'X Savii', 'Decime', '1661', 'Corte', 'dell’Abate', 'case', 'Abazia di S. Gregorio', '1703', 'cardinale Ottobuoni', 'Pietro Ottobuono', 'Venezia', '1667', '1689', 'Papa Alessandro VIII', 'cardinale', 'ecclesiastico', 'Avignone', 'Santa Chiesa', '1693', 'abazia di S. Gregorio', '1701', 'Antonio', 'Senato', '1710', 'Francia', 'Corte Romana', 'Libro d’Oro', '1740', 'Ottobuoni', 'Emmanuele Cicogna', '«Inscrizioni Veneziane»'], 'Abazia (Calle dell’)  a S. Gregorio.': ['Abate'], 'Abazia (Fondamenta, Sottoportico, Campo, Ponte, Fondamenta dell’)  a S. Maria della Misericordia.': ['chiesa abaziale', 'S. Maria della Misericordia', 'l’Abazia', '939', 'Val Verde', 'Cesare dei Giulii', 'Andreardo', 'Giulia', 'Moro', 'Agostiniani', '1348', 'Luca Moro', '1369', 'secolo XVII', 'Clemente Moli', 'Gasparo Moro', 'Pietro Pianton', '1868', '1884', 'patriarca di Venezia', 'Domenico Agostini', 'Girolamo Savina', 'Clemente 

In [27]:
def check_if_century(entity, place):
    try:
        century = int(roman.fromRoman(entity))
        if century < 6:
            return None
        if century < 20:
            century = century * 100 + 50
            print(place)
            print(century)
            return century
        if century < 450:
            return None
        print(place)
        print(century)
        return century

    except:
        return None


In [25]:
def check_if_year(entity, place):
    # 1st we check if the entity is a straight year
    try:
        year = int(entity)
        if year < 1864 and year > 400:
            return year
    except:
        pass

    # Then we check if the entity is a date (with a year)
    pattern = r'([0-9]{3,4})'
    years_found = re.findall(pattern, entity)
    if years_found != []:
        return years_found
    
    #see if there is a century mentionned and transform it into an int
    century = check_if_century(entity, place)
    if century:
        return century

    return None

In [28]:
# Retain only years for each place
place_year_dict = dict()
for place, entities in place_entity_dict.items():
    year_list = []
    for entity in entities:
        year = check_if_year(entity, place)
        if year and type(year) is int:
            year_list.append(year)
        elif year and type(year) is list:
            year_list + year

    place_year_dict[place] = sorted(year_list)

with open("../out/place_entity_dict.json", "w", encoding='utf-8') as fp:
    json.dump(place_year_dict , fp, ensure_ascii=False) 

Albrizzi (Ramo e Campiello, Calle, Campiello)  a S. Apollinare.
1650
Amai (Calle dei)  a S. Giovanni Gr isostomo.
1450
Amai (Calle dei)  a S. Giovanni Gr isostomo.
1650
Angelo Raffaele (Parrocchia, Campo, Rio, Pon te dell’).
1862
Annunziata (Sottoportico, Corte della)  a S. Maria Formosa.
1681
Armeni (Calle, Ramo d egli) a S. Giuliano.
1550
Aséo (Calle e Ponte dell’)  ai SS. Ermagora e Fortunato.
1550
Aséo (Calle e Ponte dell’)  ai SS. Ermagora e Fortunato.
1650
Avogarìa (Ponte, Rio, Ramo, Calle della)  a S. Barnaba.
1750
Badoer (Ramo, Sottoportico, Corte)  ai Frari.
1250
Balastro (Calle, Ramo, Sottoportico, Campiello)  a S. Basilio.
1350
Barbarigo (Corte)  all’Angelo Raffaele.
1592
Barbaro (Corte, Fondamenta, Ramo, Fondamenta, Calle)  a S. Vitale.
1450
Barbo (Ramo Corte, Corte, Ramo Corte seconda)  a S. Pantaleone.
1550
Baretteri (Ponte, Rio dei)  a S. Salvatore.
1350
Baretteri (Ponte, Rio dei)  a S. Salvatore.
1450
Baretteri (Ponte, Rio dei)  a S. Salvatore.
1650
Beccarie (Calle , Ca

In [20]:
# transform into df
records = []
for place, years in place_year_dict.items():
    for year in years:
        records.append({'Place': place, 'Year': year})

df = pd.DataFrame(records)
df[df["Place"] == "Zusto (Salizzada, Ramo Salizzada)  a S. Giacomo dall’Orio."]
df.to_csv("../out/place_entity_df.csv", index=False)

In [6]:
import plotly.express as px

fig = px.scatter(df[df["Place"] == "Zusto (Salizzada, Ramo Salizzada)  a S. Giacomo dall’Orio."], x="Year", y="Place", title="Historical Mentions of Places",
                 labels={"Year": "Year", "Place": "Place"}, hover_data=['Place'])
fig.show()