In [2]:
import csv
import json
import xml.etree.ElementTree as ET
import re
from textwrap import wrap
from difflib import SequenceMatcher
from unidecode import unidecode
import urllib.request
from SPARQLWrapper import SPARQLWrapper, JSON
from googletrans import Translator
from deep_translator import GoogleTranslator


# Importazione file

Importo i file csv contenenti i risultati delle query.
Ogni record relativo a un ms. è convertito in un dizionario e aggiunto a una lista.

In [181]:
# Imago manuscript records: one dict per CSV row, keyed by `fields`.
imago = []

fields = ['Signature', 'Library', 'Location', 'Authors', 'Notes', 'LocationIRI']

with open('imago.csv', encoding='utf-8') as imagofile:
    imago_csv = list(csv.reader(imagofile))

# The first row is skipped (it is treated as the header line).
for row in imago_csv[1:]:
    record = dict(zip(fields, row))

    if record['Notes']:
        # Reduce the free-text notes to the folio count and the dimensions.
        notes = re.sub(r"(\d+), I+'", r'\1', record['Notes'])
        noteNumbers = re.findall(r'\d{1,3}(?![\drv])(?:\s*(?:ff|fogli)|(?:-\d+)?\W*x\W*\d+(?:-\d+)?\W*mm)?', notes)
        # Drop repeated fragments while keeping the order of first appearance.
        matches = list(dict.fromkeys(noteNumbers))
        listing = str(matches)
        # When both a folio count and a size in mm were found, bare numbers are discarded.
        if re.search(r'f(f|ogli)', listing) and 'mm' in listing:
            matches = [x for x in matches if re.search(r'f(f|ogli)|mm', x)]
        record['Notes'] = ', '.join(matches)

    imago.append(record)

imago.sort(key=lambda x: (x['Location'], x['Library'], x['Signature'].strip()))

In [182]:
# MMM manuscript records: one dict per CSV row, keyed by `fields`.
mmm = []

fields = ['Signature', 'Library', 'Location', 'Authors', 'Measures', 'IRI', 'Collection', 'url', 'Source']

with open('mmm_bibale_oxford.csv', encoding='utf-8') as bibale_file, open('mmm_sdbm.csv', encoding='utf-8') as sdbm_file:
    bibale_csv = list(csv.reader(bibale_file))
    sdbm_csv = list(csv.reader(sdbm_file))

# Header rows are skipped; SDBM rows come first, then the Bibale/Oxford rows.
mmm_csv = sdbm_csv[1:] + bibale_csv[1:]

for row in mmm_csv:
    # Columns 4-6 (folios, height, width) are folded into a single dict
    # stored in the row at position 4, so that it lines up with 'Measures'.
    measures = dict(zip(['folios', 'height', 'width'], row[4:7]))
    row[4:7] = [measures]
    mmm.append(dict(zip(fields, row)))

mmm.sort(key=lambda x: (x['Location'], x['Library'], x['Signature']))

In [183]:
# Manually written JSON file, loaded as a dict.
# It lists the Imago authors that also occur in MMM; each one is mapped to the
# name form used in MMM and to a regex pattern.
# The encoding is stated explicitly (as for the CSV files) so that loading does
# not depend on the platform's default locale encoding.
with open("authorNames.json", 'r', encoding='utf-8') as f:
    authorNames = json.load(f)
        


## Recupero delle labels delle città

Per le località Imago usa i nomi in italiano (laddove disponibili), mentre in MMM sono riportati in inglese o in lingua originale. In certi casi questo comporta un ostacolo nel rilevamento dei match, dato che la località è il primo criterio in base al quale la lista di MMM viene filtrata.
Dato che in Imago sono forniti gli IRI di Wikidata relativi alle località, da questi è possibile ricavare i nomi delle città in inglese e nelle lingue ufficiali del paese di appartenenza interrogando il KB di Wikidata.

In [184]:
# The dictionary produced by the place-name code was saved as
# 'mappedPlaces.json'; it can be loaded directly without running that code again.
# The encoding is stated explicitly so that loading does not depend on the
# platform's default locale encoding.
with open("file_generati/mappedPlaces.json", 'r', encoding='utf-8') as f:
    mappedPlaces = json.load(f)

In [None]:
def getPlaceNames(place):
    """Query the Wikidata SPARQL endpoint for the labels of a place.

    Args:
        place: IRI of the place; it is interpolated into the query as <IRI>.

    Returns:
        A list with the text of the first binding of every result row: the
        labels of the place in English and in the languages reached through
        P17/P37 that satisfy the P282 (Q8229) and P424 constraints of the query.
    """
    endpoint = SPARQLWrapper('https://query.wikidata.org/sparql')

    endpoint.setQuery(f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/prop/direct/>

        SELECT DISTINCT ?placeLabel WHERE {{
          BIND(<{place}> AS ?place)
          ?place rdfs:label ?label.                                          
          OPTIONAL {{
            ?place wd:P17/wd:P37 ?lang.  
            ?lang wd:P282 <http://www.wikidata.org/entity/Q8229>; #solo lingue che usano alfabeto latino
                  wd:P424 ?langCode. 
          }}
          FILTER(LANG(?label)='en' || LANG(?label)=?langCode)
          BIND(STR(?label) AS ?placeLabel)

        }}                                                            

    """)

    # The converted response is serialised to XML text and re-parsed with
    # ElementTree so that the result rows can be located by tag name.
    document = endpoint.queryAndConvert()
    root = ET.fromstring(document.toxml())
    rows = root.findall('.//*{http://www.w3.org/2005/sparql-results#}result')
    return [row[0][0].text for row in rows]

In [95]:
# Set of (IRI, label) tuples, one per distinct place found in the Imago records.
places = set([(record['LocationIRI'], record['Location']) for record in imago])


# Each key (the place name used in Imago) maps to the list of names returned by the query.
mappedPlaces = {}
for placeIRI, imgLabel in places:
    # Wording such as 'City' and 'City of' is removed and the names are lower-cased.
    labels = [re.sub(r'\W*\b[Cc]ity( of)?\b\W*', '', label).lower() for label in getPlaceNames(placeIRI)]
    # ASCII variants (diacritics and non-standard characters normalised) are
    # collected in a separate list instead of being appended to `labels` while
    # it is being iterated; the resulting order is the same.
    asciiLabels = [unidecode(label) for label in labels if unidecode(label) != label]
    mappedPlaces[imgLabel] = labels + asciiLabels

# Some names added by hand.
additions = {
    'Bruxelles':'bruxelles', # needed because the official Wikidata name is "Ville de Bruxelles"
    'Cambridge (MA)':'harvard', 
    'Cracovia':'cracow', 
    'Filadelfia':'university of pennsylvania', 
    'Friburgo':'freiburg',
    'Monaco di Baviera':'muenchen',
    'Saint-Bonaventure':'st. bonaventure'
}
for key, extraName in additions.items():
    # A manual addition for a place that is not in the Imago data is reported
    # and skipped instead of raising KeyError.
    if key in mappedPlaces:
        mappedPlaces[key].append(extraName)
    else:
        print(f"manual addition skipped, place not found: {key}")
    
mappedPlaces = dict(sorted(mappedPlaces.items()))

#with open("file_generati/mappedPlaces.json", 'w') as f:
    #json.dump(mappedPlaces, f, indent=2)

# Funzioni

### Normalizzazione delle stringhe e conversione in lista

In [185]:
def process(string):
    """Normalise a string and split it into a list of its elements.

    The text is lower-cased, cleaned with a fixed sequence of substitutions,
    has its first abbreviated numeric range expanded, loses punctuation and is
    finally split on whitespace.

    Returns:
        A list of strings; for an empty result the list is [''].
    """
    text = string.lower()

    # Substitutions applied in this exact order.
    cleanup = (
        (r'\b[0]+', ''),                            # leading zeros of a digit run (0015 -> 15)
        (r'\b[Mm][Ss]', ''),                        # the abbreviation Ms (manuscript)
        (r'\b[Cc]od(?:ex)?\b', ''),                 # wording such as cod / codex
        (r'olim', ''),                              # the wording 'olim'
        (r'\b(\d+)([A-Za-z]+)\b', r'\1 \2'),        # split digits from trailing letters
        (r'\b([A-Za-z]+)(\d+)\b', r'\1 \2'),        # split letters from trailing digits
        (r'\b2°', 'f'),                             # ordinal 2° (secundo folio) -> 'f'
        (r'\b4°', 'q'),                             # ordinal 4° (quarto folio) -> 'q'
    )
    for pattern, replacement in cleanup:
        text = re.sub(pattern, replacement, text)

    # Expand a number abbreviated after a hyphen (1160-63 -> 1160 1163).
    # Only the first range found in the text drives the expansion.
    span = re.search(r"\d+-\d+", text)
    if span is not None:
        first, second = span.group(0).split('-')
        missing = len(first) - len(second)
        if missing > 0 or (missing == 0 and len(first) >= 4):
            text = re.sub(f'-{second}', ' ' + first[:missing] + second, text)
    # Any hyphenated number still attached to a digit is dropped.
    text = re.sub(r'(?<=\d)-\d+', '', text)

    # Punctuation and special characters become spaces.
    text = re.sub(r'[^\w\s]', ' ', text).strip()

    return re.split(r'\s+', text)

### Gestione numeri romani

In [186]:
def isRoman(string):
    """Tell whether a string can be read as a Roman numeral (case-insensitive).

    Returns:
        True when the non-empty, lower-cased string matches the numeral
        pattern, False otherwise.
    """
    candidate = string.lower()
    if not candidate:
        return False
    numeral = re.compile(r"^m*(c?[md]|d?c{1,4})?(xc|x?l|l?x{1,4})?(ix|i?v|v?i{1,4})?$")
    return numeral.match(candidate) is not None


def convertRoman(string):
    """Convert a Roman numeral to Arabic digits.

    Returns:
        The value as a string of digits, or the integer 0 when the input is
        not accepted by isRoman.
    """
    numeral = string.lower()
    if not isRoman(numeral):
        return 0

    values = {'i':1,'v':5,'x':10,'l':50,'c':100,'d':500,'m':1000,'iv':4,'ix':9,'xl':40,'xc':90,'cd':400,'cm':900}
    total = 0
    pos = 0
    while pos < len(numeral):
        # A subtractive pair (iv, ix, xl, ...) is consumed as a whole.
        pair = numeral[pos:pos+2]
        if len(pair) == 2 and pair in values:
            total += values[pair]
            pos += 2
        else:
            total += values[numeral[pos]]
            pos += 1
    return str(total)

### Confronto biblioteche

In [17]:
# For every Imago library, an MMM library that has been matched or rejected is
# remembered in the 'Matches' or 'Non-matches' list respectively, so that the
# comparison does not have to be repeated for that same library.
mappedLibraries = {
    lib: {'Matches': [], 'Non-matches': []}
    for lib in set([record['Library'] for record in imago])
}

In [187]:
# The dictionary was saved as a JSON file after the first run of the code and
# can be loaded directly; this replaces any `mappedLibraries` already defined.
with open('file_generati/mappedLibraries.json', 'r', encoding='utf-8') as f:
    mappedLibraries = json.load(f)

In [188]:
translator = Translator() # googletrans translator; libraryMatch uses it to detect the language of a library name

def strSimilarity(imgElem, mmmElem):
    """Return the similarity of two strings as a value between 0 and 1."""
    matcher = SequenceMatcher(a=imgElem, b=mmmElem)
    return matcher.ratio()

def libraryMatch(imgRecord, mmmRecord, translate=True):
    """Tell whether the libraries of an Imago record and an MMM record correspond.

    Outcomes are cached in the module-level `mappedLibraries` dict: an MMM
    library that matched is appended to 'Matches'; one that failed both the
    plain and the translated comparison is appended to 'Non-matches'.

    Args:
        imgRecord: Imago record; 'Library' and 'Location' are read.
        mmmRecord: MMM record; 'Source', 'Signature', 'Library' and
            'Collection' are read.
        translate: when True, an MMM library that does not match as written is
            machine-translated into the detected language of the Imago name
            and compared again.

    Returns:
        True on a match, False otherwise.
    """
    imgLibrary = imgRecord['Library']
    # A library missing from the cache gets an empty entry instead of raising KeyError.
    cache = mappedLibraries.setdefault(imgLibrary, {'Matches': [], 'Non-matches': []})

    # Bibale records carry the library in the label together with the
    # signature; the split is on the last ", " of the string.
    if mmmRecord['Source']=='Bibale Database':
        mmmAllLibraries = re.split(r', (?!.*,)', mmmRecord['Signature'])[0]
    else:
        mmmAllLibraries = mmmRecord['Library'] if mmmRecord['Library'] else mmmRecord['Collection']
    mmmLibraries_list = mmmAllLibraries.split(' *** ')

    if (
        any(mmmLibrary in cache['Matches'] for mmmLibrary in mmmLibraries_list)
        or
        imgLibrary == 'Biblioteca Bodleiana' and 'University of Oxford' in mmmAllLibraries
        or
        imgLibrary == 'Bibliothèque nationale de France' and 'BNF' in mmmAllLibraries
        or
        # French municipal libraries are often renamed "médiathèque".
        all(re.search('municipale|médiathèque', lib, re.IGNORECASE) for lib in [imgLibrary, mmmAllLibraries])
    ):
        return True

    # The place name and recurring words that may cause false matches are
    # removed from the Imago library name.
    imgLibrary_edit = imgLibrary.lower()
    imgLibrary_edit = imgLibrary_edit.replace(imgRecord['Location'].lower(), '')
    for placeName in mappedPlaces.get(imgRecord['Location'], []):
        imgLibrary_edit = imgLibrary_edit.replace(placeName, '')
    imgLibrary_edit = re.sub(r'(biblioteca|library|college)', '', imgLibrary_edit)
    # Only elements of at least 4 characters take part in the comparison.
    imgElements = [elem for elem in process(imgLibrary_edit) if len(elem)>=4]

    def similarEnough(mmmElements):
        # True when more than half of the Imago elements resemble an MMM element.
        matches = [imgElem for imgElem in imgElements if any(strSimilarity(imgElem, mmmElem)>0.7 for mmmElem in mmmElements)]
        return len(matches) > len(imgElements)/2

    imgLang = None  # language of the Imago name, detected at most once per call

    for mmmLibrary in mmmLibraries_list:

        if mmmLibrary in cache['Non-matches']:
            continue

        mmmLibrary_edit = mmmLibrary.lower()
        # Words such as "Universitätsbibliothek" are shortened to ease the comparison.
        mmmLibrary_edit = re.sub(r'universit[a-z]+\b', 'universit', mmmLibrary_edit)
        mmmElements = [elem for elem in process(mmmLibrary_edit) if len(elem)>=4]

        if similarEnough(mmmElements):
            cache['Matches'].append(mmmLibrary)
            return True

        if not translate:
            continue

        # Machine translation of the MMM name. A failure of the translation
        # service skips this candidate without recording it as a non-match;
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        try:
            if imgLang is None:
                imgLang = translator.detect(imgLibrary).lang
            mmmLibrary_trans = GoogleTranslator(source='auto', target=imgLang).translate(text=mmmLibrary)
            translatedElements = [elem for elem in process(mmmLibrary_trans) if len(elem)>=4]
        except Exception:
            continue

        if similarEnough(translatedElements):
            cache['Matches'].append(mmmLibrary)
            return True
        cache['Non-matches'].append(mmmLibrary)

    return False

### Confronto autori

In [189]:
def authorMatch(imgRecord, mmmRecord, timeout=10):
    """Tell whether an author of the Imago record is attested for the MMM record.

    Only Imago authors listed in `authorNames` are considered. For each one,
    the MMM name form (entry [0]) is looked up in the MMM 'Authors' field and
    in the text of the external pages linked by the MMM record; the regex
    (entry [1]) is searched in the lower-cased page text.

    Args:
        imgRecord: Imago record; 'Authors' is read (names joined by ' *** ').
        mmmRecord: MMM record; 'Authors' and 'url' are read.
        timeout: seconds to wait for each external page.

    Returns:
        True when a name or pattern is found, False otherwise.
    """
    mmmAuthors = mmmRecord['Authors']
    known = [authorNames[author] for author in imgRecord['Authors'].split(' *** ') if author in authorNames]

    # Nothing to look for: no page needs to be downloaded.
    if not known:
        return False
    # A hit in the MMM author field settles the question without any request.
    if any(entry[0] in mmmAuthors for entry in known):
        return True

    # Text of the external pages. Each URL is fetched independently, so one
    # unreachable page does not discard the others.
    html = ''
    for url in (mmmRecord.get('url') or '').split(' *** '):
        if not url:
            continue
        try:
            with urllib.request.urlopen(url, timeout=timeout) as response:
                # Decode the body instead of taking the repr of the bytes,
                # which would escape every non-ASCII character.
                charset = response.headers.get_content_charset() or 'utf-8'
                html += response.read().decode(charset, errors='replace')
        except Exception:
            # Best effort: a page that cannot be read contributes nothing.
            continue

    htmlLower = html.lower()
    return any(
        entry[0] in html or re.search(f'\\b{entry[1]}', htmlLower) is not None
        for entry in known
    )

### Confronto fogli e dimensioni

In [190]:
def measureMatch(imgRecord, mmmRecord):
    """Compare folio count and dimensions of an Imago and an MMM record.

    Each of 'folios', 'height' and 'width' receives a score:
      2  the Imago value is within 1 of an MMM value,
      1  it is within 3, or (when the Imago notes yield no value for that
         measure) an MMM value longer than one character occurs in the notes,
     -1  both sides have a value and they differ by more than 3,
      0  no comparison was possible.

    Args:
        imgRecord: Imago record; 'Notes' (unstructured string) is read.
        mmmRecord: MMM record; 'Measures' is a dict whose values may hold
            several alternatives joined by ' ~ '.

    Returns:
        An int: 3 when all three scores are 2; 2 when the folio score is 2 or
        both height and width score 2; 1 when all three scores are positive;
        0 otherwise.
    """
    def toInt(value):
        # MMM values that are not integers are ignored instead of raising.
        try:
            return int(value)
        except ValueError:
            return None

    mmmMeasures = mmmRecord['Measures']
    imgNotes = imgRecord['Notes']
    imgMeasures = {
        'folios': re.findall(r'\b\d+(?=\s*ff|\s*fogli)', imgNotes),
        'height': re.findall(r'\b\d+(?=\s*x)', imgNotes),
        'width': re.findall(r'(?:x\s*)(\d+\b)', imgNotes),
    }

    similarity = {'folios': 0, 'height': 0, 'width': 0}

    for measure in similarity:
        if not mmmMeasures.get(measure):
            continue
        mmmValues = [value.strip() for value in mmmMeasures[measure].split(' ~ ')]

        if imgMeasures[measure]:
            imgValue = int(imgMeasures[measure][0])
            gaps = [abs(imgValue - number) for number in map(toInt, mmmValues) if number is not None]
            if not gaps:
                continue
            closest = min(gaps)
            if closest <= 1:
                similarity[measure] = 2
            elif closest <= 3:
                similarity[measure] = 1
            else:
                similarity[measure] = -1
        # No structured Imago value: look for the MMM value as a whole word in
        # the notes. The value is escaped so that it is matched literally.
        elif any(len(value) > 1 and re.search(r'\b' + re.escape(value) + r'\b', imgNotes) for value in mmmValues):
            similarity[measure] = 1

    scores = similarity.values()
    if all(score == 2 for score in scores):
        return 3
    if similarity['folios'] == 2 or (similarity['height'] > 1 and similarity['width'] > 1):
        return 2
    # Returned as an int so that every outcome has the same type.
    return int(all(score > 0 for score in scores))

### Confronto segnature

In [201]:
def similarSignature(imagoRecord, mmmRecord):
    """First, loose comparison of two signatures, based on their numeric elements.

    Args:
        imagoRecord: Imago record; 'Signature' is read.
        mmmRecord: MMM record; 'Source' and 'Signature' are read. Several MMM
            signatures may be joined by ' *** '.

    Returns:
        True as soon as one Imago (sub)signature agrees with one MMM
        signature, False otherwise.
    """
    imgSignature = imagoRecord['Signature']

    # The source is read from the record passed in, not from a global variable.
    if 'Bibale Database' in mmmRecord['Source']:
        allMmmSignatures = mmmRecord['Signature'].split(', ')[-1]
    else:
        allMmmSignatures = mmmRecord['Signature']

    # Cotton signatures follow a fixed scheme (library-shelf-number). To rule
    # out wrong matches early, the library name and the shelf must agree. The
    # check is applied only when the signature has the elements it needs.
    imgElements = process(imgSignature)
    checkCotton = imgSignature.strip().startswith('Cotton') and len(imgElements) >= 3
    if checkCotton:
        bookCase = re.sub(r'us\b', '', imgElements[1])
        shelf = imgElements[2]

    # Alternative or former signatures given in brackets are compared separately.
    imgSubElements = [process(subSignature) for subSignature in imgSignature.split('(')]

    # Loop over the single signatures concatenated by the query (separator ' *** ').
    for mmmSignature in allMmmSignatures.split(' *** '):

        mmmElements = process(mmmSignature)
        # Numeric elements (Arabic or Roman) of the MMM signature.
        mmmNumbers = [elem for elem in mmmElements if elem.isnumeric() or isRoman(elem)]

        if checkCotton:
            if bookCase not in mmmSignature.lower() or (len(mmmElements)>1 and mmmElements[1]!=shelf):
                continue

        # An MMM signature written as a code of contiguous characters
        # (e.g. BBRXR0292102922) is compared differently.
        isCode = re.search(r'\b[A-Z]{4,}\S*\d+', mmmSignature)

        for subElements in imgSubElements:

            imgNumbers = [elem for elem in subElements if elem.isnumeric()]

            if imgNumbers:
                if isCode:
                    if all(re.search(f'(?<![1-9]){num}(?![1-9])', mmmSignature) or (len(num)>4 and num in mmmSignature) for num in imgNumbers):
                        return True
                elif all(num in mmmNumbers for num in imgNumbers):
                    return True
            else:
                # Without Arabic numbers, potential Roman numerals are used,
                # together with their values in Arabic digits.
                romanNumbers = [elem for elem in subElements if isRoman(elem)]
                candidates = romanNumbers + [convertRoman(num) for num in romanNumbers]

                if any(num in candidates for num in mmmNumbers):
                    return True

    return False

In [192]:
def checkAcronym(imgSignature, mmmSignature):
    """Tell whether an acronym in one signature spells the initials of the other.

    An acronym is a run of at least three capital letters that is not a Roman
    numeral. The initials of a signature are the first characters of its
    whitespace-separated elements that start with a letter.

    Returns:
        True when an acronym of either signature is contained in the initials
        of the other one; False otherwise, or when neither has an acronym.
    """
    def acronyms(signature):
        return [elem.lower() for elem in re.findall(r'\b[A-Z]{3,}\b', signature) if not isRoman(elem)]

    def firstChars(signature):
        # split() without argument never yields empty elements, so repeated
        # spaces or an empty signature cannot raise IndexError.
        return ''.join(elem[0].lower() for elem in signature.split() if elem[0].isalpha())

    imgSignature = imgSignature.strip()
    mmmSignature = mmmSignature.strip()

    imgAcronyms = acronyms(imgSignature)
    mmmAcronyms = acronyms(mmmSignature)

    if not (imgAcronyms or mmmAcronyms):
        return False

    imgFirstChars = firstChars(imgSignature)
    mmmFirstChars = firstChars(mmmSignature)
    return any(acronym in mmmFirstChars for acronym in imgAcronyms) or any(acronym in imgFirstChars for acronym in mmmAcronyms)



def checkCode(imgSignature, mmmCode):
    """Compare an Imago signature with a catalogue code made of contiguous characters.

    The single elements of such a code cannot be isolated reliably, so every
    element of the Imago signature that is found in the code is removed from
    it. The function returns True when nothing is left of the code (zeros and
    non-word characters aside).
    """
    # The leading characters identify country (1st), city (next three) and
    # library (5th); the library character is omitted for codes starting
    # with 'IVAT' (Biblioteca Apostolica Vaticana).
    prefixLength = 4 if mmmCode.startswith('IVAT') else 5
    remainder = mmmCode[prefixLength:].lower()

    elements = process(imgSignature)

    # Non-numeric elements. For 'Clm' and 'Cgm' (Bayerische Staatsbibliothek)
    # only the second character is kept, as in the SDBM codes
    # (e.g. 'Clm 10291' -> 'DMUNBL10291/00').
    for elem in elements:
        if not elem.isalpha():
            continue
        if elem.lower() in ('clm', 'cgm'):
            elem = elem[1]
        # Try the element and then ever shorter prefixes of it, down to its
        # first character; the first one found is removed once from the code.
        for end in range(len(elem), 0, -1):
            prefix = elem[:end]
            if prefix in remainder:
                remainder = remainder.replace(prefix, '', 1)
                break

    # Numeric elements of the Imago signature are removed from the code.
    for num in elements:
        if not num.isnumeric():
            continue
        if len(num) > 4:
            # Long numbers are simply looked up in the string
            # (e.g. 10147-10158 -> BBRXR1014710158).
            remainder = remainder.replace(num, '', 1)
        else:
            # Shorter ones must not be adjacent to other non-zero digits.
            remainder = re.sub(f'(?<![1-9]){num}(?![1-9]|0+\\b)', '', remainder)

    remainder = re.sub(r'[0\W]', '', remainder)
    return remainder == ''

In [193]:
def signMatch(imgRecord, mmmRecord):
    """Detailed comparison of the signatures of an Imago and an MMM record.

    Every bracket-separated part of the Imago signature is compared with every
    bracket-separated part of every MMM signature (separator ' *** ').

    Returns:
        True at the first pair of parts that agree, False otherwise.
    """
    imagoSignature = imgRecord['Signature']
    if 'Bibale Database' in mmmRecord['Source']:
        allMmmSignatures = mmmRecord['Signature'].split(', ')[-1]
    else:
        allMmmSignatures = mmmRecord['Signature']
    collectionElements = process(mmmRecord['Collection'])

    def inCollection(elem):
        # An element of at least 3 characters may also open a word of the collection name.
        return len(elem) >= 3 and any(collElem.startswith(elem) for collElem in collectionElements)

    imgParts = imagoSignature.split('(')

    for mmmSignature in allMmmSignatures.split(' *** '):
        for mmmPart in mmmSignature.split('('):
            for imgPart in imgParts:

                # A code of contiguous characters (e.g. SBARAR151/00) is handled by checkCode.
                if re.search(r'\b[A-Z]{4,}\S*\d+', mmmPart):
                    if checkCode(imgPart, mmmPart):
                        return True
                    continue

                imgTokens = process(imgPart)
                mmmTokens = process(mmmPart)
                acronymHit = checkAcronym(imgPart, mmmPart)

                # The MMM part may not have more elements than the Imago part,
                # unless an acronym links the two.
                if len(mmmTokens) > len(imgTokens) and not acronymHit:
                    continue

                # Every Arabic or Roman number of Imago must occur in MMM,
                # either as written or converted to Arabic digits.
                numbersAgree = all(
                    tok in mmmTokens or convertRoman(tok) in mmmTokens
                    for tok in imgTokens
                    if tok.isnumeric() or isRoman(tok)
                )
                if not numbersAgree:
                    continue
                if acronymHit:
                    return True

                # Every alphabetic element of Imago must share a prefix with
                # an MMM element, or open a word of the collection name.
                wordsAgree = all(
                    any(mmmTok.startswith(tok) or tok.startswith(mmmTok) for mmmTok in mmmTokens)
                    or inCollection(tok)
                    for tok in imgTokens
                    if tok.isalpha() and not isRoman(tok)
                )
                if wordsAgree:
                    return True

    return False

### Resa user-friendly delle informazioni di due mss. matchati

In [194]:
# Human-readable rendering of the information of two matched records.
# NOTE(review): in a notebook this name presumably shadows IPython's built-in
# display() -- confirm that nothing else relies on the built-in.
def display(match):
    """Format a matched pair of records as a two-column text block.

    Args:
        match: a sequence whose first item is the Imago record (dict) and
            whose second item is the MMM record (dict).

    Returns:
        A string with the Imago fields on the left and the MMM fields on the
        right, followed by the MMM IRI and a separator line of 130 dashes.
    """

    # Shallow copies: keys are removed/reassigned below without touching the
    # records held by the caller.
    imgRecord = match[0].copy()
    mmmRecord = match[1].copy()  
        
    del imgRecord['LocationIRI']     

    # MMM author names that correspond to an Imago author are shown in upper case.
    for auth in imgRecord['Authors'].split(' *** '):
        if auth in authorNames and authorNames[auth][0] in mmmRecord['Authors']:
            mmmRecord['Authors'] = mmmRecord['Authors'].replace(authorNames[auth][0], authorNames[auth][0].upper())
    # The measures dict is flattened to "key: value" pairs, skipping empty values.
    if mmmRecord['Measures']:
        mmmRecord['Measures'] = (', ').join([key+': '+mmmRecord['Measures'][key] for key in mmmRecord['Measures'] if mmmRecord['Measures'][key]])
    
    display_string = '\n'
    # Fields are paired by position in the two dicts; zip stops at the shorter one.
    for imgKey, mmmKey in zip(imgRecord, mmmRecord):        
        # Wrap each value to its column width; join+split yields [''] for an
        # empty value, so there is always at least one line per side.
        imgLines = '\n'.join(wrap(imgRecord[imgKey], 38)).split('\n')    
        mmmLines = '\n'.join(wrap(mmmRecord[mmmKey], 50)).split('\n')   
        imgField, mmmField = imgKey+':', mmmKey+':'
        
        # Lines present on both sides are printed next to each other; the
        # field names appear on the first line only.
        for n, lines in enumerate(zip(imgLines, mmmLines)):
            display_string+=f"  {imgField:12}{lines[0]:42}{mmmField:12}{lines[1]}\n"
            if n == 0:
                imgField = mmmField = '' 
        # Remaining lines of the longer side are printed alone, indented to
        # the start of that side's value column (14 for Imago, 68 for MMM).
        if len(imgLines)!=len(mmmLines):
            maxList = max(imgLines, mmmLines, key=len)
            minList = min(imgLines, mmmLines, key=len)
            string=' '*14 if maxList == imgLines else ' '*68
            for line in maxList[len(minList):]:
                display_string += string+line+'\n'  
        # The MMM collection is printed right after the library, on the MMM side only.
        if mmmKey == 'Library':
            lines = '\n'.join(wrap(mmmRecord['Collection'], 50)).split('\n')
            field = 'Coll.:'
            for n, line in enumerate(lines):
                display_string += f"{' '*56}{field:12}{line}\n"
                if n == 0:
                    field =''
        
    display_string += f"{' '*56}{'IRI:':12}{mmmRecord['IRI']}\n\n{'-'*130}"
     
    return display_string




# CONFRONTO E MAPPATURA DEI MANOSCRITTI

In [None]:
# List of the matches found. Each match is a tuple holding the Imago record
# dict and the MMM record dict.
results = []

def confirm_match(i, m, certain_match = False):
    """Record a matched pair, write it to the proper output file and print it.

    Args:
        i: Imago record.
        m: MMM record.
        certain_match: when True the entry goes to `file_match_certi`,
            otherwise to `file_match_incerti` (both are module-level files
            that must be open when this is called).
    """
    match = (i, m)
    results.append(match)
    # Position of the entry just appended; looking the tuple up with index()
    # would return an earlier entry when an equal pair is already in the list.
    n = len(results) - 1
    entry = display(match)
    # Uncertain matches go to the file opened as `file_match_incerti`.
    target = file_match_certi if certain_match else file_match_incerti
    target.writelines(['  '+str(n)+'.\n', entry+'\n'])
    print('  '+str(n)+'.')
    print(entry)

In [98]:
# Both output files are opened in a `with` block so that they are closed even
# if the matching is interrupted by an error.
with open('file_generati/match_certi.txt', 'w', encoding='utf-8') as file_match_certi, \
     open('file_generati/match_incerti.txt', 'w', encoding='utf-8') as file_match_incerti:

    for place in mappedPlaces:
        # For each city, both lists are narrowed to the records that refer to it.
        imago_filt = [record for record in imago if record['Location']==place]
        placeNames = mappedPlaces[place]
        mmm_filt = []
        for m in mmm:
            searchable = [m[key].lower() for key in ['Location', 'Library', 'Signature']]
            # MMM records that mention Harvard are left out when the Imago
            # place is 'Cambridge'.
            if place=='Cambridge' and any('harvard' in text for text in searchable):
                continue
            # For signatures written as a code, characters 2-4 are compared
            # with the start of the place name.
            isCode = re.match(r'\b[A-Z]{4,}\S*\d+', m['Signature'])
            codePrefix = m['Signature'][1:4].lower()
            for name in placeNames:
                locationMatch = any(name in text for text in searchable)
                codeLocationMatch = isCode and name.startswith(codePrefix)
                if locationMatch or (not m['Location'] and codeLocationMatch):
                    mmm_filt.append(m)
                    break

        for i in imago_filt:
            for m in mmm_filt:

                if not similarSignature(i, m):
                    continue

                # The measure score is computed once per pair.
                measureScore = measureMatch(i, m)

                if measureScore==3:
                    confirm_match(i, m, True)

                elif i['Library']=='British Library':
                    if signMatch(i, m) and libraryMatch(i, m):
                        confirm_match(i, m)

                elif i['Location'] in ['Parigi', 'Londra', 'Oxford', 'Cambridge', 'Città del Vaticano', 'Chicago']:
                    if (measureScore==2 and authorMatch(i,m)) or ((signMatch(i, m) or authorMatch(i,m)) and libraryMatch(i, m)):
                        confirm_match(i, m)

                else:
                    # Either the author matches, or at least two of library,
                    # measures and signature do.
                    if authorMatch(i, m) or (libraryMatch(i, m)+(measureScore>0)+signMatch(i, m)>1):
                        confirm_match(i, m)



#with open("file_generati/results.json", 'w') as f:
    #json.dump(matches, f, indent=2)
    
#with open("file_generati/mappedLibraries.json", 'w') as f:
    #json.dump(mappedLibraries, f, indent=2)

  0.

  Signature:  3024C                                     Signature:  3024C
  Library:    Aberystwyth, National Library of Wales    Library:    
                                                        Coll.:      Handlist of Manuscripts in the National Library of
                                                                    Wales, vol. I, pt. V
  Location:   Aberystwyth                               Location:   Aberystwyth
  Authors:    Giraldus Cambrensis                       Authors:    GIRALDUS, CAMBRENSIS
  Notes:      98 ff, 225x115 mm                         Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_149030

----------------------------------------------------------------------------------------------------------------------------------
  1.

  Signature:  3024C                                     Signature:  3024C
  Library:    Aberystwyth, National Library of Wales    Library:

  9.

  Signature:  Ripoll 151                                Signature:  SBARAR151/00
  Library:    Archivio Generale della Corona di         Library:    Archivo de la Corona di Aragon
              Aragona
                                                        Coll.:      
  Location:   Barcellona                                Location:   
  Authors:    Beda Venerabilis                          Authors:    Isidore, of Seville, Saint, -636
  Notes:      168 ff, 215 x 135 mm                      Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_102818

----------------------------------------------------------------------------------------------------------------------------------
  10.

  Signature:  39                                        Signature:  39
  Library:    Belluno, Biblioteca Lolliana              Library:    Biblioteca Lolliniana
              Gregoriana
                              

  18.

  Signature:  10147-10158                               Signature:  BBRXR1014710158
  Library:      Bruxelles, KBR (olim Bibliothèque       Library:    Bruxelles, Bibliothèque royale de Belgique
              Royale «Albert Ier»)
                                                        Coll.:      
  Location:   Bruxelles                                 Location:   
  Authors:    Opus sine auctore                         Authors:    Cicero, Marcus Tullius
  Notes:      96 ff                                     Measures:   folios: 96, height: 153, width: 105
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_110445

----------------------------------------------------------------------------------------------------------------------------------
  19.

  Signature:  1160-1163                                 Signature:  BBRXR0116001163
  Library:      Bruxelles, KBR (olim Bibliothèque       Library:    Bruxelles,

  28.

  Signature:  5541-5542                                 Signature:  BBRXR0554105542
  Library:      Bruxelles, KBR (olim Bibliothèque       Library:    Bruxelles, Bibliothèque royale de Belgique ***
              Royale «Albert Ier»)                                  Gembloux, abbaye O.S.B. (H)
                                                        Coll.:      
  Location:   Bruxelles                                 Location:   
  Authors:    Johannes Presbyter                        Authors:    
  Notes:      1, 46                                     Measures:   folios: 121
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_108682

----------------------------------------------------------------------------------------------------------------------------------
  29.

  Signature:  733-41 (1709)                             Signature:  BBRXR0073300741
  Library:      Bruxelles, KBR (olim Bibliothèque       Lib

  37.

  Signature:  Dd.1.17                                   Signature:  Dd. 1. 17.
  Library:    Cambridge University Library              Library:    Cambridge, University
                                                        Coll.:      A Catalogue of the Manuscripts preserved in the
                                                                    University Library, Cambridge
  Location:   Cambridge                                 Location:   Cambridge
  Authors:    Franciscus Pipinus                        Authors:    
  Notes:      424 ff, 450x315 mm                        Measures:   folios: 424
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_211221

----------------------------------------------------------------------------------------------------------------------------------
  38.

  Signature:  Dd.8.7                                    Signature:  Dd. 7. 8.
  Library:    Cambridge University Li

  43.

  Signature:   315                                      Signature:  315 *** CCCXV *** N IX
  Library:    Corpus Christi College                    Library:    CAMBRIDGE, Parker Library (at Corpus Christi
                                                                    College)
                                                        Coll.:      Catalogus librorum manuscriptorum in Bibliotheca
                                                                    Collegii Corporis Christi quos legavit Matthaeus
                                                                    Parkerus Archiepiscopus Cantuariensis *** A
                                                                    descriptive catalogue of the manuscripts in the
                                                                    library of Corpus Christi College Cambridge ***
                                                                    Catalogus librorum manuscriptorum quos Collegio
                  

  47.

  Signature:  MS 275                                    Signature:  275 *** A IV *** CCLXXV
  Library:    Corpus Christi College                    Library:    CAMBRIDGE, Parker Library (at Corpus Christi
                                                                    College)
                                                        Coll.:      Catalogus librorum manuscriptorum in Bibliotheca
                                                                    Collegii Corporis Christi quos legavit Matthaeus
                                                                    Parkerus Archiepiscopus Cantuariensis *** A
                                                                    descriptive catalogue of the manuscripts in the
                                                                    library of Corpus Christi College Cambridge ***
                                                                    Catalogus librorum manuscriptorum quos Collegio
                 

  51.

  Signature:  Ms. 370 (9.6)                             Signature:  370 *** 9.6 *** CCCLXX *** GCAMCCO370/00
  Library:    Corpus Christi College                    Library:    CAMBRIDGE, Parker Library (at Corpus Christi
                                                                    College) *** Norwich, Benedictine cathedral priory
                                                        Coll.:      Catalogus librorum manuscriptorum in Bibliotheca
                                                                    Collegii Corporis Christi quos legavit Matthaeus
                                                                    Parkerus Archiepiscopus Cantuariensis *** A
                                                                    descriptive catalogue of the manuscripts in the
                                                                    library of Corpus Christi College Cambridge ***
                                                                    Catalo

  55.

  Signature:  Ms. 426 (N. 30)                           Signature:  426 *** CCCCXXVI *** N XXX
  Library:    Corpus Christi College                    Library:    CAMBRIDGE, Parker Library (at Corpus Christi
                                                                    College)
                                                        Coll.:      Catalogus librorum manuscriptorum in Bibliotheca
                                                                    Collegii Corporis Christi quos legavit Matthaeus
                                                                    Parkerus Archiepiscopus Cantuariensis *** A
                                                                    descriptive catalogue of the manuscripts in the
                                                                    library of Corpus Christi College Cambridge ***
                                                                    Catalogus librorum manuscriptorum quos Collegio
              

  59.

  Signature:  ms. 66 A                                  Signature:  66 *** LXVI *** Sub D XII
  Library:    Corpus Christi College                    Library:    CAMBRIDGE, Parker Library (at Corpus Christi
                                                                    College)
                                                        Coll.:      Catalogus librorum manuscriptorum in Bibliotheca
                                                                    Collegii Corporis Christi quos legavit Matthaeus
                                                                    Parkerus Archiepiscopus Cantuariensis *** A
                                                                    descriptive catalogue of the manuscripts in the
                                                                    library of Corpus Christi College Cambridge ***
                                                                    Catalogus librorum manuscriptorum quos Collegio
               

  65.

  Signature:  12                                        Signature:  12
  Library:    University of Cincinnati Libraries        Library:    University of Cincinnati
                                                        Coll.:      Census of Medieval and Renaissance Manuscripts in
                                                                    the United States and Canada, vol. II (S. De
                                                                    Ricci)
  Location:   Cincinnati                                Location:   Cincinnati
  Authors:    Iohannes Boccaccius                       Authors:    BOCCACCIO, GIOVANNI, 1313-1375
  Notes:      5, 440, 33                                Measures:   folios: 63, height: 290, width: 220
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_47426

----------------------------------------------------------------------------------------------------------------

  73.

  Signature:  Vat. lat. 636                             Signature:  636
  Library:    Biblioteca Apostolica Vaticana            Library:    Bibliotheca Apostolica Vaticana
                                                        Coll.:      Codices Vaticani Latini, I:1-678
  Location:   Città del Vaticano                        Location:   Vatican City *** Holy See
  Authors:    Pseudo Eucherius Lugdunensis              Authors:    Bede, the Venerable, Saint, 673-735 *** Bede, the
                                                                    Venerable, pseudo
  Notes:      127 ff, 1, 71, 72, 124                    Measures:   folios: 127, height: 311, width: 202
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_71037

----------------------------------------------------------------------------------------------------------------------------------
  74.

  Signature:  reg. lat. 1477                      

  80.

  Signature:  ms. C19                                   Signature:  408 *** 968 *** 235 *** 19 *** 37 *** 48 *** 513
                                                                    *** 645 *** 581 *** MS C19
  Library:    Lawrence, University of Kansas,           Library:    Maggs (Librairie) *** James & Mary Lee Tregaskis
              Kenneth Spencer Research Library                      Booksellers *** University of Kansas *** Charles
                                                                    A. Stonehill, Inc.
                                                        Coll.:      Supplement to the Census of Medieval and
                                                                    Renaissance Manuscripts in the United States and
                                                                    Canada (C.U. Faye and W.H. Bond)
  Location:   Lawrence                                  Location:   Lawrence
  Authors:    Iohannes de Mandavilla                    

  88.

  Signature:  Voss. lat. F 75                           Signature:  Leiden, Universiteitsbibliotheek, Vossius lat. f°
                                                                    075
  Library:    Biblioteca universitaria di Leiden        Library:    
                                                        Coll.:      Collection de N. Burbage (16e s.?) ***
                                                                    Bibliothèque de l'Université de Leiden
                                                                    (Universiteitsbibliotheek Leiden) *** Collection
                                                                    d'Isaac Vossius (+1689)
  Location:   Leida                                     Location:   
  Authors:    Franciscus Pipinus                        Authors:    
  Notes:      100 ff, 295x200 mm                        Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_sing

  97.

  Signature:  Arundel 13                                Signature:  13 *** Arundel 13
  Library:    British Library                           Library:    London, British Library *** Royal Society Great
                                                                    Britain
                                                        Coll.:      Catalogue of Manuscripts in the British Museum,
                                                                    N.S.1: Arundel (J. Forshall) *** British Library
                                                                    Manuscripts Catalogue
                                                                    [http://www.bl.uk/catalogues/manuscripts]
  Location:   Londra                                    Location:   London
  Authors:    Franciscus Pipinus                        Authors:    Polo, Marco, 1254-1323? *** PIPINUS, FRANCISCUS,
                                                                    1270-1328
  Notes:    

  103.

  Signature:  Cotton Claudius B. VII                    Signature:  C B VII *** Claudius B. 7
  Library:    British Library                           Library:    London, British Library
                                                        Coll.:      A catalogue of the manuscripts in the Cottonian
                                                                    library: to which are added many emendations and
                                                                    additions. With an appendix containing an account
                                                                    of the damage sustained by the fire in 1731; and
                                                                    also a catalogue of the charters preserved in the
                                                                    same library *** A catalogue of the manuscripts in
                                                                    the Cottonian Library, deposited 

  108.

  Signature:  Cotton Domitianus A. XIII                 Signature:  D XIII *** Domitian A. 13
  Library:    British Library                           Library:    London, British Library
                                                        Coll.:      A catalogue of the manuscripts in the Cottonian
                                                                    library: to which are added many emendations and
                                                                    additions. With an appendix containing an account
                                                                    of the damage sustained by the fire in 1731; and
                                                                    also a catalogue of the charters preserved in the
                                                                    same library *** A catalogue of the manuscripts in
                                                                    the Cottonian Library, deposited 

  115.

  Signature:  Cotton Otho D. I                          Signature:  Otho D. 1
  Library:    British Library                           Library:    London, British Library
                                                        Coll.:      A catalogue of the manuscripts in the Cottonian
                                                                    library: to which are added many emendations and
                                                                    additions. With an appendix containing an account
                                                                    of the damage sustained by the fire in 1731; and
                                                                    also a catalogue of the charters preserved in the
                                                                    same library
  Location:   Londra                                    Location:   London
  Authors:    Iohannes de Mandavilla                    Authors:    Bede, the Ve

  121.

  Signature:  Cotton Vespasian A. XIV                   Signature:  Vespasian A. 14
  Library:    British Library                           Library:    London, British Library
                                                        Coll.:      A catalogue of the manuscripts in the Cottonian
                                                                    library: to which are added many emendations and
                                                                    additions. With an appendix containing an account
                                                                    of the damage sustained by the fire in 1731; and
                                                                    also a catalogue of the charters preserved in the
                                                                    same library
  Location:   Londra                                    Location:   London
  Authors:    Opus sine auctore                         Authors:    Gregor

  127.

  Signature:  Harley 3099                               Signature:  Harley 3099
  Library:    British Library                           Library:    London, British Library *** Prämonstratenserstift
                                                                    Arnstein an der Lahn
                                                        Coll.:      British Library Manuscripts Catalogue
                                                                    [http://www.bl.uk/catalogues/manuscripts]
  Location:   Londra                                    Location:   London
  Authors:    Johannes Presbyter                        Authors:    Isidore, of Seville, Saint, -636
  Notes:      168 ff, 345x238 mm                        Measures:   folios: 168, height: 345, width: 235
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_208517

-----------------------------------------------------------------------------

  136.

  Signature:  Harley 5115                               Signature:  Harley 5115
  Library:    British Library                           Library:    London, British Library
                                                        Coll.:      British Library Manuscripts Catalogue
                                                                    [http://www.bl.uk/catalogues/manuscripts]
  Location:   Londra                                    Location:   London
  Authors:    Franciscus Pipinus                        Authors:    Geoffrey, of Monmouth, Bishop of St. Asaph,
                                                                    1100?-1154 *** Polo, Marco, 1254-1323? ***
                                                                    PIPINUS, FRANCISCUS, 1270-1328 *** Hayton, Frère,
                                                                    approximately 1235-approximately 1314
  Notes:      152 ff, 320x205 mm                        Measures:   folios: 152, h

  144.

  Signature:  10                                        Signature:  10
  Library:      Maria Saal, Stiftsbibliothek            Library:    Archiv Collegiatstiftes (Maria Saal, Austria)
                                                        Coll.:      HMML - Maria Saal Archiv des Collegiatstiftes
  Location:   Maria Saal                                Location:   
  Authors:    Burchardus de Monte Sion                  Authors:    Gregory, I, Pope, approximately 540-604 *** Hugh,
                                                                    of Saint-Victor, 1096?-1141 *** Heinrich, von
                                                                    Langenstein, approximately 1325-1397 *** Aquinas,
                                                                    Thomas, Saint, 1225-1274
  Notes:      239 ff, 205x145 mm                        Measures:   folios: 239
                                                        IRI:        http://ldf.fi/mmm/manifestation_s

  154.

  Signature:  O 80 sup.                                 Signature:  IMILAO080S/00
  Library:    Biblioteca Ambrosiana                     Library:    MILANO, Biblioteca Ambrosiana
                                                        Coll.:      
  Location:   Milano                                    Location:   
  Authors:    Leo Baptista Albertus                     Authors:    ALBERTI, LEON BATTISTA, 1404-1472
  Notes:      78, 230x160 mm                            Measures:   folios: 78, height: 230, width: 160
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_108154

----------------------------------------------------------------------------------------------------------------------------------
  155.

  Signature:  P 25 sup.                                 Signature:  IMILAP025S/00
  Library:    Biblioteca Ambrosiana                     Library:    MILANO, Biblioteca Ambrosiana
                   

  164.

  Signature:  Fonds de la ville 31                      Signature:  31
  Library:      Namur, Musée des Arts Anciens du        Library:    Namur, Fonds de la Ville
              Namurois
                                                        Coll.:      Catalogue des manuscrits conserves a Namur, v. I,
                                                                    (P. Faider, ed)
  Location:   Namur                                     Location:   Namur
  Authors:    Sanctus Brendanus                         Authors:    Thomas, de Cantimpré, approximately
                                                                    1200-approximately 1270 *** Jerome, Saint, -419 or
                                                                    420
  Notes:      76 ff, 270x202 mm                         Measures:   folios: 76, height: 270, width: 202
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_146029



  170.

  Signature:  Marston 17                                Signature:  New Haven, Yale University, Beinecke Library,
                                                                    Marston MS 17
  Library:    Yale University Library                   Library:    
                                                        Coll.:      Vente, London, Sotheby's, 1825, 14 mars (vente
                                                                    Celotti) *** Bibliothèque du collège des Jésuites
                                                                    d'Agen *** Collection de C.A. Stonehill ***
                                                                    Collection de l'abbé Luigi Celotti (+ v.1846) ***
                                                                    Collection de Thomas E. Marston *** Bibliothèque
                                                                    de Sir Thomas Phillipps († 1872) *** Fonds de
                                 

  177.

  Signature:  160                                       Signature:  MS. Bodl. 160
  Library:    Biblioteca Bodleiana                      Library:    University of Oxford *** Canterbury, Christ Church
                                                                    Cathedral Priory (Benedictine)
                                                        Coll.:      MSS. Bodl. (Bodley)
  Location:   Oxford                                    Location:   Oxford
  Authors:    Beda Venerabilis                          Authors:    Justinian, I, Emperor of the East, 483?-565
  Notes:      90 ff                                     Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/bodley_manuscript_1221

----------------------------------------------------------------------------------------------------------------------------------
  178.

  Signature:  175                                       Signature:  MS. Dig

  187.

  Signature:  Bodl. 750 (S.C. 2661)                     Signature:  MS. Bodl. 750
  Library:    Biblioteca Bodleiana                      Library:    University of Oxford
                                                        Coll.:      MSS. Bodl. (Bodley)
  Location:   Oxford                                    Location:   Oxford
  Authors:    Opus sine auctore                         Authors:    Hugh, of Saint-Victor, 1096?-1141 *** Bernard, of
                                                                    Clairvaux, Saint, 1090 or 1091-1153, pseudo ***
                                                                    Grosseteste, Robert, 1175?-1253
  Notes:      129 ff                                    Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/bodley_manuscript_1737

-------------------------------------------------------------------------------------------------------------------------

  197.

  Signature:  Digby 11 (S.C. 1612)                      Signature:  OXFORD, Bodleian Library, Digby 011
  Library:    Biblioteca Bodleiana                      Library:    
                                                        Coll.:      
  Location:   Oxford                                    Location:   Oxford
  Authors:    Iohannes de Plano Carpini *** Odoricus    Authors:    
              de Portu Naonis
  Notes:      1, 91, 92, 103, 013, 203 ff               Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/bibale_14434

----------------------------------------------------------------------------------------------------------------------------------
  198.

  Signature:  Digby 11 (S.C. 1612)                      Signature:  OXFORD, Bodleian Library, Digby 011 (Madan 01612)
  Library:    Biblioteca Bodleiana                      Library:    
                                                        C

  206.

  Signature:  Fairfax 23                                Signature:  MS. Fairfax 23
  Library:    Biblioteca Bodleiana                      Library:    University of Oxford
                                                        Coll.:      MSS. Fairfax
  Location:   Oxford                                    Location:   Oxford
  Authors:    Iohannes de Mandavilla                    Authors:    MANDEVILLE, JOHN, Sir
  Notes:      226 ff                                    Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/bodley_manuscript_4858

----------------------------------------------------------------------------------------------------------------------------------
  207.

  Signature:  Holkham misc. 28                          Signature:  MS. Holkham misc. 28
  Library:    Biblioteca Bodleiana                      Library:    University of Oxford *** Holkham Hall (Norfolk),
                          

  214.

  Signature:   Laud misc. 410 (S.C. 1071)               Signature:  MS. Laud Misc. 410
  Library:    Biblioteca Bodleiana                      Library:    University of Oxford
                                                        Coll.:      MSS. Laud Misc. (Laud miscellaneous)
  Location:   Oxford                                    Location:   Oxford
  Authors:    Sanctus Brendanus                         Authors:    Walahfrid Strabo, 807?-849 *** Paulus Neapolitanus
                                                                    ca. 640 *** Bertrando, di La Tour, -1332?
  Notes:      113 ff, 250x165 mm                        Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/bodley_manuscript_7211

----------------------------------------------------------------------------------------------------------------------------------
  215.

  Signature:  Laud misc. 44 (S.C. 545)                  Signature

  224.

  Signature:  Rawlinson B.188                           Signature:  MS. Rawl. B. 188
  Library:    Biblioteca Bodleiana                      Library:    University of Oxford
                                                        Coll.:      MSS. Rawl. B (Rawlinson B)
  Location:   Oxford                                    Location:   Oxford
  Authors:    Giraldus Cambrensis                       Authors:    GIRALDUS, CAMBRENSIS
  Notes:      98 ff                                     Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/bodley_manuscript_7991

----------------------------------------------------------------------------------------------------------------------------------
  225.

  Signature:  Selden supra 34 (S.C. 3422)               Signature:  MS. Selden Supra 34
  Library:    Biblioteca Bodleiana                      Library:    University of Oxford
                                        

  234.

  Signature:  2477                                      Signature:  Latin 2477
  Library:    Bibliothèque nationale de France          Library:    PARIS, Bibliothèque nationale de France,
                                                                    Manuscrits
                                                        Coll.:      Bibliotheque nationale de France, Archives et
                                                                    Manuscrits (Online Catalogue)
  Location:   Parigi                                    Location:   Paris
  Authors:    Iohannes de Plano Carpini                 Authors:    Augustine, Saint, Bishop of Hippo *** Honorius, of
                                                                    Autun, approximately 1080-approximately 1156 ***
                                                                    GIOVANNI, DA PIAN DEL CARPINE, Archbishop of
                                                                    Antivari, -1252 *** Ma

  242.

  Signature:  Nouv. Acq. lat. 905                       Signature:  247 *** NAL 905
  Library:    Bibliothèque nationale de France          Library:    PARIS, Bibliothèque nationale de France,
                                                                    Manuscrits
                                                        Coll.:      Bibliotheque nationale de France, Archives et
                                                                    Manuscrits (Online Catalogue)
  Location:   Parigi                                    Location:   London
  Authors:    Iohannes Boccaccius                       Authors:    BOCCACCIO, GIOVANNI, 1313-1375
  Notes:      903, 247                                  Measures:   folios: 173, height: 210, width: 145
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_4066

------------------------------------------------------------------------------------------------------------

  249.

  Signature:  lat. 12597                                Signature:  Latin 12597
  Library:    Bibliothèque nationale de France          Library:    Corbie, Benedictine abbey *** PARIS, Bibliothèque
                                                                    nationale de France, Manuscrits *** Abbaye de
                                                                    Saint-Germain-des-Prés (Paris) (543-1794)
                                                        Coll.:      Bibliotheque nationale de France, Archives et
                                                                    Manuscrits (Online Catalogue)
  Location:   Parigi                                    Location:   Paris
  Authors:    Sanctus Brendanus                         Authors:    
  Notes:      171 ff, 310x210 mm                        Measures:   folios: 171, height: 310, width: 215
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/

  257.

  Signature:  lat. 1616                                 Signature:  Latin 1616
  Library:    Bibliothèque nationale de France          Library:    PARIS, Bibliothèque nationale de France,
                                                                    Manuscrits
                                                        Coll.:      Bibliotheque nationale de France, Archives et
                                                                    Manuscrits (Online Catalogue)
  Location:   Parigi                                    Location:   Paris
  Authors:    Johannes Presbyter *** Franciscus         Authors:    Petrarca, Francesco, 1304-1374 *** Jacques, de
              Pipinus                                               Vitry, approximately 1170-1240 *** Dares,
                                                                    Phrygius, 5th-6th cent. (?) *** Benvenutus, de
                                                                    Imola, -1387 or 1388 *** Bruni,

  263.

  Signature:  lat. 1827                                 Signature:  Latin 1827
  Library:    Bibliothèque nationale de France          Library:    PARIS, Bibliothèque nationale de France,
                                                                    Manuscrits
                                                        Coll.:      Bibliotheque nationale de France, Archives et
                                                                    Manuscrits (Online Catalogue)
  Location:   Parigi                                    Location:   Paris
  Authors:    Beda Venerabilis                          Authors:    BEDE, THE VENERABLE, Saint, 673-735 *** Gennadius,
                                                                    of Marseilles, active 5th century *** Jerome,
                                                                    Saint, -419 or 420
  Notes:      403 ff, 330x220 mm                        Measures:   folios: 403, height: 330, width: 220
              

  270.

  Signature:  lat. 2384                                 Signature:  Latin 2384
  Library:    Bibliothèque nationale de France          Library:    PARIS, Bibliothèque nationale de France,
                                                                    Manuscrits
                                                        Coll.:      Bibliotheque nationale de France, Archives et
                                                                    Manuscrits (Online Catalogue)
  Location:   Parigi                                    Location:   Paris
  Authors:    Beda Venerabilis                          Authors:    Gregory, I, Pope, approximately 540-604 *** BEDE,
                                                                    THE VENERABLE, Saint, 673-735 *** Alcuin, 735-804
                                                                    *** Nicholas, of Lyra, approximately 1270-1349 ***
                                                                    Jerome, Saint, -

  276.

  Signature:  lat. 2860                                 Signature:  Latin 2860
  Library:    Bibliothèque nationale de France          Library:    PARIS, Bibliothèque nationale de France,
                                                                    Manuscrits
                                                        Coll.:      Bibliotheque nationale de France, Archives et
                                                                    Manuscrits (Online Catalogue)
  Location:   Parigi                                    Location:   Paris
  Authors:    Opus sine auctore                         Authors:    
  Notes:      84 ff, 185x135 mm                         Measures:   folios: 84, height: 185, width: 135
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_219233

----------------------------------------------------------------------------------------------------------------------------------
  277

  284.

  Signature:  lat. 3784                                 Signature:  Latin 3784
  Library:    Bibliothèque nationale de France          Library:    Limoges, Saint-Sauveur, puis Saint-Martial, abbaye
                                                                    *** PARIS, Bibliothèque nationale de France,
                                                                    Manuscrits
                                                        Coll.:      Bibliotheque nationale de France, Archives et
                                                                    Manuscrits (Online Catalogue)
  Location:   Parigi                                    Location:   Paris
  Authors:    Sanctus Brendanus                         Authors:    Ademarus Cabanensis (988?-1034) *** Jerome, Saint,
                                                                    -419 or 420
  Notes:      1, 131, 2, 132, 034                       Measures:   folios: 131
                                     

  291.

  Signature:  lat. 4806                                 Signature:  Latin 4806
  Library:    Bibliothèque nationale de France          Library:    PARIS, Bibliothèque nationale de France,
                                                                    Manuscrits
                                                        Coll.:      Bibliotheque nationale de France, Archives et
                                                                    Manuscrits (Online Catalogue)
  Location:   Parigi                                    Location:   Paris
  Authors:    Dicuil Hibernicus                         Authors:    Aethicus Ister *** Antoninus Augustus
  Notes:      40 ff, 252x205 mm                         Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_221962

----------------------------------------------------------------------------------------------------------------------------------
  2

  298.

  Signature:  lat. 4999 A                               Signature:  Latin 4999
  Library:    Bibliothèque nationale de France          Library:    PARIS, Bibliothèque nationale de France,
                                                                    Manuscrits
                                                        Coll.:      Bibliotheque nationale de France, Archives et
                                                                    Manuscrits (Online Catalogue)
  Location:   Parigi                                    Location:   Paris
  Authors:    Beda Venerabilis                          Authors:    Isidore, of Seville, Saint, -636 *** BEDE, THE
                                                                    VENERABLE, Saint, 673-735 *** Geoffrey, of
                                                                    Monmouth, Bishop of St. Asaph, 1100?-1154 ***
                                                                    Damasus, I, Pope, 305-384 *** E

  306.

  Signature:  lat. 5515a                                Signature:  Latin 5515A
  Library:    Bibliothèque nationale de France          Library:    PARIS, Bibliothèque nationale de France,
                                                                    Manuscrits
                                                        Coll.:      Bibliotheque nationale de France, Archives et
                                                                    Manuscrits (Online Catalogue)
  Location:   Parigi                                    Location:   Paris
  Authors:    Opus sine auctore                         Authors:    
  Notes:                                                Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_223023

----------------------------------------------------------------------------------------------------------------------------------
  307.

  Signature:  lat. 5572        

  314.

  Signature:  lat. 6244a                                Signature:  Latin 6244A
  Library:    Bibliothèque nationale de France          Library:    PARIS, Bibliothèque nationale de France,
                                                                    Manuscrits
                                                        Coll.:      Bibliotheque nationale de France, Archives et
                                                                    Manuscrits (Online Catalogue)
  Location:   Parigi                                    Location:   Paris
  Authors:    Johannes Presbyter                        Authors:    Eugenius, IV, Pope, 1383-1447 *** Paul, of Venice,
                                                                    1372-1429 *** Pipinus, Franciscus, 1270-1328
  Notes:      266 ff, 217x170 mm                        Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_223960

-------

  322.

  Signature:  n.a. lat. 781                             Signature:  Paris, Bibliothèque nationale de France,
                                                                    Manuscrits, nouv. acq. lat. 0781
  Library:    Bibliothèque nationale de France          Library:    PARIS, Bibliothèque nationale de France,
                                                                    Manuscrits *** London, Bernard Quaritch Ltd.
                                                                    (1847-) *** Padua, Benedictine abbey of Santa
                                                                    Guistina
                                                        Coll.:      Vente, London, Sotheby's, 1903, 27 avril-2 mai
                                                                    (Vente Phillipps) *** Collection de Frederick
                                                                    North, 5e comte de Guilford *** Vente, London,
                         

  329.

  Signature:  1414                                      Signature:  Reims (F), Bibliothèque municipale, 1414 (M. 822)
  Library:    Bibliothèque municipale de Reims          Library:    Abbaye Saint-Euverte (Orléans)
                                                        Coll.:      
  Location:   Reims                                     Location:   
  Authors:    Rorgus Fretellus de Nazareth              Authors:    
  Notes:      102 ff, 362x260 mm                        Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/bibale_27748

----------------------------------------------------------------------------------------------------------------------------------
  330.

  Signature:  1414                                      Signature:  1414
  Library:    Bibliothèque municipale de Reims          Library:    Bibliothèque municipale de Reims
                                                        Coll.:

  338.

  Signature:  A. 506 (666)                              Signature:  666
  Library:    Bibliothèque Jacques Villon               Library:    Bibliothèque municipale de Rouen
                                                        Coll.:      Catalogue general des manuscrits des bibliotheques
                                                                    publiques de France, v. 1
  Location:   Rouen                                     Location:   Rouen
  Authors:    Opus sine auctore                         Authors:    
  Notes:      280 ff, 182x124 mm                        Measures:   folios: 280, height: 182, width: 124
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_132116

----------------------------------------------------------------------------------------------------------------------------------
  339.

  Signature:  U. 102 (1393)                             Signature:  1393
  Library:    Bi

  346.

  Signature:  1040                                      Signature:  Tours, Bibliothèque municipale, 1040
  Library:    Biblioteca municipale di Tours            Library:    
                                                        Coll.:      Bibliothèque municipale de Tours *** Bibliothèque
                                                                    de Saint-Gatien de Tours
  Location:   Tours                                     Location:   Tours
  Authors:    Opus sine auctore                         Authors:    
  Notes:      133 ff, 165x235 mm                        Measures:   
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/bibale_49119

----------------------------------------------------------------------------------------------------------------------------------
  347.

  Signature:  Fonds ancien 1876                         Signature:  Troyes (F), Médiathèque du Grand Troyes, 1876
  Library:    Médi

  357.

  Signature:  Weiss. 41 (4125)                          Signature:  DWOLH041WEISSB/
  Library:    Herzog August Bibliothek                  Library:    Herzog August Bibliothek
                                                        Coll.:      
  Location:   Wolfenbüttel                              Location:   
  Authors:    Iohannes de Plano Carpini                 Authors:    Bacon, Roger, 1214?-1294
  Notes:      254 ff, 295x217 mm                        Measures:   folios: 253
                                                        IRI:        http://ldf.fi/mmm/manifestation_singleton/sdbm_orphan_107612

----------------------------------------------------------------------------------------------------------------------------------
  358.

  Signature:  M. ch. f. 32                              Signature:  DWURUCF032/00
  Library:    University Library Würzburg               Library:    Würzburg, University Library
                                                        

In [139]:
# Positions (in `results`) of the wrong matches identified during the manual review.
# NOTE(review): these indices are only valid for the exact `results` list that was reviewed;
# if the matching step is re-run and yields a different ordering they must be checked again.
wrong_indices = [6, 38, 53, 81, 84, 142, 151, 169, 172, 208, 352]

# Normalise the indices against the length of `results`: indexing a range raises IndexError
# for an out-of-range position (just like results[i] would) and resolves negative indices.
rejected_positions = {range(len(results))[i] for i in wrong_indices}

# Drop the wrong matches by POSITION rather than by value: list.remove() deletes the first
# element that compares equal, which is not necessarily the one sitting at the reviewed index.
# The surviving matches are ordered by the Imago record's location, library and signature.
correct_matches = sorted(
    (match for position, match in enumerate(results) if position not in rejected_positions),
    key=lambda match: (match[0]['Location'], match[0]['Library'], match[0]['Signature']),
)

# Recupero della conoscenza sui Mss. mappati

In [175]:
# Maps each (signature, library) pair to the list of IRIs of the Imago manuscripts having it.
# Column 0 of 'imago_IRI.csv' is read as the IRI, column 1 as the signature and column 2 as
# the library; the first row is skipped as a header.
imgIRIs = {}
with open('imago_IRI.csv', encoding='utf-8', newline='') as file:
    reader = csv.reader(file)
    next(reader, None)  # skip the header row (no error on an empty file)
    for row in reader:
        # Group in a single pass, keyed on exactly the two columns used for the lookup,
        # so rows carrying extra trailing columns are still matched.
        imgIRIs.setdefault((row[1], row[2]), []).append(row[0])
        

In [141]:
# List of the IRIs of all the mapped MMM manuscripts, without duplicates and in order of
# first appearance in correct_matches. dict.fromkeys keeps insertion order, which avoids
# the quadratic list.index lookup of sorted(set(...), key=list.index).
mmmIRIs = list(dict.fromkeys(match[1]['IRI'] for match in correct_matches))

In [123]:
# Prefix -> namespace IRI table. Keys keep the trailing colon so that they can be used
# directly both in "@prefix" declarations and when abbreviating full IRIs.
namespace = dict([
    ('owl:', 'http://www.w3.org/2002/07/owl#'),
    ('rdf:', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'),
    ('xml:', 'http://www.w3.org/XML/1998/namespace'),
    ('xsd:', 'http://www.w3.org/2001/XMLSchema#'),
    ('rdfs:', 'http://www.w3.org/2000/01/rdf-schema#'),
    ('ecrm:', 'http://erlangen-crm.org/current/'),
    ('efrbroo:', 'http://erlangen-crm.org/efrbroo/'),
    ('SKOS:', 'http://www.w3.org/2004/02/skos/core#'),
    ('mmms:', 'http://ldf.fi/schema/mmm/'),
])

# Classes that are declared as owl:Class in the generated Turtle file.
owlClasses = [
    'efrbroo:F4_Manifestation_Singleton',
    'ecrm:E21_Person',
    'ecrm:E74_Group',
    'ecrm:E39_Actor',
    'ecrm:E33_Linguistic_Object',
    'efrbroo:F2_Expression',
    'ecrm:E78_Collection',
    'mmms:Source',
]

# Properties retrieved from the endpoint. The first two (rdf:type and SKOS:prefLabel) are the
# only ones requested for entities linked to a manuscript, so their position matters.
properties = [
    'rdf:type',
    'SKOS:prefLabel',
    'ecrm:P51_has_former_or_current_owner',
    'ecrm:P128_carries',
    'ecrm:P46i_forms_part_of',
    'ecrm:P3_has_note',
    'mmms:data_provider_url',
]

# IRIs of the entities linked to the mapped manuscripts; their labels (SKOS:prefLabel) and
# classes (rdf:type) are retrieved later on.
linkedEntities = list()

In [142]:
def getKnowledge(iri, manuscriptQuery=False):
    """Query the MMM SPARQL endpoint for an entity and return its Turtle predicate-object pairs.

    Parameters
    ----------
    iri : str
        IRI of the entity used as subject of the query.
    manuscriptQuery : bool, default False
        When True the entity is treated as a manuscript: every property in the module-level
        ``properties`` list is requested and the IRIs found as objects (except the objects of
        rdf:type) are appended to the module-level ``linkedEntities`` list. When False only
        the first two properties (rdf:type and SKOS:prefLabel) are requested and
        ``linkedEntities`` is left untouched.

    Returns
    -------
    list of str
        One string per result row, formatted as ``"  <property> <object>"`` (two leading
        spaces, no statement terminator).

    Notes
    -----
    Performs a network request to http://ldf.fi/mmm/sparql; errors raised by SPARQLWrapper
    are not caught here.
    """
    RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
    RESULTS_NS = '{http://www.w3.org/2005/sparql-results#}'

    def abbreviate(uri):
        # Turn a full IRI into a prefixed name when it falls in a known namespace and the
        # remainder is a plain local name; otherwise fall back to the always-valid <IRI> form.
        for prefix, ns_uri in namespace.items():
            if uri.startswith(ns_uri) and re.fullmatch(r'\w[\w\-]*', uri[len(ns_uri):]):
                return prefix + uri[len(ns_uri):]
        return f'<{uri}>'

    # A manuscript query considers every property of the 'properties' list; a query about a
    # linked entity (work, collection, ...) only considers rdf:type and SKOS:prefLabel.
    queryProperties = properties if manuscriptQuery else properties[:2]

    sparql = SPARQLWrapper('http://ldf.fi/mmm/sparql')

    sparql.setQuery(f"""
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX ecrm: <http://erlangen-crm.org/current/>
        PREFIX SKOS: <http://www.w3.org/2004/02/skos/core#>
        PREFIX mmms: <http://ldf.fi/schema/mmm/>

        SELECT DISTINCT ?p ?o WHERE {{
          <{iri}> ?p ?o .
          FILTER(?p in ({', '.join(queryProperties)}))
        }}

    """)

    ret = sparql.queryAndConvert()
    root = ET.fromstring(ret.toxml())

    prop_obj_strings = []  # strings holding one property-object pair each
    for result in root.iter(f'{RESULTS_NS}result'):
        # Pick the bindings by variable name instead of relying on their order in the row.
        values = {binding.get('name'): binding[0] for binding in result}
        prop_uri = values['p'].text
        obj_node = values['o']
        obj = obj_node.text or ''  # an empty literal has no text node

        if obj_node.tag == f'{RESULTS_NS}uri':  # the tag tells IRIs apart from strings (labels, notes)
            if prop_uri == RDF_TYPE:
                # Compare the ORIGINAL property IRI: the abbreviated form is 'rdf:type' and
                # would never equal the full IRI. Classes are abbreviated and are not
                # recorded as linked entities.
                obj = abbreviate(obj)
            else:
                if manuscriptQuery:
                    linkedEntities.append(obj)
                obj = f'<{obj}>'
        else:
            # Anything that is not an IRI is serialised as an xsd:string literal.
            # Backslashes must be escaped before the quotes, otherwise the backslash added
            # for a quote would be escaped again; line breaks are not allowed in a
            # single-quoted Turtle string, so they are flattened to spaces.
            obj = obj.replace('\\', '\\\\').replace('"', '\\"')
            obj = obj.replace('\r\n', ' ').replace('\n', ' ').replace('\r', ' ')
            obj = f'"{obj}"^^xsd:string'

        prop_obj_strings.append(f"  {abbreviate(prop_uri)} {obj}")

    return prop_obj_strings
    

In [144]:
# Builds the Turtle file containing the RDF triples about the mapped manuscripts.

def _writeSubject(file, subjectIRI, lines):
    """Write '<subjectIRI>' followed by its predicate-object lines, ';'-separated and '.'-terminated.

    Nothing is written when ``lines`` is empty, since a subject without predicates is not
    valid Turtle.
    """
    if not lines:
        return
    file.write(f'\n<{subjectIRI}>\n')
    file.write(' ;\n'.join(lines) + ' .\n')


# getKnowledge() appends to the module-level linkedEntities list: reset it so that re-running
# this cell does not carry over IRIs collected by a previous execution.
linkedEntities.clear()

# Collect everything BEFORE opening the output file, so that a failed query does not leave a
# truncated knowledge.ttl behind.
subjectBlocks = []  # (subject IRI, predicate-object lines) pairs, in output order

for mmmIRI in mmmIRIs:
    lines = getKnowledge(mmmIRI, True)
    # Link the MMM manuscript to every Imago manuscript matched with it.
    for imgRecord in (match[0] for match in correct_matches if match[1]['IRI'] == mmmIRI):
        sign_lib = (imgRecord['Signature'], imgRecord['Library'])
        # A missing pair raises KeyError: it means imago_IRI.csv and the matches disagree.
        lines.extend(f"  ecrm:P46_is_composed_of <{imgIRI}>" for imgIRI in imgIRIs[sign_lib])
    subjectBlocks.append((mmmIRI, lines))

# Sorted so that the generated file is identical from one run to the next.
for entityIRI in sorted(set(linkedEntities)):
    subjectBlocks.append((entityIRI, getKnowledge(entityIRI)))

with open('file_generati/knowledge.ttl', 'w', encoding='utf-8') as file:

    for prefix in namespace:
        file.write(f"@prefix {prefix} <{namespace[prefix]}> .\n")
    file.write('\n')

    for owlClass in owlClasses:
        file.write(f"{owlClass} rdf:type owl:Class .\n")
    file.write('ecrm:E21_Person rdfs:subClassOf ecrm:E39_Actor .\necrm:E74_Group rdfs:subClassOf ecrm:E39_Actor .\n\n')

    # Every property except rdf:type is declared as an object property.
    # NOTE(review): some of these (presumably SKOS:prefLabel and ecrm:P3_has_note) hold
    # literal values - confirm whether they should be declared differently.
    for prop in properties[1:]:
        file.write(f"{prop} rdf:type owl:ObjectProperty .\n")
    file.write("ecrm:P46i_forms_part_of owl:inverseOf ecrm:P46_is_composed_of .\n")

    for subjectIRI, lines in subjectBlocks:
        _writeSubject(file, subjectIRI, lines)