In [1]:
from elasticsearch import Elasticsearch
import requests
import os
import json
import pyld.jsonld
from bs4 import BeautifulSoup

In [2]:
# Client for the Elasticsearch instance on localhost; the last expression shows
# the cluster info so a failed connection is obvious right away.
ES_HOST = "http://localhost:9200"
es = Elasticsearch(ES_HOST)

cluster_info = es.info()
cluster_info.body

{'name': '2138f4a3d3b2',
 'cluster_name': 'docker-cluster',
 'cluster_uuid': 'YglcgfWLSCSyeScmTVLZjA',
 'version': {'number': '8.10.2',
  'build_flavor': 'default',
  'build_type': 'docker',
  'build_hash': '6d20dd8ce62365be9b1aca96427de4622e970e9e',
  'build_date': '2023-09-19T08:16:24.564900370Z',
  'build_snapshot': False,
  'lucene_version': '9.7.0',
  'minimum_wire_compatibility_version': '7.17.0',
  'minimum_index_compatibility_version': '7.0.0'},
 'tagline': 'You Know, for Search'}

In [3]:
def _load_expanded_jsonld(path):
    """Read a compact JSON-LD file and return it together with its expanded form.

    Args:
        path: Filesystem path of the JSON-LD document to read.

    Returns:
        A ``(compact, expanded)`` tuple: the parsed JSON document and the
        result of ``pyld.jsonld.expand`` applied to it.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default encoding.
    with open(path, 'r', encoding='utf-8') as json_file:
        compact = json.load(json_file)
    # Expansion only needs the parsed document, so the file is already closed here.
    return compact, pyld.jsonld.expand(compact)


# The *_url names hold local filesystem paths (relative to the working directory),
# not web URLs. os.path.join replaces the hard-coded backslashes, which were
# invalid string escapes ('\c', '\j', '\s', '\m', '\l') and only worked as path
# separators on Windows.
current_directory = os.getcwd()
cantus_url = os.path.join(current_directory, 'cantusdb', 'jsonld', 'compact.jsonld')
simssa_url = os.path.join(current_directory, 'simssadb', 'jsonld', 'compact.jsonld')
musicbrainz_url = os.path.join(
    current_directory, 'musicbrainz', 'jsonld', 'looping-approach', 'compact.jsonld'
)

cantus_compact, cantus_expand = _load_expanded_jsonld(cantus_url)
simssa_compact, simssa_expand = _load_expanded_jsonld(simssa_url)
musicbrainz_compact, musicbrainz_expand = _load_expanded_jsonld(musicbrainz_url)

In [7]:
def remove_url(data):
    """Recursively shorten URL-style dictionary keys to their last path segment.

    Dictionaries are rebuilt with every key replaced by the text after its last
    ``/`` (trailing slashes ignored); lists are processed element by element;
    any other value is returned unchanged. Only keys are rewritten, so string
    values that are URLs are kept as they are.

    Args:
        data: A dict, list, or scalar, e.g. one record of an expanded JSON-LD document.

    Returns:
        A new structure of the same shape with shortened dictionary keys.

    Note:
        Two keys of the same dict that end in the same segment collapse to one
        entry; the one iterated last wins.
    """
    def extract_last_part(url):
        trimmed = url.rstrip('/')
        if not trimmed:
            # Empty or slash-only key: there is no segment to extract. The
            # original fallback (parts[-2]) raised IndexError here.
            return url
        # After trimming, the text following the last "/" is never empty.
        # Keys without any "/" (such as "@id") come back unchanged.
        return trimmed.rsplit('/', 1)[-1]

    if isinstance(data, dict):
        return {extract_last_part(key): remove_url(value) for key, value in data.items()}
    if isinstance(data, list):
        return [remove_url(item) for item in data]
    return data

In [8]:
from mappings2 import example_mapping as flat_mapping

# Create the "flat" index using only the 'mappings' section of the imported definition.
flat_index_mappings = flat_mapping['mappings']
es.indices.create(index="flat", mappings=flat_index_mappings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'flat'})

In [9]:
from mappings3 import updated_mapping as nested_mapping

# Create the "nested" index using only the 'mappings' section of the imported definition.
nested_index_mappings = nested_mapping['mappings']
es.indices.create(index="nested", mappings=nested_index_mappings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'nested'})

In [10]:
# Select the index that later cells reading `index_name` operate on.
index_name = "flat"

In [11]:
# Index every expanded record of both datasets into `index_name`, shortening
# URL-style keys with remove_url first. One loop over the datasets replaces the
# two copy-pasted loops; no id is supplied, so none is imposed on the documents.
for json_data in (cantus_expand, simssa_expand):
    for ind, row in enumerate(json_data):
        response = es.index(index=index_name, document=remove_url(row), id=None)

        # Check the response from Elasticsearch
        if response["result"] != "created":
            print("Failed to index the document index", ind)

# Freshly indexed documents only become visible to search/count after a refresh.
# Force one so a count issued right after this cell (e.g. under Run All) sees
# every document. The trailing ';' keeps the cell output empty, as before.
es.indices.refresh(index=index_name);

In [12]:
# Count every document currently searchable in the index selected by `index_name`.
match_all_query = {"match_all": {}}
es.count(index=index_name, query=match_all_query)

ObjectApiResponse({'count': 167, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

In [13]:
# Switch the target index: later cells reading `index_name` now operate on "nested".
index_name = "nested"

In [14]:
# Index every expanded record of both datasets into `index_name`, shortening
# URL-style keys with remove_url first. One loop over the datasets replaces the
# two copy-pasted loops; no id is supplied, so none is imposed on the documents.
for json_data in (cantus_expand, simssa_expand):
    for ind, row in enumerate(json_data):
        response = es.index(index=index_name, document=remove_url(row), id=None)

        # Check the response from Elasticsearch
        if response["result"] != "created":
            print("Failed to index the document index", ind)

# Freshly indexed documents only become visible to search/count after a refresh.
# Force one so a count issued right after this cell (e.g. under Run All) sees
# every document. The trailing ';' keeps the cell output empty, as before.
es.indices.refresh(index=index_name);

In [15]:
# Count every document currently searchable in the index selected by `index_name`.
match_all_query = {"match_all": {}}
es.count(index=index_name, query=match_all_query)

ObjectApiResponse({'count': 167, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

In [16]:
# Fetch the mapping Elasticsearch currently holds for `index_name`; the response
# is subscripted by index name, so unwrap that single entry.
mapping_response = es.indices.get_mapping(index=index_name)
default_mapping = mapping_response[index_name]

In [22]:
# Fuzzy lookup on the "P86.P2561.@value" field of the "flat" index, using the
# deliberately misspelled term "anonimous".
# Exact-match variant, kept for comparison:
#     query = {"match": {"P86.P2561.@value": "anonymous"}}
fuzzy_clause = {"P86.P2561.@value": "anonimous"}
query = {"fuzzy": fuzzy_clause}

results = es.search(index='flat', query=query)

# Print the stored source document of every hit returned.
matching_hits = results["hits"]["hits"]
for hit in matching_hits:
    print(hit["_source"])

{'@id': 'https://cantusdatabase.org/chant/561749', '@type': ['http://www.wikidata.org/entity/Q23072435'], 'id-numbers': [{'@id': 'https://cantusindex.org/id/006540a'}], 'P86': [{'@id': 'http://www.wikidata.org/entity/Q4233718', 'P2561': [{'@value': 'Anonymous'}]}], 'Dataset': [{'@id': 'https://cantusdatabase.org/'}], 'Q4484726': [{'@value': 'F'}], 'P136': [{'@value': 'Responsory verse'}], 'P1922': [{'@value': 'Vere dominus est in loco '}], 'Q731978': [{'@id': 'http://www.wikidata.org/entity/Q960729', 'P2561': [{'@value': 'dorian'}]}], 'source': [{'@id': 'https://cantusdatabase.org/source/123756'}]}
{'@id': 'https://cantusdatabase.org/chant/179688', '@type': ['http://www.wikidata.org/entity/Q23072435'], 'id-numbers': [{'@id': 'https://cantusindex.org/id/003251'}], 'P86': [{'@id': 'http://www.wikidata.org/entity/Q4233718', 'P2561': [{'@value': 'Anonymous'}]}], 'Dataset': [{'@id': 'https://cantusdatabase.org/'}], 'Q4484726': [{'@value': 'E'}], 'P136': [{'@id': 'http://www.wikidata.org/ent

In [29]:
# Human-readable alias name for each short field identifier.
# Keys are the identifiers as they appear in the index mapping, i.e. after URL
# prefixes have been stripped from the field names (presumably Wikidata
# property "P…" and item "Q…" ids — confirm against the source data).
# Values are the alias field names to create for them.
field_alias_dict = {
    "P1476": "title",
    "P86": "composer",
    "P136": "genre",
    "P826": "tonality",
    "P2701": "file format",
    "P577": "publication date",
    "P2699": "URL",
    "P50": "author",
    "P175": "performer",
    "P1545": "series ordinal",
    "Q4484726": "final",
    "P2561": "name",
    "P135": "movement",
    "P1299": "depicted by",
    "Q731978": "mode",
}

In [33]:
def update_mapping_with_aliases(existing_mapping, field_alias_dict):
    """Return a copy of an index mapping with alias fields added.

    The mapping's ``properties`` tree is walked recursively. For every field
    whose name is a key of ``field_alias_dict`` an entry
    ``{"type": "alias", "path": <dotted path of the field>}`` is added under
    the alias name:

    * if the field has its own ``properties`` (an object field), the alias is
      stored inside those properties;
    * if the field is a leaf (no ``properties``), the alias is stored next to
      it, in the same ``properties`` dict. Previously this case raised
      ``KeyError``.

    The input mapping is never modified; previously the nested dicts of
    ``existing_mapping`` were mutated through shallow copies.

    Args:
        existing_mapping: Dict of the form ``{"mappings": {"properties": {...}}}``.
        field_alias_dict: Dict mapping field name -> alias name.

    Returns:
        A new dict ``{"mappings": {"properties": {...}}}`` that contains only
        the (updated) properties; other keys of the input are not carried over.

    Raises:
        ValueError: If ``existing_mapping["mappings"]`` has no ``"properties"`` key.
    """
    import copy

    def add_aliases_to_properties(properties, alias_dict, parent_path=""):
        # Iterate over a snapshot: sibling aliases for leaf fields are inserted
        # into `properties` itself while we walk it.
        for field, field_info in list(properties.items()):
            field_path = parent_path + field
            has_children = "properties" in field_info

            if has_children:
                # Descend first so that aliases added at this level are not
                # themselves visited as fields.
                add_aliases_to_properties(
                    field_info["properties"], alias_dict, field_path + "."
                )

            if field in alias_dict:
                alias_definition = {"type": "alias", "path": field_path}
                if has_children:
                    # NOTE(review): Elasticsearch documents that an alias target
                    # must be a concrete field, not an object. This alias points
                    # at an object field and may be rejected - confirm intent.
                    field_info["properties"][alias_dict[field]] = alias_definition
                else:
                    properties[alias_dict[field]] = alias_definition

        return properties

    if "properties" not in existing_mapping.get("mappings", {}):
        raise ValueError("The 'properties' key is missing in the existing mapping.")

    # Work on a deep copy so the caller's mapping stays untouched.
    properties = copy.deepcopy(existing_mapping["mappings"]["properties"])
    updated_properties = add_aliases_to_properties(properties, field_alias_dict)

    return {"mappings": {"properties": updated_properties}}

In [6]:
# Drop the "flat" index if it already exists.
# NOTE(review): this cell carries execution count 6 but sits at the end of the
# notebook - presumably it is meant to run before the index-creation cell; confirm placement.
flat_index_exists = es.indices.exists(index="flat")
if flat_index_exists:
    es.indices.delete(index="flat")