In [1]:
from elasticsearch import Elasticsearch
import requests
import os
import json
import pyld.jsonld
from bs4 import BeautifulSoup

In [2]:
es = Elasticsearch("http://localhost:9200")
es.info().body

{'name': '91afa29ea9cc',
 'cluster_name': 'docker-cluster',
 'cluster_uuid': 'HG8mry3STnGrQehEiJ3QgA',
 'version': {'number': '8.10.2',
  'build_flavor': 'default',
  'build_type': 'docker',
  'build_hash': '6d20dd8ce62365be9b1aca96427de4622e970e9e',
  'build_date': '2023-09-19T08:16:24.564900370Z',
  'build_snapshot': False,
  'lucene_version': '9.7.0',
  'minimum_wire_compatibility_version': '7.17.0',
  'minimum_index_compatibility_version': '7.0.0'},
 'tagline': 'You Know, for Search'}

In [3]:
current_directory = os.getcwd()
cantus_url = current_directory + '\cantusdb\jsonld\compact.jsonld'
simssa_url = current_directory + '\simssadb\jsonld\compact.jsonld'
musicbrainz_url = current_directory + '\musicbrainz\jsonld\looping-approach\compact.jsonld'

with open(cantus_url, 'r') as json_file:
    cantus_compact = json.load(json_file)
    cantus_expand = pyld.jsonld.expand(cantus_compact) 


with open(simssa_url, 'r') as json_file:
    simssa_compact = json.load(json_file)
    simssa_expand = pyld.jsonld.expand(simssa_compact) 

with open(musicbrainz_url, 'r') as json_file:
    musicbrainz_compact = json.load(json_file)
    musicbrainz_expand = pyld.jsonld.expand(musicbrainz_compact) 

In [4]:
def remove_url(data):
    def extract_last_part(url):
        parts = url.rstrip('/').split('/')
        return parts[-1] if parts[-1] else parts[-2]

    if isinstance(data, dict):
        new_data = {}
        for key, value in data.items():
            new_key = extract_last_part(key)
            new_data[new_key] = remove_url(value)
        return new_data
    elif isinstance(data, list):
        return [remove_url(item) for item in data]
    else:
        return data

In [5]:
from mappings2 import example_mapping
es.indices.create(index="example", mappings=example_mapping['mappings'])

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'example'})

In [26]:
index_name = "example"  
json_data = cantus_expand
for ind, row in enumerate(json_data):
    response = es.index(index=index_name, document=remove_url(row), id=None)

    # Check the response from Elasticsearch
    if response["result"] != "created":
        print("Failed to index the document index", ind)

In [8]:
es.count(index='example', query={"match_all": {}})

ObjectApiResponse({'count': 50, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

In [27]:
index_name = "example"  
json_data = simssa_expand
for ind, row in enumerate(json_data):
    response = es.index(index=index_name, document=remove_url(row), id=None)

    # Check the response from Elasticsearch
    if response["result"] != "created":
        print("Failed to index the document index", ind)

In [11]:
es.count(index='example', query={"match_all": {}})

ObjectApiResponse({'count': 167, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}})

In [28]:
default_mapping = es.indices.get_mapping(index="example2")['example2']

In [25]:
# query = {
#         "match": {
#             "P86.P2561.@value": "anonymous"
#         }
#     }
query = {
        "fuzzy": {
            "composer": "anonimous"
        }
    }

results = es.search(index="example", query=query)

for hit in results["hits"]["hits"]:
    print(hit["_source"])

{'@id': 'https://cantusdatabase.org/chant/561440', '@type': ['http://www.wikidata.org/entity/Q23072435'], 'id-numbers': [{'@id': 'https://cantusindex.org/id/001197'}], 'P86': [{'@id': 'http://www.wikidata.org/entity/Q4233718', 'P2561': [{'@value': 'Anonymous'}]}], 'Dataset': [{'@id': 'https://cantusdatabase.org/'}], 'Q4484726': [{'@value': 'E'}], 'P136': [{'@id': 'http://www.wikidata.org/entity/Q582093', 'P2561': [{'@value': 'Antiphon'}]}], 'P1922': [{'@value': 'A viro iniquo libera me '}], 'Q731978': [{'@id': 'http://www.wikidata.org/entity/Q1641387', 'P2561': [{'@value': 'hypophrygian'}]}], 'source': [{'@id': 'https://cantusdatabase.org/source/123756'}]}
{'@id': 'https://cantusdatabase.org/chant/671808', '@type': ['http://www.wikidata.org/entity/Q23072435'], 'id-numbers': [{'@id': 'https://cantusindex.org/id/007640'}], 'P86': [{'@id': 'http://www.wikidata.org/entity/Q4233718', 'P2561': [{'@value': 'Anonymous'}]}], 'Dataset': [{'@id': 'https://cantusdatabase.org/'}], 'Q4484726': [{'@v

In [28]:
def get_aliases(field_name):

    if field_name[0] == 'P':
        wikidata_url = f"http://www.wikidata.org/prop/direct/{field_name}"

    elif field_name[0] == 'Q':
        wikidata_url = f"http://www.wikidata.org/entity/{field_name}"
        
    else:
        return 0
    
    try:
        response = requests.get(wikidata_url)

        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')

            aliases_section = soup.find('span', {'id': 'In more languages'})
            print(aliases_section)
            labels = []
            also_known_as = []

            if aliases_section:
                rows = aliases_section.find_next('table').find_all('tr')

                for row in rows:
                    columns = row.find_all('td')
                    if len(columns) >= 2:
                        lang = columns[0].get_text().strip()
                        alias = columns[1].get_text().strip()
                        
                        if lang == "label":
                            labels.append(alias)
                        elif lang == "Also known as":
                            also_known_as.append(alias)

            return labels, also_known_as

    except Exception as e:
        print(f"An error occurred: {str(e)}")

    return None, None

In [29]:
field_alias_dict = {
    "P1476": "title",
    "P86": "composer",
    "P136": "genre",
    "P826": "tonality",
    "P2701": "file format",
    "P577": "publication date",
    "P2699": "URL",
    "P50": "author",
    "P175": "performer",
    "P1545": "series ordinal",
    "Q4484726": "final",
    "P2561": "name",
    "P135": "movement",
    "P1299": "depicted by",
    "Q731978": "mode",
}

In [33]:
def update_mapping_with_aliases(existing_mapping, field_alias_dict):
    def add_aliases_to_properties(properties, alias_dict, parent_path=""):
        updated_properties = properties.copy()
        for field, field_info in properties.items():
            
            field_path = parent_path + field

            if "properties" in field_info:
                    
                    nested_properties = field_info["properties"]
                    updated_nested = add_aliases_to_properties(
                        nested_properties, alias_dict, field_path + "."
                    )
                    field_info["properties"] = updated_nested

                  
            if field in alias_dict:
                # print('now in',field,'add alias to',field_path)
                alias_name = alias_dict[field]
                field_fields = updated_properties[field]['properties']
                field_fields[alias_name] = {
                    "type": "alias",
                    "path": field_path
                }


        return updated_properties
    
    if "properties" not in existing_mapping.get("mappings", {}):
        raise ValueError("The 'properties' key is missing in the existing mapping.")

    properties = existing_mapping["mappings"]["properties"]
    updated_properties = add_aliases_to_properties(properties, field_alias_dict)

    output_mapping = {}
    out_temp = {}
    out_temp["properties"] = updated_properties
    output_mapping["mappings"] = out_temp
    return output_mapping



In [36]:
new_mapping = update_mapping_with_aliases(example_mappings,field_alias_dict)

In [47]:
example_mapping = {
  "mappings": {
    "properties": {
      "@id": {
        "type": "text"
      },
      "@type": {
        "type": "text"
      },
      "P135": {
        "properties": {
          "@id": {
            "type": "text"
          },
          "P2561": {
            "properties": {
              "@value": {
                "type": "text"
              }
            }
          },
          # "name": {
          #   "type": "alias",
          #   "path": "P135.P2561"
          # }
        }
      },
       "movement": {
        "type": "alias",
        "path": "P135"
      },}}}

In [51]:
example_mapping={
  "mappings": {
    "doc": {
      "properties": {
        "firstname": {
          "type": "text"
        },
        "fn": {
          "type": "alias",
          "path": "firstname" 
        }
      }
    }
  }
}

In [52]:
es.indices.create(index="trytry", mappings=example_mapping['mappings'])

BadRequestError: BadRequestError(400, 'mapper_parsing_exception', 'Root mapping definition has unsupported parameters:  [doc : {properties={firstname={type=text}, fn={path=firstname, type=alias}}}]')

In [198]:
simssa_expand[23]

{'@id': 'https://db.simssa.ca/musicalworks/24',
 '@type': ['http://www.wikidata.org/entity/Q2188189'],
 'http://www.wikidata.org/prop/direct/P86': [{'@id': 'http://www.wikidata.org/entity/Q7442579',
   'http://www.wikidata.org/entity/P2561': [{'@value': 'Sebastiano Festa'}]}],
 'https://schema.org/Dataset': [{'@id': 'https://db.simssa.ca/'}],
 'https://db.simssa.ca/files/': [{'@id': 'https://db.simssa.ca/files/93',
   '@type': ['https://db.simssa.ca/files/'],
   'http://www.wikidata.org/prop/direct/P2701': [{'@id': 'http://www.wikidata.org/entity/Q2115',
     'http://www.wikidata.org/entity/P2561': [{'@value': 'xml'}]}]},
  {'@id': 'https://db.simssa.ca/files/94',
   '@type': ['https://db.simssa.ca/files/'],
   'http://www.wikidata.org/prop/direct/P2701': [{'@id': 'http://www.wikidata.org/entity/Q10610388',
     'http://www.wikidata.org/entity/P2561': [{'@value': 'midi'}]}]},
  {'@id': 'https://db.simssa.ca/files/95',
   '@type': ['https://db.simssa.ca/files/'],
   'http://www.wikidata

In [17]:
if es.indices.exists(index="example"):
    es.indices.delete(index="example")

In [16]:
es.indices.delete(index="try")

ObjectApiResponse({'acknowledged': True})