In [1]:
import json
from collections import Counter

from SPARQLWrapper import SPARQLWrapper, JSON
import time
import re

from tqdm import tqdm

import torch
from transformers import AutoTokenizer, AutoModel
import numpy as np
from langdetect import detect

# import faiss
from tenacity import retry, wait_random_exponential, before_sleep_log

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
ONTOLOGY_MAPPINGS_DIR = "../utils/ontology_mappings/"

## Collecting relation names and constraints

### Collecting labels and data types of relations

In [3]:
PROP_2_LABEL = {}
PROP_2_DATA_TYPE = {}

sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

# SPARQL query for properties with data types: Item, Quantity, Point in time
query = """
SELECT ?property ?propertyLabel ?typeLabel WHERE {
  ?property a wikibase:Property .
  ?property wikibase:propertyType ?type .
  
  VALUES ?type { wikibase:WikibaseItem wikibase:Quantity wikibase:Time }
  
  BIND(
    IF(?type = wikibase:WikibaseItem, "Item",
      IF(?type = wikibase:Quantity, "Quantity",
        IF(?type = wikibase:Time, "Point in time", "Unknown")
      )
    ) AS ?typeLabel
  )
  
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
"""

sparql.setQuery(query)
sparql.setReturnFormat(JSON)

try:
    results = sparql.query().convert()

    for result in results["results"]["bindings"]:
        prop = result["property"]["value"].split("/")[-1]
        label = result.get("propertyLabel", {}).get("value", "No label")
        data_type = result.get("typeLabel", {}).get("value", "Unknown")

        PROP_2_LABEL[prop] = label
        PROP_2_DATA_TYPE[prop] = data_type        

except Exception as e:
    print(f"Error executing SPARQL query: {e}")

In [4]:
len(PROP_2_LABEL), len(PROP_2_DATA_TYPE)

(2445, 2445)

In [5]:
set(PROP_2_DATA_TYPE.values())

{'Item', 'Point in time', 'Quantity'}

In [6]:
with open(ONTOLOGY_MAPPINGS_DIR+"prop2data_type.json", 'w') as f:
    json.dump(PROP_2_DATA_TYPE, f)

In [7]:
with open(ONTOLOGY_MAPPINGS_DIR+"prop2label.json", 'w') as f:
    json.dump(PROP_2_LABEL, f)

### Collecting relation aliases

In [8]:
len(set(PROP_2_LABEL.keys())), len(set(PROP_2_LABEL.values()))

(2445, 2445)

In [9]:
@retry(wait=wait_random_exponential(multiplier=1, max=60))
def get_property_aliases(property_id):
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    
    query = f"""
    SELECT ?alias WHERE {{
      wd:{property_id} skos:altLabel ?alias .
      FILTER (lang(?alias) = "en")
    }}
    """
    
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    
    aliases = [result["alias"]["value"] for result in results["results"]["bindings"]]
    return aliases

PROP2ALIASES = {}

for property_id in tqdm(PROP_2_LABEL.keys()):
    PROP2ALIASES[property_id] = get_property_aliases(property_id)

100%|██████████| 2445/2445 [12:34<00:00,  3.24it/s]


In [10]:
alias_set = set()
alias_list = []
for prop, aliases in PROP2ALIASES.items():
    alias_set.update(aliases)
    alias_list.extend(aliases)
print(len(alias_set), len(alias_list))

8287 8876


In [11]:
for prop in PROP_2_LABEL:
    print(PROP_2_LABEL[prop], PROP2ALIASES[prop])

Human Development Index ['HDI']
population ['inhabitants', 'human population']
maximum capacity ['crew', 'capacity', 'seats', 'complement', 'number of seats', 'seating capacity']
atomic number ['Z', 'atom number', 'number of protons', 'proton number']
Elo rating []
Mohs' hardness ['hardness', 'hardness of mineral', 'mineral hardness', 'Mohs hardness']
redshift ['Z', 'z']
total produced ['circulation', 'number built', 'number made', 'number produced', 'production quantity', 'qty built', 'qty made', 'qty produced', 'quantity issued', 'total built', 'total made']
gross tonnage ['GRT', 'GT', 'ship gross tonnage', 'tonnage gross']
orbital eccentricity ['eccentricity']
g-factor []
number of speakers, writers, or signers ['signers of a language', 'speakers of language', 'writers of a language']
number of masts []
number of cylinders []
floors above ground ['stories', 'floor count', 'levels above ground', 'number of floors', 'number of stories', 'storeys']
flattening ['ellipticity', 'oblatenes

In [12]:
with open(ONTOLOGY_MAPPINGS_DIR+"prop2label.json", 'w') as f:
    json.dump(PROP_2_LABEL, f)

with open(ONTOLOGY_MAPPINGS_DIR+"prop2aliases.json", 'w') as f:
    json.dump(PROP2ALIASES, f)

In [13]:
with open(ONTOLOGY_MAPPINGS_DIR+"prop2label.json", 'r') as f:
    PROP_2_LABEL = json.load(f)

with open(ONTOLOGY_MAPPINGS_DIR+"prop2aliases.json", 'r') as f:
    PROP2ALIASES = json.load(f)

### Collecting subject and value constraints of relations

In [14]:
from SPARQLWrapper import SPARQLWrapper, JSON

@retry(wait=wait_random_exponential(multiplier=1, max=60))
def get_constraints(property_id):
    """Retrieve value-type and subject-type constraints for a specified Wikidata property."""
    
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    
    query = f"""
    SELECT ?constraintType ?entity ?entityLabel WHERE {{
      VALUES ?property {{ wd:{property_id} }}  

      ?property p:P2302 ?statement.  # Property constraints
      ?statement ps:P2302 ?constraintEntity.  # Constraint type

      VALUES ?constraintEntity {{ wd:Q21510865 wd:Q21503250 }}  # Value-type & Subject-type constraints

      ?statement pq:P2308 ?entity.  # The constrained entity type (allowed type)

      BIND(
        IF(?constraintEntity = wd:Q21510865, "Value-type constraint", "Subject type constraint")
        AS ?constraintType
      )
    }}
    """
    # SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
    
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    constraints = {"Value-type constraint": [], "Subject type constraint": []}
    for result in results["results"]["bindings"]:
        constraints[result["constraintType"]["value"]].append(result["entity"]["value"].split("/")[-1])

    return constraints

# Example usage:
property_id = "P40"  # Replace with any Wikidata property ID
constraints = get_constraints(property_id)

print(constraints)

{'Value-type constraint': ['Q5', 'Q729', 'Q4886', 'Q95074', 'Q178885', 'Q207174', 'Q795052', 'Q2135501', 'Q4271324', 'Q13002315', 'Q16979650', 'Q21070568', 'Q21070598', 'Q21191150', 'Q24334299', 'Q64520857', 'Q75855169', 'Q115537581'], 'Subject type constraint': ['Q5', 'Q729', 'Q4886', 'Q95074', 'Q178885', 'Q207174', 'Q215627', 'Q219160', 'Q795052', 'Q2135501', 'Q3046146', 'Q4271324', 'Q13002315', 'Q16979650', 'Q21070568', 'Q21070598', 'Q24334299', 'Q75855169', 'Q115537581']}


In [15]:
constraint_dict = {}

for prop in tqdm(PROP_2_LABEL.keys()):
    constraint_dict[prop] = get_constraints(prop)
    time.sleep(0.1)
len(constraint_dict)

100%|██████████| 2445/2445 [21:27<00:00,  1.90it/s]  


2445

In [16]:
with open(ONTOLOGY_MAPPINGS_DIR+"prop2data_type.json", 'r') as f:
    PROP_2_DATA_TYPE = json.load(f)

In [17]:
PROP_2_DATA_TYPE

{'P1081': 'Quantity',
 'P1082': 'Quantity',
 'P1083': 'Quantity',
 'P1086': 'Quantity',
 'P1087': 'Quantity',
 'P1088': 'Quantity',
 'P1090': 'Quantity',
 'P1092': 'Quantity',
 'P1093': 'Quantity',
 'P1096': 'Quantity',
 'P1097': 'Quantity',
 'P1098': 'Quantity',
 'P1099': 'Quantity',
 'P1100': 'Quantity',
 'P1101': 'Quantity',
 'P1102': 'Quantity',
 'P1103': 'Quantity',
 'P1104': 'Quantity',
 'P1106': 'Quantity',
 'P1107': 'Quantity',
 'P1108': 'Quantity',
 'P1109': 'Quantity',
 'P1110': 'Quantity',
 'P1111': 'Quantity',
 'P1113': 'Quantity',
 'P1114': 'Quantity',
 'P1117': 'Quantity',
 'P1120': 'Quantity',
 'P1121': 'Quantity',
 'P1122': 'Quantity',
 'P1123': 'Quantity',
 'P1125': 'Quantity',
 'P1126': 'Quantity',
 'P1127': 'Quantity',
 'P1128': 'Quantity',
 'P1129': 'Quantity',
 'P1132': 'Quantity',
 'P1139': 'Quantity',
 'P1141': 'Quantity',
 'P1148': 'Quantity',
 'P1164': 'Quantity',
 'P1174': 'Quantity',
 'P1181': 'Quantity',
 'P1193': 'Quantity',
 'P1198': 'Quantity',
 'P1215': 

In [18]:
constraint_dict['P2294']

{'Value-type constraint': [], 'Subject type constraint': ['Q56061']}

In [19]:
wo_constraint = []
for prop in constraint_dict:
    if len(constraint_dict[prop]["Value-type constraint"]) == 0 and len(constraint_dict[prop]["Subject type constraint"]) == 0:
            wo_constraint.append(prop)
len(wo_constraint)

584

In [20]:
quantity_props = []
time_props = []
other_props = []
for prop in wo_constraint:
    if PROP_2_DATA_TYPE[prop] == "Quantity": 
        quantity_props.append(prop)
    elif PROP_2_DATA_TYPE[prop] == "Point in time": 
        time_props.append(prop)
    else:
        other_props.append(prop)
        
        # print(PROP_2_LABEL[prop])
len(time_props), len(quantity_props), len(other_props)

(28, 298, 258)

In [21]:
sum((28, 295, 258))

581

In [22]:
for prop in constraint_dict:
    if PROP_2_DATA_TYPE[prop] == "Point in time":
        constraint_dict[prop]["Value-type constraint"].append('Q186408')

    elif PROP_2_DATA_TYPE[prop] == 'Quantity':
        constraint_dict[prop]["Value-type constraint"].append('Q309314')

In [23]:
wo_constraint = []
for prop in constraint_dict:
    if len(constraint_dict[prop]["Value-type constraint"]) == 0 and len(constraint_dict[prop]["Subject type constraint"]) == 0:
            wo_constraint.append(prop)
len(wo_constraint)

258

In [24]:
wo_constraint = []
for prop in constraint_dict:
    if len(constraint_dict[prop]["Value-type constraint"]) == 0:
            constraint_dict[prop]["Value-type constraint"] = ['ANY']
    if len(constraint_dict[prop]["Subject type constraint"]) == 0:
        constraint_dict[prop]["Subject type constraint"] = ['ANY']
len(wo_constraint)

0

In [25]:
wo_constraint = []
for prop in constraint_dict:
    if len(constraint_dict[prop]["Value-type constraint"]) == 0 and len(constraint_dict[prop]["Subject type constraint"]) == 0:
            wo_constraint.append(prop)
len(wo_constraint)

0

In [26]:
with open(ONTOLOGY_MAPPINGS_DIR+'prop2constraints.json', 'w') as f:
    json.dump(constraint_dict, f)

In [27]:
with open(ONTOLOGY_MAPPINGS_DIR+'prop2constraints.json', 'r') as f:
    constraint_dict = json.load(f)

In [28]:
with open(ONTOLOGY_MAPPINGS_DIR+"prop2data_type.json", 'r') as f:
    PROP_2_DATA_TYPE = json.load(f)

## Colecting entities' metadata

In [29]:
entities = set()
for prop, constraint in constraint_dict.items():

    for const_type in constraint:
        for entity in constraint[const_type]:
            entities.add(entity)
entities = list(entities)

In [30]:
len(entities)

3576

### Collecting entities' hierarchy of superclasses

In [31]:
@retry(wait=wait_random_exponential(multiplier=1, max=60))
def get_subclass_hierarchy(entity_id):
      sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

      # SPARQL query to get all subclasses (direct and indirect) of the given entity
      query = f"""
      SELECT DISTINCT ?subclass ?subclassLabel WHERE {{
          {{
              wd:{entity_id} wdt:P31/wdt:P279* ?subclass.
          }}
            UNION
          {{
              wd:{entity_id} wdt:P279* ?subclass.
          }}
      }}
      """
    # SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}

      sparql.setQuery(query)
      sparql.setReturnFormat(JSON)

      results = sparql.query().convert()

      subclass_hierarchy = []

      for result in results["results"]["bindings"]:
          subclass_id = result["subclass"]["value"].split("/")[-1]
          subclass_hierarchy.append(subclass_id)

      return subclass_hierarchy

ENTITY_2_HIERARCHY = {}
for entity_id in tqdm(entities):
    hierarchy = get_subclass_hierarchy(entity_id)
    ENTITY_2_HIERARCHY[entity_id] = hierarchy

len(ENTITY_2_HIERARCHY)

100%|██████████| 3576/3576 [30:26<00:00,  1.96it/s]  


3576

In [32]:
ents = []
for item in ENTITY_2_HIERARCHY.values():
    ents.extend(item)
len(set(ents)), len(entities)

(7312, 3576)

In [33]:
# leaving only entity types that are used in constraints
for entity in tqdm(ENTITY_2_HIERARCHY):
    filtered_super_entities = [item for item in ENTITY_2_HIERARCHY[entity] if item in entities]
    ENTITY_2_HIERARCHY[entity] = filtered_super_entities

100%|██████████| 3576/3576 [00:04<00:00, 849.51it/s]


In [34]:
ents = []
for item in ENTITY_2_HIERARCHY.values():
    ents.extend(item)
len(set(ents)), len(entities)

(3576, 3576)

In [35]:
with open(ONTOLOGY_MAPPINGS_DIR + 'entity_hierarchy.json', 'w') as f:
    json.dump(ENTITY_2_HIERARCHY, f)

### Collecting entity types' labels

In [36]:
BATCH_SIZE = 50

@retry(wait=wait_random_exponential(multiplier=1, max=60))
def fetch_labels(batch):
    entity_values = " ".join(f"wd:{entity}" for entity in batch)
    
    query = f"""
    SELECT ?entity ?entityLabel WHERE {{
      VALUES ?entity {{ {entity_values} }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }}
    }}
    """
    
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    
    try:
        results = sparql.query().convert()
        return {
            result["entity"]["value"].split("/")[-1]: result.get("entityLabel", {}).get("value", "No label")
            for result in results["results"]["bindings"]
        }
    except Exception as e:
        print(f"Error with batch {batch[:5]}...: {e}")
        return {}

ENTITY_2_LABEL = {}

for i in range(0, len(entities), BATCH_SIZE):
    batch = entities[i:i + BATCH_SIZE]
    print(f"Processing batch {i // BATCH_SIZE + 1}/{(len(entities) // BATCH_SIZE) + 1}")
    
    labels = fetch_labels(batch)
    ENTITY_2_LABEL.update(labels)
    
# for entity, label in all_labels.items():
#     print(f"{entity}: {label}")

Processing batch 1/72
Processing batch 2/72
Processing batch 3/72
Processing batch 4/72
Processing batch 5/72
Processing batch 6/72
Processing batch 7/72
Processing batch 8/72
Processing batch 9/72
Processing batch 10/72
Processing batch 11/72
Processing batch 12/72
Processing batch 13/72
Processing batch 14/72
Processing batch 15/72
Processing batch 16/72
Processing batch 17/72
Processing batch 18/72
Processing batch 19/72
Processing batch 20/72
Processing batch 21/72
Processing batch 22/72
Processing batch 23/72
Processing batch 24/72
Processing batch 25/72
Processing batch 26/72
Processing batch 27/72
Processing batch 28/72
Processing batch 29/72
Processing batch 30/72
Processing batch 31/72
Processing batch 32/72
Processing batch 33/72
Processing batch 34/72
Processing batch 35/72
Processing batch 36/72
Processing batch 37/72
Processing batch 38/72
Processing batch 39/72
Processing batch 40/72
Processing batch 41/72
Processing batch 42/72
Processing batch 43/72
Processing batch 44/

In [37]:
len(ENTITY_2_LABEL)

3476

In [38]:
len(set(ENTITY_2_LABEL.keys())), len(set(ENTITY_2_LABEL.values()))

(3476, 3422)

In [39]:
label2entity = {}
for entity, label in ENTITY_2_LABEL.items():
    if label not in label2entity:
        label2entity[label] = []
    label2entity[label].append(entity)

for label, entities in label2entity.items():
    if len(entities) > 1:
        print(label, entities)

report ['Q10870555', 'Q10429085']
test ['Q1003030', 'Q27318']
component ['Q1310239', 'Q1117970']
order ['Q193622', 'Q567696']
state ['Q7275', 'Q3505845']
article ['Q712597', 'Q191067']
bibliography ['Q134995', 'Q1631107']
process ['Q10843872', 'Q3249551']
video recording ['Q34508', 'Q30070675']
attribute ['Q2722260', 'Q109674924']
language ['Q34770', 'Q315', 'Q4113741']
theatre company ['Q742421', 'Q11812394']
service ['Q7406919', 'Q1220872']
style ['Q1292119', 'Q5767753', 'Q2313235']
class ['Q18204', 'Q16889133']
clan ['Q989470', 'Q211503']
location ['Q109377685', 'Q115095765']
field of study ['Q1047113', 'Q2267705']
achievement ['Q2532754', 'Q2988681']
model series ['Q811701', 'Q31836768']
kinship ['Q171318', 'Q109664302']
person ['Q215627', 'Q690940']
role ['Q214339', 'Q1707847', 'Q4897819']
epithet ['Q207869', 'Q16685255']
position ['Q4164871', 'Q1781513']
territory ['Q183366', 'Q4835091']
space ['Q107', 'Q2133296']
character ['Q3241972', 'Q95074']
crossing ['Q62059481', 'Q10816681

### Collecting descriptions for entity types with duplicated labels

In [40]:
@retry(wait=wait_random_exponential(multiplier=1, max=60))
def get_entity_info(entity_id):
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    
    query = f"""
    SELECT ?entityLabel ?entityDescription WHERE {{
      wd:{entity_id} rdfs:label ?entityLabel .
      wd:{entity_id} schema:description ?entityDescription .
      FILTER (lang(?entityLabel) = "en")
      FILTER (lang(?entityDescription) = "en")
    }}
    """
    
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    if results["results"]["bindings"]:
        result = results["results"]["bindings"][0]
        return {
            "label": result["entityLabel"]["value"],
            "description": result["entityDescription"]["value"]
        }
    else:
        return None

for label, entities in label2entity.items():
    if len(entities) > 1:
        for entity_id in entities:
            info = get_entity_info(entity_id)
            ENTITY_2_LABEL[entity_id] = info['label'] + " (" + info['description'] +")"

In [41]:
len(set(ENTITY_2_LABEL.keys())), len(set(ENTITY_2_LABEL.values()))

(3476, 3476)

### Collecting entity types' aliases

In [42]:
@retry(wait=wait_random_exponential(multiplier=1, max=60))
def get_entity_aliases(entity_id):
    chinese_japanese_pattern = re.compile(r"[\u4E00-\u9FFF\u3400-\u4DBF\uF900-\uFAFF\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\uFF00-\uFFEF]")
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    
    query = f"""
    SELECT ?alias WHERE {{
      wd:{entity_id} skos:altLabel ?alias .
      FILTER (lang(?alias) = "en")
    }}
    """
    
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    aliases = []
    for result in results["results"]["bindings"]:

        alias = result["alias"]["value"]
        if not chinese_japanese_pattern.search(alias):
          aliases.append(alias)
        # except Exception as e:
        #    continue
    
    return aliases

ENTITY_2_ALIASES = {}
for entity_id in tqdm(ENTITY_2_LABEL.keys()):
    ENTITY_2_ALIASES[entity_id] = get_entity_aliases(entity_id)

100%|██████████| 3476/3476 [19:36<00:00,  2.96it/s]  


In [43]:
# [(ENTITY_2_LABEL[ent], aliases) for ent, aliases in ENTITY_2_ALIASES.items()]

In [44]:
ENTITY_2_LABEL["Q186408"], get_entity_aliases("Q186408")

('point in time',
 ['moment', 'date', 'instant', 'given moment', 'time point', 'timepoint'])

In [45]:
ENTITY_2_LABEL["Q309314"], get_entity_aliases("Q309314")

('quantity', ['number', 'amount', 'qty', 'quantulum'])

## Building inverse mapping - object/subject type constraint to relation

### Checking that relations with 'point in time' and 'quantity' data types don't have other constraints

In [47]:
# for prop, data_type in PROP_2_DATA_TYPE.items():
#     if data_type == "Quantity":    
#         val_constraints = [ENTITY_2_LABEL[ent] for ent in constraint_dict[prop]["Value-type constraint"]]
#         subj_constraints = [ENTITY_2_LABEL[ent] for ent in constraint_dict[prop]["Subject type constraint"]]            
#         # print(PROP_2_LABEL[prop], subj_constraints, val_constraints)
#         assert len(val_constraints) == 0
#         # no value constraints for data type quantity

In [49]:
# for prop, data_type in PROP_2_DATA_TYPE.items():
#     if data_type == "Point in time":    
#         val_constraints = [ENTITY_2_LABEL[ent] for ent in constraint_dict[prop]["Value-type constraint"]]
#         subj_constraints = [ENTITY_2_LABEL[ent] for ent in constraint_dict[prop]["Subject type constraint"]]            
#         # print(PROP_2_LABEL[prop], subj_constraints, val_constraints)
#         assert len(val_constraints) == 0
#         # no value constraints for data type quantity

In [50]:
constraint_dict

{'P1081': {'Value-type constraint': ['Q309314'],
  'Subject type constraint': ['Q56061', 'Q3624078']},
 'P1082': {'Value-type constraint': ['Q309314'],
  'Subject type constraint': ['Q5107',
   'Q23442',
   'Q33837',
   'Q41710',
   'Q82794',
   'Q189760',
   'Q486972',
   'Q3895768',
   'Q4835091',
   'Q15642541',
   'Q21871294',
   'Q28864185',
   'Q108788952']},
 'P1083': {'Value-type constraint': ['Q309314'],
  'Subject type constraint': ['Q42889',
   'Q132241',
   'Q811430',
   'Q811979',
   'Q1875621',
   'Q2800000',
   'Q13226383',
   'Q15720793',
   'Q17350442',
   'Q20871196',
   'Q29048322',
   'Q88985865']},
 'P1086': {'Value-type constraint': ['Q309314'],
  'Subject type constraint': ['Q11344', 'Q25276', 'Q37147', 'Q15053464']},
 'P1087': {'Value-type constraint': ['Q309314'],
  'Subject type constraint': ['Q5', 'Q40056']},
 'P1088': {'Value-type constraint': ['Q309314'],
  'Subject type constraint': ['ANY']},
 'P1090': {'Value-type constraint': ['Q309314'],
  'Subject type

In [51]:
subj2prop_constraints = {"<ANY SUBJECT>": []}
# Q309314 - quantity, Q186408 -  point in time 
obj2prop_constraint = {"<ANY OBJECT>": [], "Q309314": [], 'Q186408': []}

for prop, constraint in constraint_dict.items():

    if PROP_2_DATA_TYPE[prop] == "Point in time":
        obj2prop_constraint['Q186408'].append(prop)
    
    elif PROP_2_DATA_TYPE[prop] == 'Quantity':
        obj2prop_constraint['Q309314'].append(prop)
    
    elif constraint["Value-type constraint"] == ['ANY']:
        obj2prop_constraint["<ANY OBJECT>"].append(prop)
    
    else:
        for entity in constraint["Value-type constraint"]:
            if entity not in obj2prop_constraint:
                obj2prop_constraint[entity] = []
            obj2prop_constraint[entity].append(prop)

    
    if constraint["Subject type constraint"] ==  ['ANY']:
        subj2prop_constraints["<ANY SUBJECT>"].append(prop)

    else:
        for entity in constraint["Subject type constraint"]:
            if entity not in subj2prop_constraints:
                subj2prop_constraints[entity] = []
            subj2prop_constraints[entity].append(prop)


len(subj2prop_constraints), len(obj2prop_constraint)

(2278, 2217)

In [52]:
with open(ONTOLOGY_MAPPINGS_DIR+"subj_constraint2prop.json", 'w') as f:
    json.dump(subj2prop_constraints, f)

with open(ONTOLOGY_MAPPINGS_DIR+"obj_constraint2prop.json", 'w') as f:
    json.dump(obj2prop_constraint, f)

In [53]:
with open(ONTOLOGY_MAPPINGS_DIR+'entity_type2label.json', 'w') as f:
    json.dump(ENTITY_2_LABEL, f)

In [54]:
with open(ONTOLOGY_MAPPINGS_DIR+'prop2aliases.json', 'w') as f:
    json.dump(PROP2ALIASES, f)

In [55]:
with open(ONTOLOGY_MAPPINGS_DIR+'entity_type2aliases.json', 'w') as f:
    json.dump(ENTITY_2_ALIASES, f)

In [56]:
with open(ONTOLOGY_MAPPINGS_DIR+'entity_type2hierarchy.json', 'w') as f:
    json.dump(ENTITY_2_HIERARCHY, f)