#  Paper Implentation

In [71]:
import spacy
from nltk.corpus import wordnet as wn
from owlready2 import *
import pandas as pd

# Define thresholds for superclass assignment (adjust as needed)
higher_threshold = 0.3  # Threshold for first-level classes
lower_threshold = 0.5  # Threshold for non-first-level classes


def tag_noun_with_superclass(ontology, noun, higher_threshold= 0.3 , lower_threshold= 0.5):
  """
  Assigns a superclass to a noun based on an OWL ontology and thresholds.

  Args:
      ontology (Ontology): The loaded OWL ontology.
      noun (str): The noun to be tagged.
      higher_threshold (float): Threshold for first-level classes.
      lower_threshold (float): Threshold for non-first-level classes.

  Returns:
      str: The superclass tag for the noun (or None if not tagged).
  """

  # Check if noun corresponds to an ontology class (consider label or IRI)
  for cls in ontology.classes():
    if noun in cls.label or noun == cls.iri:
      return str(cls.is_a[0])  # Return first superclass

  # Otherwise, no direct class match, so skip

  return None

# Load the English language model
nlp = spacy.load("en_core_web_md")

def analyze_sentence(sentence, ontology_file_path):
  """
  Analyzes a sentence and outputs ontology, NER, and dependency parsing data with
  improvements for relation/attribute identification, lexical resources, coreference
  resolution, disambiguation, and superclass tagging.

  Args:
      sentence (str): The sentence to be analyzed.
      ontology_file_path (str): The path to the OWL ontology file.

  Returns:
      dict: A dictionary containing the ontology, NER, and dependency parsing results.
  """

  # Load the OWL ontology
  onto = get_ontology(ontology_file_path).load()
  ontology = onto

  doc = nlp(sentence)

  # Prepare empty results dictionary
  results = {
      "ontology": [],
      "ner": [],
      "dependency_parsing": []
  }

  # Coreference resolution (using simple heuristic)
  coref_chains = {}
  for token in doc:
    if token.dep_ == "nsubj":  # Identify potential subjects
      entity_text = token.text
      if entity_text in coref_chains:
        coref_chains[entity_text].append(token.idx)
      else:
        coref_chains[entity_text] = [token.idx]

  # Process each token
  for token in doc:
    text = token.text
    pos = token.pos_
    dep_ = token.dep_
    head = token.head.text

    # Named Entity Recognition (NER)
    entity_text = ""
    entity_type = ""
    if token.ent_type_:
      entity_text = text
      entity_type = token.ent_type_
    results["ner"].append({"text": entity_text, "type": entity_type})

    # Ontology Table with relation and attribute logic, and superclass tagging
    if pos in ["NOUN", "PROPN"]:
      relation = ""
      attributes = []
      superclass = None  # Initialize superclass

      # Check for verb phrase as relation (consider dependency parsing)
      if dep_ == "ROOT" or dep_ == "verb":
        relation = head

      # Identify potential attributes using part-of-speech and dependency parsing
      for child in token.children:
        if child.pos_ in ["ADJ", "ADP"]:  # Adjectives or adpositional phrases
          attributes.append(child.text)

      # Coreference resolution (apply to previously identified entity)
      if token.idx in coref_chains:
        entity_text = coref_chains[entity_text][0]  # Use first mention as reference
        entity_text = doc[entity_text].text

      # Superclass tagging using the loaded ontology
      superclass = tag_noun_with_superclass(ontology, entity_text, higher_threshold, lower_threshold)

      results["ontology"].append({
          "text": entity_text,
          "type": "Entity" if entity_type else "Potential Entity",
          "relation": relation,
          "attributes": attributes,
          "superclass": superclass
      })

   # Dependency Parsing
    results["dependency_parsing"].append({
        "word": text,
        "pos": pos,
        "dependency": dep_,
        "head": head
    })

  # WordNet integration (enrich ontology with hypernyms/synonyms)
  for entity in results["ontology"]:
    if entity["text"]:
      synsets = wn.synsets(entity["text"])
      if synsets:
        entity["hypernyms"] = [synset.hypernyms() for synset in synsets][0]  # Get first hypernym
        entity["synonyms"] = [lemma.name() for synset in synsets for lemma in synset.lemmas()]

  # Disambiguation (simple example using WordNet glosses)
  for entity in results["ontology"]:
    if entity["relation"] and entity["text"]:
      synsets = wn.synsets(entity["text"])
      if synsets:
        gloss_matches = [gloss for synset in synsets for gloss in synset.definition().split() if entity["relation"] in gloss]
        if gloss_matches:
          entity["relation"] = gloss_matches[0]  # Choose first gloss match (can be improved)

  return results

# Example usage (modify the ontology file path accordingly)
ontology_file_path = "onto88.owx"
sentence = "The queen is not laying any eggs"
analysis_result = analyze_sentence(sentence, ontology_file_path)

# Print the results (further processing might be needed)
print("Ontology Table:")
print(analysis_result["ontology"])
print("\nNER Table:")
print(analysis_result["ner"])
print("\nDependency Parsing Table:")
print(analysis_result["dependency_parsing"])

Ontology Table:
[{'text': '', 'type': 'Potential Entity', 'relation': '', 'attributes': [], 'superclass': None}, {'text': '', 'type': 'Potential Entity', 'relation': '', 'attributes': [], 'superclass': None}]

NER Table:
[{'text': '', 'type': ''}, {'text': '', 'type': ''}, {'text': '', 'type': ''}, {'text': '', 'type': ''}, {'text': '', 'type': ''}, {'text': '', 'type': ''}, {'text': '', 'type': ''}]

Dependency Parsing Table:
[{'word': 'The', 'pos': 'DET', 'dependency': 'det', 'head': 'queen'}, {'word': 'queen', 'pos': 'NOUN', 'dependency': 'nsubj', 'head': 'laying'}, {'word': 'is', 'pos': 'AUX', 'dependency': 'aux', 'head': 'laying'}, {'word': 'not', 'pos': 'PART', 'dependency': 'neg', 'head': 'laying'}, {'word': 'laying', 'pos': 'VERB', 'dependency': 'ROOT', 'head': 'laying'}, {'word': 'any', 'pos': 'DET', 'dependency': 'det', 'head': 'eggs'}, {'word': 'eggs', 'pos': 'NOUN', 'dependency': 'dobj', 'head': 'laying'}]


In [73]:

# turn into dataframe
df_namess= pd.DataFrame(analysis_result["dependency_parsing"])
df_namess

Unnamed: 0,word,pos,dependency,head
0,The,DET,det,queen
1,queen,NOUN,nsubj,laying
2,is,AUX,aux,laying
3,not,PART,neg,laying
4,laying,VERB,ROOT,laying
5,any,DET,det,eggs
6,eggs,NOUN,dobj,laying


# DataFrames

In [65]:
import pandas as pd


def create_dataframes_from_analysis(analysis_result):
    """
    Generates separate DataFrames for verbs, nouns, and relations.

    Handles potential missing keys in entities and uses compatible methods
    for DataFrame creation and updates for older pandas versions.

    Args:
        analysis_result (dict): The dictionary containing the results from
            the analyze_sentence function.

    Returns:
        tuple: A tuple containing three DataFrames (verbs, nouns, relations).
    """

    df_verbs = pd.DataFrame(columns=["id", "word", "pos", "dependency", "head"])
    df_nouns = pd.DataFrame(columns=["id", "text", "type", "relation", "attributes", "superclass", "hypernyms", "synonyms"])
    df_relations = pd.DataFrame(columns=["verb_id", "noun_id"])

    verb_id = 1
    noun_id = 1

    # Process entities from the ontology results (handle missing keys)
    noun_data = []
    for entity in analysis_result["ontology"]:
        noun_data.append({
            "id": noun_id,
            "text": entity.get("text", ""),  # Handle missing 'text' with empty string
            "type": entity.get("type", "Potential Entity"),  # Handle missing 'type'
            "relation": entity.get("relation", ""),  # Handle missing 'relation' with empty string
            "attributes": entity.get("attributes", []),  # Handle missing 'attributes' with empty list
            "superclass": entity.get("superclass", ""),  # Handle missing 'superclass' with empty string
            "hypernyms": entity.get("hypernyms", []),  # Handle missing 'hypernyms' with empty list
            "synonyms": entity.get("synonyms", []),  # Handle missing 'synonyms' with empty list
        })
        noun_id += 1
    df_nouns = pd.concat([df_nouns, pd.DataFrame(noun_data)], ignore_index=True)

    # Process verbs from the dependency parsing results
    verb_data = []
    for dependency_item in analysis_result["dependency_parsing"]:
        if dependency_item["pos"] == "VERB":
            dependency_item["id"] = verb_id
            verb_data.append(dependency_item)
            verb_id += 1
    df_verbs = pd.concat([df_verbs, pd.DataFrame(verb_data)], ignore_index=True)

    # Create relation entries for verbs and nouns found in the same sentence
    for dependency_item in analysis_result["dependency_parsing"]:
        if dependency_item["pos"] == "VERB":
            sentence_nouns = df_nouns[df_nouns["text"].isin([dependency_item["word"]])]  # Pass a list to isin
            for _, noun_row in sentence_nouns.iterrows():
                df_relations = pd.concat([df_relations, pd.DataFrame({"verb_id": dependency_item["id"], "noun_id": noun_row["id"]})], ignore_index=True)

    return df_verbs, df_nouns, df_relations

df_verbs, df_nouns, df_relations= create_dataframes_from_analysis(analysis_result)

In [68]:
df_relations

Unnamed: 0,verb_id,noun_id
