## Semantic analysis to extract facts from test article(s)

### Using spaCy

In [1]:


import spacy
nlp = spacy.load("en_core_web_md")


ModuleNotFoundError: No module named 'spacy'

In [3]:
text = "The Eiffel Tower is located in Paris. It was constructed in 1889."
doc = nlp(text)
doc


The Eiffel Tower is located in Paris. It was constructed in 1889.

In [4]:
# extract named entities
for ent in doc.ents:
    print(ent.text, '\t| ', ent.label_, '\t| ', spacy.explain(ent.label_))


The Eiffel Tower 	|  FAC 	|  Buildings, airports, highways, bridges, etc.
Paris 	|  GPE 	|  Countries, cities, states
1889 	|  DATE 	|  Absolute or relative dates or periods


In [5]:
# parse dependencies
for token in doc:
    print(token.text)
    if token.dep_ in ['nsubj', 'attr', 'acomp'] and token.head.pos_ in ['VERB', 'AUX']:
        print(token.text, token.head.text, [child for child in token.children])


The
Eiffel
Tower
is
located
in
Paris
.
It
was
constructed
in
1889
.


In [6]:
from spacy.matcher import Matcher

# Load spaCy and create a Matcher instance
nlp = spacy.load('en_core_web_md')
matcher = Matcher(nlp.vocab)

# Define a comprehensive list of patterns
patterns = [
    # Simple Subject-Verb-Object (SVO)
    [{'DEP': 'nsubj'}, {'POS': 'VERB'}, {'DEP': 'dobj'}],
    # Subject-Verb-Adjective (SVA)
    [{'DEP': 'nsubj'}, {'POS': 'VERB'}, {'DEP': 'acomp'}],
    # Subject-Verb-Prepositional Phrase (SVPP)
    [{'DEP': 'nsubj'}, {'POS': 'VERB'}, {'POS': 'ADP', 'OP': '?'}, {'DEP': 'pobj'}],
    # Passive Voice (Agent-Verb-Subject)
    [{'DEP': 'agent'}, {'POS': 'VERB'}, {'DEP': 'nsubjpass'}],
    # Adjectives or attributes linked to subjects
    [{'DEP': 'nsubj'}, {'POS': 'VERB'}, {'DEP': 'attr'}],
    # Handling conjunctions in factual statements
    [{'DEP': 'nsubj'}, {'POS': 'VERB'}, {'POS': 'CCONJ', 'OP': '?'}, {'DEP': 'conj'}],
    # Extended phrases involving adverbs or auxiliary verbs
    [{'DEP': 'nsubj'}, {'POS': 'AUX', 'OP': '?'}, {'POS': 'ADV', 'OP': '?'}, {'POS': 'VERB'}, {'DEP': 'dobj'}]
]

# Add patterns to the matcher
for i, pattern in enumerate(patterns):
    matcher.add(f"FACT_ASSERTION_{i}", [pattern])




In [7]:
texts = [
    "The Eiffel Tower was constructed by Gustave Eiffel.",
    "Quantum mechanics is a branch of physics.",
    "Albert Einstein developed the theory of relativity in the early 20th century.",
    "The heart pumps blood through the body.",
    "Paris, the capital of France, is known for its culture.",
    "Coffee is grown extensively in Brazil."
]

for text in texts:
    doc = nlp(text)
    matches = matcher(doc)
    for match_id, start, end in matches:
        span = doc[start:end]  # The matched span
        print(f"Factual assertion found: {span.text}")


Factual assertion found: heart pumps blood
Factual assertion found: heart pumps blood


In [8]:
from spacy import displacy

text = "Quantum mechanics is a branch of physics."
doc = nlp(text)
displacy.serve(doc, style='dep',auto_select_port=True)  # This will open a web server to visually inspect the sentence





Using the 'dep' visualizer
Serving on http://0.0.0.0:5001 ...

Shutting down server on port 5001.
