In [1]:
!pip install stanza

Collecting stanza
  Using cached stanza-1.8.1-py3-none-any.whl (970 kB)
Installing collected packages: stanza
Successfully installed stanza-1.8.1


In [2]:
import stanza
# stanza.download('en')       # This downloads the English models for the neural pipeline
nlp = stanza.Pipeline('en') # This sets up a default neural pipeline in English
doc = nlp("Barack Obama was born in Hawaii.  He was elected president in 2008.")
doc.sentences[0].print_dependencies()

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.8.0.json:   0%|   …

2024-04-09 12:55:34 INFO: Downloaded file to /home/zuoyu916/stanza_resources/resources.json
2024-04-09 12:55:34 INFO: Downloading default packages for language: en (English) ...
2024-04-09 12:55:43 INFO: File exists: /home/zuoyu916/stanza_resources/en/default.zip
2024-04-09 12:55:54 INFO: Finished downloading models and saved to /home/zuoyu916/stanza_resources
2024-04-09 12:55:54 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.8.0.json:   0%|   …

2024-04-09 12:55:54 INFO: Downloaded file to /home/zuoyu916/stanza_resources/resources.json
2024-04-09 12:55:56 INFO: Loading these models for language: en (English):
| Processor    | Package                   |
--------------------------------------------
| tokenize     | combined                  |
| mwt          | combined                  |
| pos          | combined_charlm           |
| lemma        | combined_nocharlm         |
| constituency | ptb3-revised_charlm       |
| depparse     | combined_charlm           |
| sentiment    | sstplus_charlm            |
| ner          | ontonotes-ww-multi_charlm |

2024-04-09 12:55:56 INFO: Using device: cpu
2024-04-09 12:55:56 INFO: Loading: tokenize
2024-04-09 12:55:58 INFO: Loading: mwt
2024-04-09 12:55:58 INFO: Loading: pos
2024-04-09 12:55:59 INFO: Loading: lemma
2024-04-09 12:55:59 INFO: Loading: constituency
2024-04-09 12:56:00 INFO: Loading: depparse
2024-04-09 12:56:01 INFO: Loading: sentiment
2024-04-09 12:56:02 INFO: Loading: ner

('Barack', 4, 'nsubj:pass')
('Obama', 1, 'flat')
('was', 4, 'aux:pass')
('born', 0, 'root')
('in', 6, 'case')
('Hawaii', 4, 'obl')
('.', 4, 'punct')


In [3]:
import stanza
#assuming NLP pipeline for english has already been initialised

def extract_words(input_text):

    # Process the input text
    doc = nlp(input_text)

    # Extract nouns (NN, NNS, NNP, NNPS)
    ##ADJ for adjectives
    ##N for nouns
    nouns = []
    for sentence in doc.sentences:
        for word in sentence.words:
            if word.upos.startswith('N') or word.upos == "PROPN":
                print(f"{word.text}, {word.upos}, {word.deprel}")
            else:
                print(f"\t{word.text}, {word.upos}, {word.deprel}")

# Example usage
input_sentence = "The magnetic flux density is defined as the force per unit length per unit current acting on a straight current-carrying conductor placed perpendicular to a magnetic field."
nouns_list = extract_words(input_sentence)

	The, DET, det
	magnetic, ADJ, amod
flux, NOUN, compound
density, NOUN, nsubj:pass
	is, AUX, aux:pass
	defined, VERB, root
	as, ADP, case
	the, DET, det
force, NOUN, obl
	per, ADP, case
unit, NOUN, compound
length, NOUN, nmod
	per, ADP, case
unit, NOUN, compound
current, NOUN, nmod
	acting, VERB, advcl
	on, ADP, case
	a, DET, det
	straight, ADJ, amod
	current, ADJ, amod
	-, PUNCT, punct
	carrying, VERB, amod
conductor, NOUN, obl
	placed, VERB, acl
	perpendicular, ADJ, xcomp
	to, ADP, case
	a, DET, det
	magnetic, ADJ, amod
field, NOUN, obl
	., PUNCT, punct


In [4]:
def extract_subj(input_text):
    doc = nlp(input_text)
    long = []
    for sentence in doc.sentences:
        out = ""
        for word in sentence.words:
            if word.upos == "ADJ":
                out += f"{word.text} "
            elif word.upos.startswith("N"):
                out += f"{word.text} "
            if word.upos == "AUX":
                long.append(out)
                break
    if long:
        return long

In [5]:
input_sentence = """
The naturalistic fallacy: Just because this is the way things CURRENTLY ARE does not mean this is the way things OUGHT TO BE.
"""
print(extract_subj(input_sentence))

['naturalistic fallacy ']


In [30]:
import re
#Assuming stanza is imported
def extract_subj_v2(input_text):
    #Assuming stanza pipeline for english has already been initialised
    long_linetext = re.sub(r'[\n\r]+',': ',input_text)
    # print(long_linetext)
    doc = nlp(long_linetext)
    long = {'subj':''}
    for sentence in doc.sentences:
        out = ""
        for word in sentence.words:
            if word.upos == "ADJ":
                out += f"{word.text} "
            elif word.upos == "ADP":
                out += f"{word.text} "
            elif word.upos.startswith("N") or word.upos == "PROPN":
                out += f"{word.text} "
            elif word.upos == "AUX" or word.text==":":
                if out != '': 
                    long['subj'] = out
                    long[word.upos] = word.text
                break

        # if out != '': long.append({"subj":out,word.upos:word.text})
    if long:
        return long

In [31]:
input_sentence = "A dog is a mammal."
print(extract_subj_v2(input_sentence))

input_sentence = """The magnetic flux density is defined as the force per unit length per unit current acting
on a straight current-carrying conductor placed perpendicular to a magnetic field."""
# extract_words(input_sentence)
print(extract_subj_v2(input_sentence))

input_sentence = """Intuitionism: 
It is the theory that moral truths in metaethics are known directly by intuition."""
# extract_words(input_sentence)
print(extract_subj_v2(input_sentence))

input_sentence = """
The naturalistic fallacy: Just because this is the way things CURRENTLY ARE does not mean this is the way things OUGHT TO BE.
"""
print(extract_subj_v2(input_sentence))

input_sentence = """Divine Command Theory (DCT)
It holds that all moral beliefs come from God."""
# extract_words(input_sentence)
print(extract_subj_v2(input_sentence))

input_sentence = """Force is the product of mass and acceleration. Momentum is the product of force and velocity."""
# extract_words(input_sentence) ##Able to distinguish the key points from 2 dff sentences
print(extract_subj_v2(input_sentence))

input_sentence = """Momentum is the product of force and velocity. It is the integral of Force w.r.t. time."""
extract_words(input_sentence)
print(extract_subj_v2(input_sentence))

input_sentence = """There is a high number of commuters in the evening, which is the cause of the jam."""
extract_words(input_sentence) ##Code understands that 'is' is not used for deginition in this case
print(extract_subj_v2(input_sentence))

input_sentence = """The equation for Force only due to acceleration is $F=ma$"""
extract_words(input_sentence)
print(extract_subj_v2(input_sentence))

{'subj': 'dog ', 'AUX': 'is'}
{'subj': 'magnetic flux density ', 'AUX': 'is'}
{'subj': 'Intuitionism ', 'PUNCT': ':'}
{'subj': ''}
{'subj': 'Divine Command Theory DCT ', 'PUNCT': ':'}
{'subj': 'Momentum ', 'AUX': 'is'}
Momentum, NOUN, nsubj
	is, AUX, cop
	the, DET, det
product, NOUN, root
	of, ADP, case
force, NOUN, nmod
	and, CCONJ, cc
velocity, NOUN, conj
	., PUNCT, punct
	It, PRON, nsubj
	is, AUX, cop
	the, DET, det
integral, NOUN, root
	of, ADP, case
Force, NOUN, nmod
	w.r.t., SYM, case
time, NOUN, appos
	., PUNCT, punct
{'subj': 'Momentum ', 'AUX': 'is'}
	There, PRON, expl
	is, VERB, root
	a, DET, det
	high, ADJ, amod
number, NOUN, nsubj
	of, ADP, case
commuters, NOUN, nmod
	in, ADP, case
	the, DET, det
evening, NOUN, obl
	,, PUNCT, punct
	which, PRON, nsubj
	is, AUX, cop
	the, DET, det
cause, NOUN, acl:relcl
	of, ADP, case
	the, DET, det
jam, NOUN, nmod
	., PUNCT, punct
{'subj': 'high number of commuters in evening ', 'AUX': 'is'}
	The, DET, det
equation, NOUN, nsubj
	for, ADP, c

In [39]:
class flashcard():
    def __init__(self, text):
        self._qns = self.extract(text)
        self._ans = text
    def output(self):
        return f"{self._qns}\n{self._ans}"
    def extract(self,text):
        d = extract_subj_v2(text)
        try:
            aux = d["AUX"]
        except KeyError:
            aux = "is"
        subj = d["subj"]
        return f"What {aux} {subj.rstrip(' ')}?"

dog = flashcard("Momentum is the product of force and velocity. It is the integral of Force w.r.t. time.")
print(dog.output())

What is Momentum?
Momentum is the product of force and velocity. It is the integral of Force w.r.t. time.
