In [1]:
import nltk

In [2]:
# Sample sentences, each beginning with "It", used as input for the
# tokenizing / POS-tagging cells in this notebook.
sents: list[str] = [
    "It is obvious that you have been misled.",
    "It's a shame what happened to you and your sister",
    "It might be a good idea to wear a respirator mask when you're working with fiberglass.",
    "It's likely that the enemy simply dropped back off the hilltop once they'd grabbed all the weapons they could carry.",
    "It surprised everybody that Marlene had so much energy and strength.",
]

In [3]:
# Baseline: print the unmodified NLTK part-of-speech tags for every sample
# sentence, one tagged token list per line.
for sent in sents:
    words = nltk.word_tokenize(sent)
    print(nltk.pos_tag(words))

[('It', 'PRP'), ('is', 'VBZ'), ('obvious', 'JJ'), ('that', 'IN'), ('you', 'PRP'), ('have', 'VBP'), ('been', 'VBN'), ('misled', 'VBN'), ('.', '.')]
[('It', 'PRP'), ("'s", 'VBZ'), ('a', 'DT'), ('shame', 'NN'), ('what', 'WDT'), ('happened', 'VBD'), ('to', 'TO'), ('you', 'PRP'), ('and', 'CC'), ('your', 'PRP$'), ('sister', 'NN')]
[('It', 'PRP'), ('might', 'MD'), ('be', 'VB'), ('a', 'DT'), ('good', 'JJ'), ('idea', 'NN'), ('to', 'TO'), ('wear', 'VB'), ('a', 'DT'), ('respirator', 'NN'), ('mask', 'NN'), ('when', 'WRB'), ('you', 'PRP'), ("'re", 'VBP'), ('working', 'VBG'), ('with', 'IN'), ('fiberglass', 'NN'), ('.', '.')]
[('It', 'PRP'), ("'s", 'VBZ'), ('likely', 'JJ'), ('that', 'IN'), ('the', 'DT'), ('enemy', 'NN'), ('simply', 'RB'), ('dropped', 'VBD'), ('back', 'RB'), ('off', 'IN'), ('the', 'DT'), ('hilltop', 'NN'), ('once', 'IN'), ('they', 'PRP'), ("'d", 'MD'), ('grabbed', 'VB'), ('all', 'PDT'), ('the', 'DT'), ('weapons', 'NNS'), ('they', 'PRP'), ('could', 'MD'), ('carry', 'VB'), ('.', '.')]
[

In [19]:
from enum import Enum, auto
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag

# Fetch the NLTK data packages this notebook relies on (tagger model first,
# then the tokenizer data). nltk.download reports when a package is already
# up to date, so re-running the cell is harmless.
# NOTE(review): newer NLTK releases reportedly look for "punkt_tab" and
# "averaged_perceptron_tagger_eng" instead -- confirm against the installed version.
for resource in ("averaged_perceptron_tagger", "punkt"):
    nltk.download(resource)


class State(Enum):
    """States of the left-to-right scan that looks for ``it`` + verb + adjective.

    Members are numbered 1..5 in declaration order.
    """

    INITIAL = 1  # nothing matched yet, or the scan has been reset
    FOUND_IT = 2  # the word "it" has been seen
    FOUND_VERB = 3  # a verb tag has been seen after "it"
    FOUND_ADJ = 4  # a listed adjective has been seen after the verb
    FOUND_THAT_TO = 5  # "that" / "to" has been seen after the adjective


# Lowercase words eligible for the JJ_exp tag; lookups are done against
# word.lower().
# NOTE(review): in the tagger output shown in this notebook "shame" comes back
# as NN, so a JJ-only check never matches it -- confirm whether it belongs here.
extraposition_adjective_list = [
    "obvious",
    "shame",
    "good",
    "likely",
    "surprised",
]


def tag_extraposed_adjectives(sentence):
    """Retag adjectives that follow an ``it`` + verb sequence as ``JJ_exp``.

    The sentence is tokenized with ``word_tokenize`` and tagged with
    ``pos_tag``. The tagged tokens are then scanned left to right by a small
    state machine:

    * ``INITIAL``    -> ``FOUND_IT``   on the word "it" (case-insensitive);
    * ``FOUND_IT``   -> ``FOUND_VERB`` on the first tag starting with "V";
    * ``FOUND_VERB`` -> ``INITIAL``    on a token tagged exactly ``JJ`` whose
      lowercase form is in ``extraposition_adjective_list``; that token's tag
      is replaced by ``JJ_exp``.

    Any other ".", "?" or "!" token resets the scan to ``INITIAL``.

    The scan does not check that a "that"/"to" complement follows the
    adjective: the adjective is retagged as soon as it is seen. The
    ``FOUND_ADJ`` and ``FOUND_THAT_TO`` states are therefore not used here
    (previously they were only reachable through a dead store and an
    unreachable branch).

    Args:
        sentence: Text to tokenize and tag.

    Returns:
        The list of ``(word, tag)`` tuples produced by ``pos_tag``, with
        matching adjectives carrying the tag ``"JJ_exp"``.
    """
    tagged_tokens = pos_tag(word_tokenize(sentence))
    state = State.INITIAL

    for i, (word, tag) in enumerate(tagged_tokens):
        lowered = word.lower()
        if state == State.INITIAL and lowered == "it":
            state = State.FOUND_IT
        elif state == State.FOUND_IT and tag.startswith("V"):
            state = State.FOUND_VERB
        elif (
            state == State.FOUND_VERB
            and tag == "JJ"
            and lowered in extraposition_adjective_list
        ):
            tagged_tokens[i] = (word, "JJ_exp")
            # Start over so a later "it" + verb can introduce another adjective.
            state = State.INITIAL
        elif word in (".", "?", "!"):
            # Sentence-final punctuation abandons any partial match.
            state = State.INITIAL

    return tagged_tokens

# Run the retagging pass over each sample sentence and show the sentence
# followed by its updated tag list, with a blank line between entries.
for sentence in sents:
    tagged_tokens = tag_extraposed_adjectives(sentence)
    print(f"Sentence: {sentence}")
    print("Updated Tags:", tagged_tokens, end="\n\n")

Sentence: It is obvious that you have been misled.
Updated Tags: [('It', 'PRP'), ('is', 'VBZ'), ('obvious', 'JJ_exp'), ('that', 'IN'), ('you', 'PRP'), ('have', 'VBP'), ('been', 'VBN'), ('misled', 'VBN'), ('.', '.')]

Sentence: It's a shame what happened to you and your sister
Updated Tags: [('It', 'PRP'), ("'s", 'VBZ'), ('a', 'DT'), ('shame', 'NN'), ('what', 'WDT'), ('happened', 'VBD'), ('to', 'TO'), ('you', 'PRP'), ('and', 'CC'), ('your', 'PRP$'), ('sister', 'NN')]

Sentence: It might be a good idea to wear a respirator mask when you're working with fiberglass.
Updated Tags: [('It', 'PRP'), ('might', 'MD'), ('be', 'VB'), ('a', 'DT'), ('good', 'JJ_exp'), ('idea', 'NN'), ('to', 'TO'), ('wear', 'VB'), ('a', 'DT'), ('respirator', 'NN'), ('mask', 'NN'), ('when', 'WRB'), ('you', 'PRP'), ("'re", 'VBP'), ('working', 'VBG'), ('with', 'IN'), ('fiberglass', 'NN'), ('.', '.')]

Sentence: It's likely that the enemy simply dropped back off the hilltop once they'd grabbed all the weapons they could 

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/vscode/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to /home/vscode/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
