<a href="https://colab.research.google.com/github/YUVARAJC14/DSA0328-Natural-language-Processing/blob/main/Q10-Brill-Tagging.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
class TransformationBasedTagger:
    """
    A transformation-based tagger that applies rules to improve initial tagging.
    Similar to Brill tagging in NLP, this approach starts with a simple baseline
    tagger and applies transformation rules to improve accuracy.

    Tagging happens in two stages: every word first receives a tag from a
    lexicon lookup (falling back to a default tag), and then the registered
    rules are applied repeatedly until the tag sequence stops changing.
    """

    def __init__(self):
        """Initialize the tagger with empty rules and default tags."""
        self.rules = []
        self.default_tag = 'NN'  # Default tag (noun)
        self.word_to_tag_map = {}  # For initial tagging

    def add_rule(self, rule_function, rule_description):
        """
        Add a transformation rule to the tagger.

        Rules are tried in the order in which they were added.

        Args:
            rule_function: A function that takes (words, current_tags, position)
                          and returns a new tag if the rule applies, or None.
            rule_description: A description of the rule for debugging.
        """
        self.rules.append({
            'function': rule_function,
            'description': rule_description
        })

    def set_word_to_tag_map(self, word_to_tag_map):
        """
        Set a mapping from words to their most likely tags.

        Lookups are performed on the lower-cased word, so keys should be
        lower case.

        Args:
            word_to_tag_map: A dictionary mapping words to their default tags.
        """
        self.word_to_tag_map = word_to_tag_map

    def set_default_tag(self, tag):
        """
        Set the default tag for words not in the word_to_tag_map.

        Args:
            tag: The default tag to use.
        """
        self.default_tag = tag

    def initial_tagging(self, words):
        """
        Perform initial tagging of words based on the word_to_tag_map.

        Args:
            words: A list of words to tag.

        Returns:
            A list of initial tags, one per word. Words missing from the
            map (after lower-casing) receive the default tag.
        """
        return [self.word_to_tag_map.get(word.lower(), self.default_tag) for word in words]

    def apply_rules(self, words, tags):
        """
        Apply transformation rules to improve the initial tagging.

        The sentence is swept left to right repeatedly. At each position the
        first rule that proposes a different tag wins for that sweep. Sweeps
        stop when one makes no change, or when the tag sequence returns to a
        state already seen, which means the rules are oscillating and would
        otherwise loop forever.

        Args:
            words: A list of words.
            tags: A list of initial tags (not modified).

        Returns:
            A tuple (new_tags, applied_rules): new_tags is the list of tags
            after rule application, and applied_rules is a list with one
            entry per word holding the descriptions of the rules that
            changed that word's tag, in the order they fired.
        """
        # Make a copy of tags to modify
        new_tags = list(tags)

        # Track which rules were applied where
        applied_rules = [[] for _ in range(len(words))]

        # Tag sequences observed so far (the start state plus the state after
        # each sweep). A sweep depends only on the current tags, so revisiting
        # a state means the rules are cycling rather than converging.
        seen_states = {tuple(new_tags)}

        # Apply rules until no more changes
        changed = True
        while changed:
            changed = False
            for position in range(len(words)):
                for rule in self.rules:
                    # Check if rule applies at this position
                    new_tag = rule['function'](words, new_tags, position)
                    if new_tag and new_tag != new_tags[position]:
                        # Rule applies, update tag
                        new_tags[position] = new_tag
                        applied_rules[position].append(rule['description'])
                        changed = True
                        # Only apply one rule per position per iteration
                        break

            if changed:
                state = tuple(new_tags)
                if state in seen_states:
                    # Conflicting rules are flipping tags back and forth;
                    # stop here instead of spinning forever.
                    break
                seen_states.add(state)

        return new_tags, applied_rules

    def tag(self, sentence):
        """
        Tag a sentence using initial tagging followed by transformation rules.

        Args:
            sentence: A string containing a sentence to tag. It is split on
                whitespace; punctuation is not separated from words.

        Returns:
            A tuple (tagged_words, applied_rules): tagged_words is a list of
            (word, tag) tuples, and applied_rules is the per-word list of
            rule descriptions produced by apply_rules.
        """
        # Split sentence into words
        words = sentence.split()

        # Apply initial tagging
        initial_tags = self.initial_tagging(words)

        # Apply transformation rules
        final_tags, applied_rules = self.apply_rules(words, initial_tags)

        # Return word-tag pairs
        return list(zip(words, final_tags)), applied_rules


# Example rules for the tagger
def rule_prev_det_to_noun(words, tags, position):
    """Propose 'NN' when the preceding token is tagged as a determiner ('DT').

    Returns None for the first token or when the previous tag is not 'DT'.
    """
    if position <= 0:
        # Nothing precedes the first token.
        return None
    return 'NN' if tags[position - 1] == 'DT' else None

def rule_next_verb_to_noun(words, tags, position):
    """Propose 'NN' when the following token carries a verb tag.

    The verb tags recognised are 'VB', 'VBZ', 'VBD' and 'VBG'. Returns None
    for the last token or when the next tag is not one of those.
    """
    following = position + 1
    if following >= len(words):
        # The last token has no successor to inspect.
        return None
    if tags[following] in ('VB', 'VBZ', 'VBD', 'VBG'):
        return 'NN'
    return None

def rule_capitalized_to_proper_noun(words, tags, position):
    """Change tag to proper noun if word is capitalized (not sentence initial).

    Args:
        words: The list of words in the sentence.
        tags: The current tags (unused by this rule).
        position: Index of the word under consideration.

    Returns:
        'NNP' when the word is not the first one and begins with an
        upper-case character; otherwise None. An empty-string word yields
        None rather than raising.
    """
    # Slice instead of indexing so an empty token yields '' (not upper-case)
    # rather than an IndexError.
    if position > 0 and words[position][:1].isupper():
        return 'NNP'
    return None

def rule_follows_to_preposition(words, tags, position):
    """Propose 'IN' (preposition) for 'to' when a nominal tag follows it.

    The rule fires only when the current word is 'to' (case-insensitive),
    it is not the last word, and the next tag is one of 'DT', 'NN', 'NNP'
    or 'PRP'. Otherwise None is returned.
    """
    if words[position].lower() != 'to':
        return None
    successor = position + 1
    if successor < len(words) and tags[successor] in ('DT', 'NN', 'NNP', 'PRP'):
        return 'IN'  # Preposition
    return None

def rule_follows_to_infinitive(words, tags, position):
    """Propose 'TO' (infinitive marker) for 'to' when a base-form verb follows.

    The rule fires only when the current word is 'to' (case-insensitive),
    it is not the last word, and the next tag is 'VB'. Otherwise None is
    returned.
    """
    token_is_to = words[position].lower() == 'to'
    if not token_is_to or position + 1 >= len(words):
        return None
    return 'TO' if tags[position + 1] in ('VB',) else None  # Infinitive marker


# Example usage
def main():
    """Run the tagger on a few sample sentences and print the results.

    Builds a tagger with a small hand-written lexicon and five rules, then
    for each sentence prints the initial tags followed by the final tags,
    noting which rules changed any tag.
    """
    # Lexicon written tag-first for compactness; flattened into the
    # word -> tag mapping that the tagger expects.
    words_by_tag = {
        'DT': ('the', 'a', 'an', 'this', 'that', 'these', 'those'),
        'PRP$': ('my', 'your', 'his', 'her', 'their'),
        'VBZ': ('is', 'eats', 'walks'),
        'VBP': ('are',),
        'VBD': ('was', 'were', 'ate', 'walked'),
        'VB': ('eat', 'walk'),
        'VBG': ('eating', 'walking'),
        'TO': ('to',),
        'IN': ('in', 'on', 'at', 'by', 'with'),
        'CC': ('and', 'or', 'but'),
        'PRP': ('i', 'you', 'he', 'she', 'it', 'we', 'they'),
        'RB': ('quickly', 'happily', 'very', 'really'),
    }
    lexicon = {
        word: pos_tag
        for pos_tag, group in words_by_tag.items()
        for word in group
    }

    tagger = TransformationBasedTagger()
    tagger.set_word_to_tag_map(lexicon)
    tagger.set_default_tag('NN')  # Unknown words are assumed to be nouns

    # Registration order matters: earlier rules take precedence at a position.
    rule_table = (
        (rule_prev_det_to_noun, "Previous word is determiner -> noun"),
        (rule_next_verb_to_noun, "Next word is verb -> noun"),
        (rule_capitalized_to_proper_noun, "Capitalized word -> proper noun"),
        (rule_follows_to_preposition, "'to' followed by noun/determiner -> preposition"),
        (rule_follows_to_infinitive, "'to' followed by verb -> infinitive marker"),
    )
    for rule_function, description in rule_table:
        tagger.add_rule(rule_function, description)

    samples = (
        "The dog eats quickly",
        "I walk to the store with my friend",
        "John is eating an apple",
        "She went to buy a book",
        "The cat and the dog are in the garden",
    )

    separator = "\n" + "-" * 50
    for sentence in samples:
        print(f"\nSentence: '{sentence}'")

        # Baseline tags, computed separately so they can be shown and compared.
        tokens = sentence.split()
        baseline = tagger.initial_tagging(tokens)
        print("\nInitial tagging:")
        for token, start_tag in zip(tokens, baseline):
            print(f"{token} -> {start_tag}")

        # Full pipeline: baseline tags plus transformation rules.
        tagged, rule_log = tagger.tag(sentence)

        print("\nFinal tagging after transformation rules:")
        for (token, final_tag), start_tag, fired in zip(tagged, baseline, rule_log):
            if final_tag == start_tag:
                print(f"{token} -> {final_tag}")
                continue
            print(f"{token} -> {final_tag} (changed from {start_tag})")
            print(f"  Rules applied: {', '.join(fired)}")

        print(separator)


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


Sentence: 'The dog eats quickly'

Initial tagging:
The -> DT
dog -> NN
eats -> VBZ
quickly -> RB

Final tagging after transformation rules:
The -> DT
dog -> NN
eats -> VBZ
quickly -> RB

--------------------------------------------------

Sentence: 'I walk to the store with my friend'

Initial tagging:
I -> PRP
walk -> VB
to -> TO
the -> DT
store -> NN
with -> IN
my -> PRP$
friend -> NN

Final tagging after transformation rules:
I -> NN (changed from PRP)
  Rules applied: Next word is verb -> noun
walk -> VB
to -> IN (changed from TO)
  Rules applied: 'to' followed by noun/determiner -> preposition
the -> DT
store -> NN
with -> IN
my -> PRP$
friend -> NN

--------------------------------------------------

Sentence: 'John is eating an apple'

Initial tagging:
John -> NN
is -> VBZ
eating -> VBG
an -> DT
apple -> NN

Final tagging after transformation rules:
John -> NN
is -> NN (changed from VBZ)
  Rules applied: Next word is verb -> noun
eating -> VBG
an -> DT
apple -> NN

------------