In [1]:
#7A
''' 
Aim: Define a grammar using NLTK and analyse a sentence with it.

Defining grammar: creating structured
rules to analyze and process sentences based on grammatical principles

Context-free grammar (CFG) is widely used in NLTK.
In a CFG, sentences are broken into hierarchical structures consisting of
non-terminal and terminal symbols.
Non-terminals: syntactic categories such as noun phrases, verb phrases, etc.
Terminals: the actual words.'''

#Need to import library
import nltk
from nltk import tokenize
# Fetch the 'punkt_tab' resource before tokenizing
# (presumably the data word_tokenize relies on — confirm for the installed NLTK version).
nltk.download('punkt_tab')

# Define the grammar.
# S is the start symbol; each 'LHS -> RHS' line is one production rule.
# Quoted symbols are terminals (actual words); unquoted symbols are non-terminals
# (here: sentence, verb phrase, noun phrase, determiner).
# NOTE(review): 'Book' and 'flight' are labelled directly as VP and NP rather than
# V and N, and 'NP -> Det NP' is recursive — confirm this is intentional.
grammar1 = nltk.CFG.fromstring("""
S -> VP
VP -> VP NP
NP -> Det NP
Det -> 'that'
NP -> 'flight'
VP -> 'Book'
""")

# Input sentence; every word must appear as a terminal in the grammar above.
sentence = "Book that flight"

# Split the sentence into word tokens and show them.
all_tokens = tokenize.word_tokenize(sentence)
print(all_tokens)

# Create an instance of ChartParser bound to the grammar.
parser = nltk.ChartParser(grammar1)

# Print every parse tree the parser yields, first in bracketed form,
# then as an ASCII-art diagram.
for tree in parser.parse(all_tokens):
    print(tree)
    tree.pretty_print()

['Book', 'that', 'flight']
(S (VP (VP Book) (NP (Det that) (NP flight))))
      S             
      |              
      VP            
  ____|____          
 |         NP       
 |     ____|____     
 VP  Det        NP  
 |    |         |    
Book that     flight



[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Artophilic\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [2]:
#7B
''' 
Aim: Accept the input string with Regular expression of FA: 101+
In Natural Language Processing (NLP) and automata theory, Finite Automata (FA) and
Regular Expressions (RegEx) are widely used to process and recognize patterns in strings.

FA: 101+ : the regular expression matches strings that consist of "10" followed by one or more 1s (e.g. 101, 1011, 10111).'''

def FA(s):
    """Simulate a finite automaton for the regular expression ``101+``.

    The language is the string '10' followed by one or more '1' characters
    (e.g. '101', '1011', '10111').

    Parameters:
        s: the input string to test, read one symbol at a time.

    Returns:
        'Accepted' if the whole input matches ``101+``, otherwise 'Rejected'.
    """
    # Transition table: (current state, input symbol) -> next state.
    #   state 0: start, nothing read yet
    #   state 1: read '1'
    #   state 2: read '10'
    #   state 3: read '10' followed by at least one '1' (the only accepting state)
    # Any pair missing from the table leads to an implicit dead state.
    transitions = {
        (0, '1'): 1,
        (1, '0'): 2,
        (2, '1'): 3,
        (3, '1'): 3,
    }
    accepting_state = 3

    state = 0
    for symbol in s:
        state = transitions.get((state, symbol))
        if state is None:
            # No valid move for this symbol: the input can never be accepted.
            return 'Rejected'

    # Inputs that end too early (including the empty string) stop in a
    # non-accepting state and are rejected here.
    return 'Accepted' if state == accepting_state else 'Rejected'

# Sample strings: too short, valid matches, wrong prefix, and a stray '0' in the tail.
inputs = ['1', '10101', '101', '10111', '01010', '100', '', '10111101', '1011111']
# Print one verdict per sample, in order.
for candidate in inputs:
    verdict = FA(candidate)
    print(verdict)

Rejected
Rejected
Accepted
Accepted
Rejected
Rejected
Rejected
Rejected
Accepted


In [None]:
#7C
''' 
Aim: Accept the input string with Regular expression of FA: (a+b)*bba'''

def FA(s):
    """Check whether ``s`` belongs to the language of ``(a+b)*bba``.

    The string must be built only from the symbols 'a' and 'b' and must end
    with the three symbols 'b', 'b', 'a'.

    Parameters:
        s: the input string to test.

    Returns:
        'Accepted' if the input matches, otherwise 'Rejected'.
    """
    # Guard 1: any symbol outside the alphabet {a, b} rejects immediately.
    for symbol in s:
        if symbol != 'a' and symbol != 'b':
            return 'Rejected'

    # Guard 2: fewer than three symbols cannot contain the 'bba' suffix.
    if len(s) < 3:
        return 'Rejected'

    # Compare the last three symbols against the required suffix.
    suffix = (s[-3], s[-2], s[-1])
    if suffix == ('b', 'b', 'a'):
        return 'Accepted'
    return 'Rejected'
# Sample strings: valid suffixes, near misses, and the empty string.
inputs = ['bba', 'ababbba', 'abba', 'abb', 'baba', 'bbb', '']
# Print one verdict per sample, in order.
for candidate in inputs:
    verdict = FA(candidate)
    print(verdict)

Accepted
Accepted
Accepted
Rejected
Rejected
Rejected
Rejected


In [None]:
#7D
''' 
Aim: Implementation of Deductive Chart Parsing using context free grammar
and a given sentence.

Deductive Chart Parsing is a dynamic programming-based parsing technique used in
Natural Language Processing (NLP) to analyze sentence structures using Context-Free
Grammar (CFG).

It efficiently parses complex and ambiguous sentences by incrementally
building a parse chart and applying inference rules to recognize valid phrases.

The process begins with an empty parse chart.
CFG rules are applied to identify grammatical structures.
Intermediate results are stored in the chart to avoid redundant computation.
Parsing then continues by deducing new possible structures until the entire sentence is parsed.
'''

import nltk
from nltk import tokenize
# Context-free grammar for a simple sentence with a prepositional phrase.
# A PP can only attach to a VP here ('VP -> VP PP'); there is no 'NP -> NP PP' rule.
grammar1 = nltk.CFG.fromstring("""
S -> NP VP
PP -> P NP
NP -> Det N | 'I'
VP -> V NP | VP PP
Det -> 'a' | 'my'
N -> 'bird' | 'balcony'
V -> 'saw'
P -> 'in'
""")

# Sentence to analyse; every word is a terminal of the grammar above.
sentence = "I saw a bird in my balcony"

# Break the sentence into word tokens and show them.
all_tokens = tokenize.word_tokenize(sentence)
print("Tokens:", all_tokens)

# Chart parser bound to the grammar.
parser = nltk.ChartParser(grammar1)

# Take only the first parse the parser yields (None if there is no parse),
# then show it in bracketed form and as an ASCII-art diagram.
tree = next(iter(parser.parse(all_tokens)), None)
if tree is not None:
    print(tree)
    tree.pretty_print()


Tokens: ['I', 'saw', 'a', 'bird', 'in', 'my', 'balcony']
(S
  (NP I)
  (VP
    (VP (V saw) (NP (Det a) (N bird)))
    (PP (P in) (NP (Det my) (N balcony)))))
     S                                  
  ___|___________                        
 |               VP                     
 |        _______|________               
 |       VP               PP            
 |    ___|___          ___|___           
 |   |       NP       |       NP        
 |   |    ___|___     |    ___|_____     
 NP  V  Det      N    P  Det        N   
 |   |   |       |    |   |         |    
 I  saw  a      bird  in  my     balcony

