In [2]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [5]:
import nltk

# Define a context-free grammar
grammar = nltk.CFG.fromstring("""
    S -> NP VP
    NP -> Det N | 'I'
    VP -> V NP
    Det -> 'a' | 'an' | 'the'
    N -> 'dog' | 'cat' | 'bat'
    V -> 'chased' | 'saw'
""")

# Create a parser based on the defined CFG
parser = nltk.ChartParser(grammar)

# Define a sentence to parse
sentence = "I saw a dog"

# Tokenize the sentence
tokens = sentence.split()

# Parse the sentence and print the parse trees
for tree in parser.parse(tokens):
    print(tree)
    tree.pretty_print()
grammar?

(S (NP I) (VP (V saw) (NP (Det a) (N dog))))
         S             
  _______|___           
 |           VP        
 |    _______|___       
 |   |           NP    
 |   |        ___|___   
 NP  V      Det      N 
 |   |       |       |  
 I  saw      a      dog



[1;31mType:[0m           CFG
[1;31mString form:[0m   
Grammar with 12 productions (start state = S)
           S -> NP VP
           NP -> Det N
           NP -> 'I'
           VP <...> n'
           Det -> 'the'
           N -> 'dog'
           N -> 'cat'
           N -> 'bat'
           V -> 'chased'
           V -> 'saw'
[1;31mFile:[0m           c:\users\admin\appdata\local\programs\python\python310\lib\site-packages\nltk\grammar.py
[1;31mDocstring:[0m     
A context-free grammar.  A grammar consists of a start state and
a set of productions.  The set of terminals and nonterminals is
implicitly specified by the productions.

If you need efficient key-based access to productions, you
can use a subclass to implement it.
[1;31mInit docstring:[0m
Create a new context-free grammar, from the given start state
and set of ``Production`` instances.

:param start: The start symbol
:type start: Nonterminal
:param productions: The list of productions that defines the grammar
:type pro

In [6]:
import nltk

# Define the weighted grammar
pcfg_dict = {
    'S': [(['NP', 'VP'], 0.5), (['S', 'CC', 'S'], 0.5)],
    'NP': [(['Det', 'N'], 1.0)],
    'VP': [(['V', 'NP'], 1.0)],
    'Det': [(['the'], 0.7), (['a'], 0.3)],
    'N': [(['cat'], 0.25), (['dog'], 0.25), (['man'], 0.25), (['woman'], 0.25)],
    'V': [(['runs'], 0.4), (['walks'], 0.4), (['jumps'], 0.2)],
    'CC': [(['and'], 0.4), (['but'], 0.3), (['or'], 0.3)]
}

# Convert the PCFG dictionary into an NLTK WeightedGrammar
grammar_productions = []
for non_terminal, productions in pcfg_dict.items():
    for production in productions:
        rhs, prob = production
        grammar_productions.append(nltk.WeightedProduction(non_terminal, rhs, prob))

weighted_grammar = nltk.WeightedGrammar(nltk.Nonterminal('S'), grammar_productions)

# Define the input sentence
sentence = "the cat runs"

# Create a parser using CYK algorithm
parser = nltk.parse.WeightedChartParser(weighted_grammar)

# Parse the sentence
parses = parser.parse(sentence.split())

# Print the parse trees
for tree in parses:
    print(tree)


AttributeError: module 'nltk' has no attribute 'WeightedProduction'

In [8]:
import nltk

# Define the weighted grammar
pcfg_dict = {
    'S': [(['NP', 'VP'], 0.5), (['S', 'CC', 'S'], 0.5)],
    'NP': [(['Det', 'N'], 1.0)],
    'VP': [(['V', 'NP'], 1.0)],
    'Det': [(['the'], 0.7), (['a'], 0.3)],
    'N': [(['cat'], 0.25), (['dog'], 0.25), (['man'], 0.25), (['woman'], 0.25)],
    'V': [(['runs'], 0.4), (['walks'], 0.4), (['jumps'], 0.2)],
    'CC': [(['and'], 0.4), (['but'], 0.3), (['or'], 0.3)]
}

# Convert the PCFG dictionary into an NLTK WeightedGrammar
grammar_productions = []
for non_terminal, productions in pcfg_dict.items():
    for production in productions:
        rhs, prob = production
        grammar_productions.append(nltk.ProbabilisticProduction(nltk.Nonterminal(non_terminal), rhs, prob))

weighted_grammar = nltk.WeightedGrammar(nltk.Nonterminal('S'), grammar_productions)

# Define the input sentence
sentence = "the cat runs"

# Create a parser using CYK algorithm
parser = nltk.parse.WeightedChartParser(weighted_grammar)

# Parse the sentence
parses = parser.parse(sentence.split())

# Print the parse trees with probabilities
for tree in parses:
    print(tree)


TypeError: ProbabilisticProduction.__init__() takes 3 positional arguments but 4 were given

In [13]:
import nltk
grammar = nltk.PCFG.fromstring("""
    S -> NP VP [1.0]
    PP -> P NP [1.0]
    VP -> V NP [0.7] 
    VP -> VP PP [0.3]
    P -> 'with' [1.0]
    V -> 'saw' [1.0]
    NP -> NP PP [0.4]
    NP -> 'astronomers' [0.1]
    NP -> 'ears' [0.18]
    NP -> 'saw' [0.04]
    NP -> 'stars' [0.18]
    NP -> 'telescopes' [0.1]

    """)

# Convert the NLTK PCFG grammar to a Python dictionary
pcfg_dict = {}
for production in grammar.productions():
    lhs = str(production.lhs())
    rhs = [str(sym) for sym in production.rhs()]
    prob = production.prob()
    
    if lhs in pcfg_dict:
        pcfg_dict[lhs].append((rhs, prob))
    else:
        pcfg_dict[lhs] = [(rhs, prob)]

print(pcfg_dict)


{'S': [(['NP', 'VP'], 1.0)], 'PP': [(['P', 'NP'], 1.0)], 'VP': [(['V', 'NP'], 0.7), (['VP', 'PP'], 0.3)], 'P': [(['with'], 1.0)], 'V': [(['saw'], 1.0)], 'NP': [(['NP', 'PP'], 0.4), (['astronomers'], 0.1), (['ears'], 0.18), (['saw'], 0.04), (['stars'], 0.18), (['telescopes'], 0.1)]}
Grammar with 12 productions (start state = S)
    S -> NP VP [1.0]
    PP -> P NP [1.0]
    VP -> V NP [0.7]
    VP -> VP PP [0.3]
    P -> 'with' [1.0]
    V -> 'saw' [1.0]
    NP -> NP PP [0.4]
    NP -> 'astronomers' [0.1]
    NP -> 'ears' [0.18]
    NP -> 'saw' [0.04]
    NP -> 'stars' [0.18]
    NP -> 'telescopes' [0.1]
