In [2]:
from nltk.parse.generate import generate, demo_grammar
from nltk import CFG
grammar = CFG.fromstring(demo_grammar)
print(grammar)

Grammar with 13 productions (start state = S)
    S -> NP VP
    NP -> Det N
    PP -> P NP
    VP -> 'slept'
    VP -> 'saw' NP
    VP -> 'walked' PP
    Det -> 'the'
    Det -> 'a'
    N -> 'man'
    N -> 'park'
    N -> 'dog'
    P -> 'in'
    P -> 'with'


In [3]:
for sentence in generate(grammar, n=10):
    print(' '.join(sentence))

the man slept
the man saw the man
the man saw the park
the man saw the dog
the man saw a man
the man saw a park
the man saw a dog
the man walked in the man
the man walked in the park
the man walked in the dog


In [4]:
for sentence in generate(grammar, depth=4):
     print(' '.join(sentence))

the man slept
the park slept
the dog slept
a man slept
a park slept
a dog slept


In [5]:
len(list(generate(grammar, depth=3)))

0

In [6]:
len(list(generate(grammar, depth=4)))


6

In [7]:
len(list(generate(grammar, depth=5)))

42

In [8]:
len(list(generate(grammar, depth=6)))

114

In [9]:
len(list(generate(grammar)))

114

In [14]:
groucho_grammar = nltk.CFG.fromstring("""
 S -> NP VP
 PP -> P NP
 NP -> Det N | Det N PP | 'I'
 VP -> V NP | VP PP
 Det -> 'an' | 'my'
 N -> 'elephant' | 'pajamas'
 V -> 'shot'
 P -> 'in'
 """)

In [15]:
import nltk 
sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']
parser = nltk.ChartParser(groucho_grammar)
for tree in parser.parse(sent):
   print(tree)

(S
  (NP I)
  (VP
    (VP (V shot) (NP (Det an) (N elephant)))
    (PP (P in) (NP (Det my) (N pajamas)))))
(S
  (NP I)
  (VP
    (V shot)
    (NP (Det an) (N elephant) (PP (P in) (NP (Det my) (N pajamas))))))


In [18]:
grammar1 = nltk.CFG.fromstring("""
  S -> NP VP
  VP -> V NP | V NP PP
  PP -> P NP
  V -> "saw" | "ate" | "walked"
  NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
  Det -> "a" | "an" | "the" | "my"
  N -> "man" | "dog" | "cat" | "telescope" | "park"
  P -> "in" | "on" | "by" | "with"
  """)

In [19]:
rd_parser = nltk.RecursiveDescentParser(grammar1)
sent = 'Mary saw a dog'.split()
for tree in rd_parser.parse(sent):
 print(tree)



(S (NP Mary) (VP (V saw) (NP (Det a) (N dog))))


In [22]:
sr_parser = nltk.ShiftReduceParser(grammar1)
sent = 'Mary saw a dog'.split()
for tree in sr_parser.parse(sent):
  print(tree)

(S (NP Mary) (VP (V saw) (NP (Det a) (N dog))))


In [23]:
text = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']
groucho_grammar.productions(rhs=text[1])


[V -> 'shot']

In [20]:
grammar2 = nltk.CFG.fromstring("""
  S  -> NP VP
  NP -> Det Nom | PropN
  Nom -> Adj Nom | N
  VP -> V Adj | V NP | V S | V NP PP
  PP -> P NP
  PropN -> 'Buster' | 'Chatterer' | 'Joe'
  Det -> 'the' | 'a'
  N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log'
  Adj  -> 'angry' | 'frightened' |  'little' | 'tall'
  V ->  'chased'  | 'saw' | 'said' | 'thought' | 'was' | 'put'
  P -> 'on'
  """)

In [25]:
groucho_dep_grammar = nltk.DependencyGrammar.fromstring("""
'shot' -> 'I' | 'elephant' | 'in'
'elephant' -> 'an' | 'in'
 'in' -> 'pajamas'
 'pajamas' -> 'my'
 """)
print(groucho_dep_grammar)

Dependency grammar with 7 productions
  'shot' -> 'I'
  'shot' -> 'elephant'
  'shot' -> 'in'
  'elephant' -> 'an'
  'elephant' -> 'in'
  'in' -> 'pajamas'
  'pajamas' -> 'my'


In [26]:
pdp = nltk.ProjectiveDependencyParser(groucho_dep_grammar)
sent = 'I shot an elephant in my pajamas'.split()
trees = pdp.parse(sent)
for tree in trees:
 print(tree)

(shot I (elephant an (in (pajamas my))))
(shot I (elephant an) (in (pajamas my)))


In [33]:
from nltk.stem import SnowballStemmer
print(" ".join(SnowballStemmer.languages))

arabic danish dutch english finnish french german hungarian italian norwegian porter portuguese romanian russian spanish swedish


In [28]:
#snowball
import nltk
sno = nltk.stem.SnowballStemmer('english')
sno.stem('grows')
'grow'


'grow'

In [29]:
sno.stem('leaves')


'leav'

In [30]:

sno.stem('fairly')


'fair'

In [32]:
import snowballstemmer
stemmer = snowballstemmer.stemmer('english');
print(stemmer.stemWords("We are the world".split()));


['We', 'are', 'the', 'world']


In [39]:
stemmer = SnowballStemmer("english")
print(stemmer.stem("running"))


run


In [40]:
stemmer2 = SnowballStemmer("english", ignore_stopwords=True)
print(stemmer.stem("having"))

have


In [41]:
print(stemmer2.stem("having"))


having


In [42]:
print(SnowballStemmer("english").stem("generously"))

generous


In [43]:
print(SnowballStemmer("porter").stem("generously"))

gener


In [44]:
from nltk.stem import SnowballStemmer
 
st = SnowballStemmer("english")
text = ['Where did he learn to dance like that?',
        'His eyes were dancing with humor.',
        'She shook her head and danced away',
        'Alex was an excellent dancer.']
 
output = []
for sentence in text:
    output.append(" ".join([st.stem(i) for i in sentence.split()]))
 
for item in output:
    print(item)
 
print("-" * 50)
print(st.stem('jumping'), st.stem('jumps'), st.stem('jumped'))

where did he learn to danc like that?
his eye were danc with humor.
she shook her head and danc away
alex was an excel dancer.
--------------------------------------------------
jump jump jump


In [48]:

import nltk 
print('NLTK Version: %s' % (nltk.__version__))

snowball_stemmer = nltk.stem.SnowballStemmer('english')


article = 'In linguistic morphology and information retrieval, stemming is the process of \
reducing inflected (or sometimes derived) words to their word stem, base or root \
form—generally a written word form. The stem need not be identical to the morphological \
root of the word; it is usually sufficient that related words map to the same stem, even \
if this stem is not in itself a valid root.'



NLTK Version: 3.4.5


In [49]:
tokens = nltk.word_tokenize(article)
print('Original Article: %s' % (article))
print()

for token in tokens:
    stemmed_token = snowball_stemmer.stem(token)
    
    if token != stemmed_token:
        print('Original : %s, New: %s' % (token, stemmed_token))


Original Article: In linguistic morphology and information retrieval, stemming is the process of reducing inflected (or sometimes derived) words to their word stem, base or root form—generally a written word form. The stem need not be identical to the morphological root of the word; it is usually sufficient that related words map to the same stem, even if this stem is not in itself a valid root.

Original : In, New: in
Original : linguistic, New: linguist
Original : morphology, New: morpholog
Original : information, New: inform
Original : retrieval, New: retriev
Original : stemming, New: stem
Original : reducing, New: reduc
Original : inflected, New: inflect
Original : sometimes, New: sometim
Original : derived, New: deriv
Original : words, New: word
Original : form—generally, New: form—gener
Original : The, New: the
Original : identical, New: ident
Original : morphological, New: morpholog
Original : usually, New: usual
Original : sufficient, New: suffici
Original : related, New: relat

In [50]:
from nltk.stem.snowball import SnowballStemmer

stemmer = SnowballStemmer(language='english')

tokens = ['compute', 'computer', 'computed', 'computing']

for token in tokens:
    print(token + ' --> ' + stemmer.stem(token))

compute --> comput
computer --> comput
computed --> comput
computing --> comput


In [3]:
from nltk.stem.snowball import HindiStemmer

stemmer = HindiStemmer()

stemmer.stem('यह वाक्य हिन्दी में है।')


ImportError: cannot import name 'HindiStemmer' from 'nltk.stem.snowball' (C:\Users\pkdut\Anaconda3\lib\site-packages\nltk\stem\snowball.py)

In [None]:
*