## Stemming:

In [1]:
import nltk

In [2]:
from nltk import PorterStemmer, LancasterStemmer, SnowballStemmer

In [34]:
def words_stemmer(words, type="PorterStemmer", lang="english", encoding="utf8"):
    """Stem each word and return the stems as one space-separated byte string.

    Args:
        words: Iterable of word strings to stem.
        type: Name of the stemmer to use: "PorterStemmer", "LancasterStemmer"
            or "SnowballStemmer".
        lang: Language passed to ``SnowballStemmer``; ignored by the other two.
        encoding: Encoding applied to every stem before joining.

    Returns:
        ``words`` itself, untouched, when ``type`` is not one of the supported
        stemmer names (this includes ``False``). Otherwise a ``bytes`` object
        holding the encoded stems joined by single spaces.
    """
    supported = ("PorterStemmer", "LancasterStemmer", "SnowballStemmer")
    # Guard clause: unsupported selection means "do not stem".
    if type not in supported:
        return words

    # Only the Snowball stemmer is language-aware.
    if type == "SnowballStemmer":
        stemmer = SnowballStemmer(lang)
    elif type == "LancasterStemmer":
        stemmer = LancasterStemmer()
    else:
        stemmer = PorterStemmer()

    return b" ".join(stemmer.stem(token).encode(encoding) for token in words)

In [35]:
# Sample text reused by the stemming and lemmatization demo cells below.
words = "caring cares carefully cared"

In [37]:
# Run the same sample text through each supported stemmer and print the result.
print("Original:", words)
for label, stemmer_type in (
    ("Porter: ", "PorterStemmer"),
    ("Lancaster: ", "LancasterStemmer"),
    ("Snowball: ", "SnowballStemmer"),
):
    print(label, words_stemmer(nltk.word_tokenize(words), stemmer_type))

Original: caring cares carefully cared
Porter:  b'care care care care'
Lancaster:  b'car car car car'
Snowball:  b'care care care care'


## Lemmatization:

In [38]:
from nltk.stem import WordNetLemmatizer

In [39]:
# Module-level lemmatizer instance.
# NOTE(review): no cell visible here reads `wlem`; words_lemmatizer creates
# its own WordNetLemmatizer. Confirm whether this is still needed.
wlem = WordNetLemmatizer()

In [44]:
# Lemmatize every token of a text, using a POS tag looked up per token
def words_lemmatizer(text, encoding="utf8"):
    """Tokenize ``text`` and return its lemmas as one space-separated byte string.

    Each token is passed to ``find_pos`` on its own to obtain the WordNet
    part-of-speech code, which is then given to ``WordNetLemmatizer.lemmatize``.

    Args:
        text: The raw text to tokenize and lemmatize (a string, not a list).
        encoding: Encoding applied to every lemma before joining.

    Returns:
        A ``bytes`` object holding the encoded lemmas joined by single spaces.
    """
    tokens = nltk.word_tokenize(text)
    lemmatizer = WordNetLemmatizer()
    encoded_lemmas = [
        lemmatizer.lemmatize(token, find_pos(token)).encode(encoding)
        for token in tokens
    ]
    return b" ".join(encoded_lemmas)

In [45]:
# WordNet part-of-speech codes:
#n    NOUN
#v    VERB
#a    ADJECTIVE
#s    ADJECTIVE SATELLITE
#r    ADVERB

In [46]:
def find_pos(word):
    """Return the WordNet part-of-speech code for a single word.

    The word is tokenized and tagged with ``nltk.pos_tag``; only the tag of the
    first resulting token is used, and only that tag's first letter is examined.

    Args:
        word: The word to classify (a string).

    Returns:
        ``'a'`` when the tag starts with "J" (adjective tags such as "JJ",
        "JJR", "JJS"), ``'r'`` when it starts with "R" (adverb tags such as
        "RB", "RBR", "RBS"), ``'v'`` when it starts with "V" (verb tags such as
        "VB", "VBD", "VBG", "VBN", "VBP", "VBZ"), and ``'n'`` for everything
        else, including empty or whitespace-only input.
    """
    # Nothing to tag: use the same default as for unrecognized tags instead of
    # failing with IndexError when indexing the empty tagger output.
    if not word or not word.strip():
        return 'n'

    tagged = nltk.pos_tag(nltk.word_tokenize(word))
    if not tagged:
        return 'n'

    # The first letter of the tag identifies its family; any family not listed
    # here (noun tags "NN", "NNS", "NNP", "NNPS" and all others) maps to noun.
    tag_initial = tagged[0][1][:1].lower()
    return {'j': 'a', 'r': 'r', 'v': 'v'}.get(tag_initial, 'n')

In [47]:
# Lemmatize the sample text defined earlier in the notebook.
print("Lemmatized: ", words_lemmatizer(words))

Lemmatized:  b'care care carefully care'


### Getting synonyms and antonyms for a given word with WordNet

In [48]:
# WordNet is a large lexical database of English in which words are linked
# together by their semantic relationships.
# It groups words together based on their meanings.

In [49]:
from nltk.corpus import wordnet

In [50]:
# Look up the synsets WordNet returns for "suitable".
s = wordnet.synsets("suitable")
# Show the definition and usage examples of the first synset only;
# indexing [0] assumes the lookup returned at least one synset.
print("Definition: ", s[0].definition())
print("Example: ", s[0].examples())

Definition:  meant or adapted for an occasion or use
Example:  ['a tractor suitable (or fit) for heavy duty', 'not an appropriate (or fit) time for flippancy']


In [51]:
# Collect every lemma name across all synsets of "better" as a synonym, plus
# the first listed antonym of each lemma that has one.
synonyms = []
antonyms = []
# Loop names are kept distinct from `s` (the synset list bound in the earlier
# cell) so that running this cell does not overwrite it.
for synset in wordnet.synsets("better"):
    for lemma in synset.lemmas():
        synonyms.append(lemma.name())
        lemma_antonyms = lemma.antonyms()  # look up once, reuse below
        if lemma_antonyms:
            antonyms.append(lemma_antonyms[0].name())

# Deduplicate for display; set ordering is arbitrary.
print("synonyms: \n", set(synonyms))
print("antonyms: \n", set(antonyms))

synonyms: 
 {'honest', 'dependable', 'skillful', 'skilful', 'full', 'punter', 'ameliorate', 'salutary', 'considerably', 'sound', 'respectable', 'substantially', 'serious', 'unspoiled', 'break', 'honorable', 'comfortably', 'undecomposed', 'adept', 'ripe', 'upright', 'improve', 'near', 'estimable', 'unspoilt', 'meliorate', 'well', 'expert', 'best', 'right', 'dear', 'safe', 'effective', 'bettor', 'proficient', 'secure', 'advantageously', 'in_force', 'practiced', 'intimately', 'better', 'wagerer', 'just', 'easily', 'beneficial', 'in_effect', 'amend', 'good'}
antonyms: 
 {'badly', 'worse', 'disadvantageously', 'ill', 'bad', 'evil', 'worsen'}


## References:

http://www.nltk.org/api/nltk.stem.html

http://en.wikipedia.org/wiki/Stemming

https://wordnet.princeton.edu/wordnet/man/wndb.5WN.html#sect3

http://www.nltk.org/howto/wordnet.html