In [1]:
import nltk
from nltk.tokenize import WhitespaceTokenizer, WordPunctTokenizer, TreebankWordTokenizer, TweetTokenizer, MWETokenizer
from nltk.stem import PorterStemmer, SnowballStemmer, WordNetLemmatizer

nltk.download('punkt')
nltk.download('wordnet')

text = "I'm loving the new AI-based chatbot, it's super cool and helpful!"

# Tokenization
print("Whitespace:", WhitespaceTokenizer().tokenize(text))
print("Punctuation:", WordPunctTokenizer().tokenize(text))
print("Treebank:", TreebankWordTokenizer().tokenize(text))
print("Tweet:", TweetTokenizer().tokenize(text))
print("MWE:", MWETokenizer([("AI-based", "chatbot")]).tokenize(text.split()))

# Stemming
porter = PorterStemmer()
snowball = SnowballStemmer("english")
tokens = TreebankWordTokenizer().tokenize(text)
print("Porter Stemmer:", [porter.stem(w) for w in tokens])
print("Snowball Stemmer:", [snowball.stem(w) for w in tokens])

# Lemmatization
lemmatizer = WordNetLemmatizer()
print("Lemmatized:", [lemmatizer.lemmatize(w) for w in tokens])

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Prasanna\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Prasanna\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Whitespace: ["I'm", 'loving', 'the', 'new', 'AI-based', 'chatbot,', "it's", 'super', 'cool', 'and', 'helpful!']
Punctuation: ['I', "'", 'm', 'loving', 'the', 'new', 'AI', '-', 'based', 'chatbot', ',', 'it', "'", 's', 'super', 'cool', 'and', 'helpful', '!']
Treebank: ['I', "'m", 'loving', 'the', 'new', 'AI-based', 'chatbot', ',', 'it', "'s", 'super', 'cool', 'and', 'helpful', '!']
Tweet: ["I'm", 'loving', 'the', 'new', 'AI-based', 'chatbot', ',', "it's", 'super', 'cool', 'and', 'helpful', '!']
MWE: ["I'm", 'loving', 'the', 'new', 'AI-based', 'chatbot,', "it's", 'super', 'cool', 'and', 'helpful!']
Porter Stemmer: ['i', "'m", 'love', 'the', 'new', 'ai-bas', 'chatbot', ',', 'it', "'s", 'super', 'cool', 'and', 'help', '!']
Snowball Stemmer: ['i', "'m", 'love', 'the', 'new', 'ai-bas', 'chatbot', ',', 'it', "'s", 'super', 'cool', 'and', 'help', '!']
Lemmatized: ['I', "'m", 'loving', 'the', 'new', 'AI-based', 'chatbot', ',', 'it', "'s", 'super', 'cool', 'and', 'helpful', '!']
