In [5]:
## Stopwords - "the", "a", "is", "in", "and", "to", etc.
## remove stopwords for unnecessary clutter

## Quote by Georges St-Pierre

paragraph = '''You don’t get better on the days when you feel like going. 
You get better on the days when you don’t want to go, but you go anyway. If
you can overcome the negative energy coming from your tired body or unmotivated
mind, you will grow and become better. It won’t be the best workout you have, 
you won’t accomplish as much as what you usually do when you actually feel good, 
but that doesn’t matter. Growth is a long term game, and the crappy days are
more important.'''

In [1]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/edwardkim/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [4]:
from nltk.stem import PorterStemmer

In [3]:
from nltk.corpus import stopwords

In [8]:
stopwords.words('english')

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [9]:
stemmer = PorterStemmer()

In [24]:
sentences = nltk.sent_tokenize(paragraph)

list

In [49]:
sentences

['’ get good day feel like go .',
 'get good day ’ want go , go anyway .',
 'overcom negat energi come tire bodi unmotiv mind , grow becom good .',
 '’ best workout , ’ accomplish much usual actual feel good , ’ matter .',
 'growth long term game , crappi day import .']

In [26]:
## Filter stopwords, then apply stemming

for i in range(len(sentences)):
    words = nltk.word_tokenize(sentences[i])
    words = [stemmer.stem(word) for word in words if word not in set(stopwords.words('english'))]
    sentences[i] = ' '.join(words) # Convert all the list of words back into sentences

['You', 'don', '’', 't', 'get', 'better', 'on', 'the', 'days', 'when', 'you', 'feel', 'like', 'going', '.']
['You', 'get', 'better', 'on', 'the', 'days', 'when', 'you', 'don', '’', 't', 'want', 'to', 'go', ',', 'but', 'you', 'go', 'anyway', '.']
['If', 'you', 'can', 'overcome', 'the', 'negative', 'energy', 'coming', 'from', 'your', 'tired', 'body', 'or', 'unmotivated', 'mind', ',', 'you', 'will', 'grow', 'and', 'become', 'better', '.']
['It', 'won', '’', 't', 'be', 'the', 'best', 'workout', 'you', 'have', ',', 'you', 'won', '’', 't', 'accomplish', 'as', 'much', 'as', 'what', 'you', 'usually', 'do', 'when', 'you', 'actually', 'feel', 'good', ',', 'but', 'that', 'doesn', '’', 't', 'matter', '.']
['Growth', 'is', 'a', 'long', 'term', 'game', ',', 'and', 'the', 'crappy', 'days', 'are', 'more', 'important', '.']


In [23]:
sentences

['you ’ get better day feel like go .',
 'you get better day ’ want go , go anyway .',
 'if overcom neg energi come tire bodi unmotiv mind , grow becom better .',
 'it ’ best workout , ’ accomplish much usual actual feel good , ’ matter .',
 'growth long term game , crappi day import .']

In [27]:
from nltk.stem import SnowballStemmer

snowball_stemmer = SnowballStemmer('english')

for i in range(len(sentences)):
    words = nltk.word_tokenize(sentences[i])
    words = [snowball_stemmer.stem(word) for word in words if word not in set(stopwords.words('english'))]
    sentences[i] = ' '.join(words) # Convert all the list of words back into sentences

In [28]:
sentences

['you ’ get better day feel like go .',
 'you get better day ’ want go , go anyway .',
 'if overcom negat energi come tire bodi unmotiv mind , grow becom better .',
 'it ’ best workout , ’ accomplish much usual actual feel good , ’ matter .',
 'growth long term game , crappi day import .']

In [50]:
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

for i in range(len(sentences)):
    words = nltk.word_tokenize(sentences[i])
    words = [lemmatizer.lemmatize(word.lower(), pos='v') for word in words if word not in set(stopwords.words('english'))]
    sentences[i] = ' '.join(words) # Convert all the list of words back into sentences

In [51]:
sentences

['’ get good day feel like go .',
 'get good day ’ want go , go anyway .',
 'overcom negat energi come tire bodi unmotiv mind , grow becom good .',
 '’ best workout , ’ accomplish much usual actual feel good , ’ matter .',
 'growth long term game , crappi day import .']