In [19]:
import os
import nltk

# Project-local directory that holds the NLTK resources used by this notebook.
NLTK_DATA_DIR = "../../data/nltk/"

# Register the directory before probing so that nltk.data.find() searches it;
# the membership guard keeps the search path free of duplicates when this cell
# is re-run in the same kernel.
if NLTK_DATA_DIR not in nltk.data.path:
    nltk.data.path.append(NLTK_DATA_DIR)

# (resource path used for lookup, package id used for download)
REQUIRED_NLTK_PACKAGES = (
    ("taggers/averaged_perceptron_tagger", "averaged_perceptron_tagger"),  # textblob
    ("corpora/subjectivity", "subjectivity"),                              # nltk.subjectivity
)

# Check every resource individually instead of only testing that the directory
# exists: an existing but empty or partially populated directory would
# otherwise skip the download and leave later cells failing with LookupError.
for resource_path, package_id in REQUIRED_NLTK_PACKAGES:
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package_id, download_dir=NLTK_DATA_DIR)

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     ../../data/nltk/...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package subjectivity to ../../data/nltk/...
[nltk_data]   Unzipping corpora/subjectivity.zip.


## TextBlob

In [27]:
from textblob import TextBlob

# Longer multi-line review excerpt; defined here, but it is not the text that
# gets tagged below.
text = '''The titular threat of The Blob has always struck me as the ultimate movie
monster: an insatiably hungry, amoeba-like mass able to penetrate
virtually any safeguard, capable of--as a doomed doctor chillingly
describes it--"assimilating flesh on contact.
Snide comparisons to gelatin be damned, it's a concept with the most
devastating of potential consequences, not unlike the grey goo scenario
proposed by technological theorists fearful of
artificial intelligence run rampant.'''

# Single all-lowercase sentence: orthography changes the resulting POS tags.
text2 = "the movie begins in the past where a boy named sam attempts to save celebi from a hunter."

# Wrap the lowercase sentence in a TextBlob; the cell's displayed value is its
# list of (token, POS tag) pairs.
blob = TextBlob(text2)
blob.tags
# Note: blob.noun_phrases would give the noun phrases instead of the POS tags.

[('the', 'DT'),
 ('movie', 'NN'),
 ('begins', 'VBZ'),
 ('in', 'IN'),
 ('the', 'DT'),
 ('past', 'NN'),
 ('where', 'WRB'),
 ('a', 'DT'),
 ('boy', 'NN'),
 ('named', 'VBN'),
 ('sam', 'JJ'),
 ('attempts', 'NNS'),
 ('to', 'TO'),
 ('save', 'VB'),
 ('celebi', 'NN'),
 ('from', 'IN'),
 ('a', 'DT'),
 ('hunter', 'NN')]

## NLTK

In [25]:
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *

# Number of sentences taken from each category of the subjectivity corpus.
n_instances = 100


def _labelled_sentences(label):
    """Return the first ``n_instances`` corpus sentences of category ``label``.

    Each item is a ``(sentence, label)`` tuple, where ``sentence`` is whatever
    ``subjectivity.sents`` yields for that category.
    """
    category_sents = subjectivity.sents(categories=label)
    return [(sent, label) for sent in category_sents[:n_instances]]


subj_docs = _labelled_sentences('subj')
obj_docs = _labelled_sentences('obj')

# POS-tag the first objective sentence and display the result.
first_obj_sentence = obj_docs[0][0]
tags = nltk.pos_tag(first_obj_sentence)
tags

[('the', 'DT'),
 ('movie', 'NN'),
 ('begins', 'VBZ'),
 ('in', 'IN'),
 ('the', 'DT'),
 ('past', 'NN'),
 ('where', 'WRB'),
 ('a', 'DT'),
 ('young', 'JJ'),
 ('boy', 'NN'),
 ('named', 'VBN'),
 ('sam', 'JJ'),
 ('attempts', 'NNS'),
 ('to', 'TO'),
 ('save', 'VB'),
 ('celebi', 'NN'),
 ('from', 'IN'),
 ('a', 'DT'),
 ('hunter', 'NN'),
 ('.', '.')]