# Subjectivity Detection

### Subjectivity Detection Example

In [None]:
#nltk.corpus.subjectivity is a built-in NLTK corpus that contains a dataset of subjective and objective sentences extracted from movie reviews. 
from nltk.corpus import subjectivity
# NaiveBayesClassifier is a simple probabilistic classifier from NLTK that we use to train on those labeled sentences so it learns to classify new sentences as subjective or objective.
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy
# sent_tokenize splits raw text (one or more paragraphs) into individual sentences so that each one can be classified separately.
from nltk.tokenize import sent_tokenize
import nltk
# Fetch the subjectivity corpus (the call is repeated on every run of this cell).
nltk.download('subjectivity')

# Training material for the classifier: every corpus sentence (a list of
# tokens) paired with the category it was read from, 'subj' for opinions and
# 'obj' for facts.
subj_docs, obj_docs = (
    [(tokens, label) for tokens in subjectivity.sents(categories=label)]
    for label in ('subj', 'obj')
)

# A small example subjective and objective dataset
# subj_docs = [
#     (['I', 'love', 'this', 'movie'], 'subj'),
#     (['This', 'was', 'amazing'], 'subj')
# ]
# obj_docs = [
#     (['The', 'movie', 'was', 'released', 'in', '2020'], 'obj'),
#     (['It', 'lasts', '120', 'minutes'], 'obj')
# ]
# Feature extractor example: presence of words
def extract_features(words):
    """Return a word-presence feature dictionary for one tokenised sentence.

    Every token in ``words`` is lower-cased and used as a key whose value is
    ``True``; tokens that repeat (after lower-casing) collapse into one key.

    Example:
        >>> extract_features(['The', 'movie', 'was', 'Great'])
        {'the': True, 'movie': True, 'was': True, 'great': True}
    """
    lowered_tokens = (token.lower() for token in words)
    return dict.fromkeys(lowered_tokens, True)
# Prepare training data:
# - combine all subjective and objective labelled sentences,
# - convert each sentence into a feature dictionary with extract_features,
# - pair each feature dictionary with its label,
# - store all pairs in `train` for classifier training.
train = [(extract_features(doc), label) for doc, label in subj_docs + obj_docs]

# Train classifier
classifier = NaiveBayesClassifier.train(train)

# Example text
text = "The movie was fantastic but the ending was disappointing."

# Split the text into sentences and classify each one.
for sentence in sent_tokenize(text):
    # Tokenise with nltk.word_tokenize instead of str.split(): splitting on
    # whitespace leaves punctuation glued to the neighbouring word
    # ("disappointing."), whereas the corpus sentences used for training keep
    # punctuation as separate tokens. A glued token is a feature the
    # classifier never saw, so it would contribute nothing to the prediction.
    features = extract_features(nltk.word_tokenize(sentence))
    print("after extracting features : ", features)
    print(sentence, '->', classifier.classify(features))




[nltk_data] Downloading package subjectivity to /home/mca/nltk_data...
[nltk_data]   Unzipping corpora/subjectivity.zip.


The movie was fantastic but the ending was disappointing. -> subj
