In [1]:
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *

import itertools



In [3]:
n_instances = 4000
subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]
len(subj_docs), len(obj_docs)

(4000, 4000)

In [4]:
split = int(n_instances*0.8)
train_subj_docs = subj_docs[:split]
test_subj_docs = subj_docs[split:n_instances]
train_obj_docs = obj_docs[:split]
test_obj_docs = obj_docs[split:n_instances]
training_docs = train_subj_docs+train_obj_docs
testing_docs = test_subj_docs+test_obj_docs
sentim_analyzer = SentimentAnalyzer()
all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])

In [5]:
unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
len(unigram_feats)

3430

In [6]:
sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
training_set = sentim_analyzer.apply_features(training_docs)
test_set = sentim_analyzer.apply_features(testing_docs)

In [7]:
trainer = NaiveBayesClassifier.train
classifier = sentim_analyzer.train(trainer, training_set)

for key,value in sorted(sentim_analyzer.evaluate(test_set).items()):
    print('{0}: {1}'.format(key, value))

Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.91125
F-measure [obj]: 0.9097839898348158
F-measure [subj]: 0.9126691266912671
Precision [obj]: 0.9250645994832042
Precision [subj]: 0.8983050847457628
Recall [obj]: 0.895
Recall [subj]: 0.9275


In [8]:
doc = "Energy and Resources Minister Simon Bridges says an agreement signed between the Energy Efficiency and Conservation Authority (EECA) and one of New Zealand’s largest trucking companies is a significant milestone for the Heavy Vehicle Fuel Efficiency Programme. ECA has been working with industry groups since 2012 to increase awareness of the potential for fuel efficiency and I am delighted the HW Richardson Group is now on board, with 800 trucks in 30 fleets nationwide consuming 19 million litres of fuel per year. I announced an extension to the Heavy Vehicle Fuel Efficiency Programme earlier this year as one of four carbon saving energy efficiency projects, to improve business productivity, save money and reduce carbon emissions. Once fully implemented, the programme is expected to result in New Zealand’s heavy vehicle fleets saving a total of 17 million litres of diesel per year, and reduce carbon emissions by approximately 45,900 tonnes a year, which is equivalent to the carbon emissions released by 16,000 cars. EECA’s Heavy Vehicle Fuel Efficiency Programme guides fleets through a review of their business which includes: fuel data management, driver training practices, vehicle maintenance and selection and a number of other areas, as well as helping to implement initiatives and monitor resulting changes to ensure the savings are sustained over the long term.  With good information, businesses can make smart choices which will more than pay for themselves through direct savings.  There are also a number of other benefits like emission reductions, and increased productivity and competitiveness, says Mr Bridges."

In [9]:
doc

'Energy and Resources Minister Simon Bridges says an agreement signed between the Energy Efficiency and Conservation Authority (EECA) and one of New Zealand’s largest trucking companies is a significant milestone for the Heavy Vehicle Fuel Efficiency Programme. ECA has been working with industry groups since 2012 to increase awareness of the potential for fuel efficiency and I am delighted the HW Richardson Group is now on board, with 800 trucks in 30 fleets nationwide consuming 19 million litres of fuel per year. I announced an extension to the Heavy Vehicle Fuel Efficiency Programme earlier this year as one of four carbon saving energy efficiency projects, to improve business productivity, save money and reduce carbon emissions. Once fully implemented, the programme is expected to result in New Zealand’s heavy vehicle fleets saving a total of 17 million litres of diesel per year, and reduce carbon emissions by approximately 45,900 tonnes a year, which is equivalent to the carbon emis

In [10]:
sentences = nltk.sent_tokenize(doc)
words = [nltk.word_tokenize(sent) for sent in sentences]
words = list(itertools.chain(*words))

In [11]:
sentim_analyzer.classify(words)

'obj'

In [12]:
test_sentences = ['Happiness, love, peace.', 
                  'This is bad, horrible, not nice', 
                  'These are the facts. The beehive is 100m high',
                  doc,
                 'This is delecious']

for sent in test_sentences:
    sentences = nltk.sent_tokenize(sent)
    words = [nltk.word_tokenize(sent) for sent in sentences]
    words = list(itertools.chain(*words))
    print(sentim_analyzer.classify(words))

obj
subj
subj
obj
subj


In [15]:
economic = "Economics is concerned with the optimal distribution of scarce resources within society. For example, economics is concerned with how individual decisions like how firms produce goods and which goods people buy. An important element in economics is concerned with the extent to which governments can intervene in the economy to improve economic welfare. Economics is also concerned with wider issues such as economic growth and unemployment – issues that affect the whole of society."
economics = [(w, 'economics') for s in nltk.sent_tokenize(economic) for w in nltk.word_tokenize(s)]

climate = "Remote sensing and digital imagery provide us with a vital global perspective on our changing Earth. Comparing measurements of sea level, land ice, Arctic sea ice, and carbon dioxide over the past decades suggest that the Earth’s climate is warming - a phenomenon that is attested to by an increase in the mean annual surface temperature of the Earth’s surface. Examine evidence of climate change from different parts of the Earth’s system and consider what it means to live on a planet with a dynamically changing climate."
climates = [(w, 'climate') for s in nltk.sent_tokenize(climate) for w in nltk.word_tokenize(s)]

unigram_feats = sentim_analyzer.unigram_word_feats(economic+climate, min_freq=4)

train = sentim_analyzer.apply_features(climates[:-20]+economics[:-20])
test = sentim_analyzer.apply_features(climates[-20:]+economics[-20:])

trainer = NaiveBayesClassifier.train
classifier = sentim_analyzer.train(trainer, train)

for key,value in sorted(sentim_analyzer.evaluate(test).items()):
    print('{0}: {1}'.format(key, value))

Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.55
F-measure [climate]: 0.64
F-measure [economics]: 0.4
Precision [climate]: 0.5333333333333333
Precision [economics]: 0.6
Recall [climate]: 0.8
Recall [economics]: 0.3


In [16]:
for sent in test_sentences:
    sentences = nltk.sent_tokenize(sent)
    words = [nltk.word_tokenize(sent) for sent in sentences]
    words = list(itertools.chain(*words))
    print(sentim_analyzer.classify(words))

climate
climate
economics
economics
climate
