# Sentiment Analysis with Senpy
J. Fernando Sánchez Rada
![Senpy](logo.png)

# Sentiment Analysis 101

# Training

In [3]:
import nltk
# Toy training corpus: each item is a (list of tokens, sentiment label) pair,
# five labelled 'positive' followed by five labelled 'negative'.
tweets = [
    (['love', 'this', 'car'], 'positive'),
    (['this', 'view', 'amazing'], 'positive'),
    (['feel', 'great', 'this', 'morning'], 'positive'),
    (['excited', 'about', 'the', 'concert'], 'positive'),
    (['best', 'friend'], 'positive'),
    (['not', 'like', 'this', 'car'], 'negative'),
    (['this', 'view', 'horrible'], 'negative'),
    (['feel', 'tired', 'this', 'morning'], 'negative'),
    (['not', 'looking', 'forward', 'the', 'concert'], 'negative'),
    (['enemy'], 'negative')]

def get_words_in_tweets(tweets):
    """Return every word of every tweet as one flat list.

    `tweets` is an iterable of (words, sentiment) pairs. The sentiment is
    ignored and the words are concatenated in order, duplicates included.
    """
    return [word for words, _sentiment in tweets for word in words]

def get_word_features(wordlist):
    """Return the distinct words of `wordlist`.

    The words are counted with nltk.FreqDist and the keys of the resulting
    distribution are returned.
    """
    frequencies = nltk.FreqDist(wordlist)
    return frequencies.keys()

# Vocabulary of the training corpus; extract_features() reads this global.
word_features = get_word_features(get_words_in_tweets(tweets))

def extract_features(document):
    """Describe `document` as a dict of boolean 'contains(<word>)' features.

    One entry is produced for every word in the global `word_features`; its
    value is True when that word occurs in `document`.
    """
    present = set(document)
    return {'contains(%s)' % word: (word in present)
            for word in word_features}

# Pair the feature dict of every training tweet with its label.
training_set = nltk.classify.apply_features(extract_features, tweets)

classifier = nltk.NaiveBayesClassifier.train(training_set)

# show_most_informative_features() prints the table itself and returns None,
# so it must not be wrapped in `print`: doing so appended a stray "None" line
# to the output and only parsed on Python 2.
classifier.show_most_informative_features(5)

Most Informative Features
           contains(not) = False          positi : negati =      1.6 : 1.0
         contains(tired) = False          positi : negati =      1.2 : 1.0
       contains(excited) = False          negati : positi =      1.2 : 1.0
         contains(great) = False          negati : positi =      1.2 : 1.0
       contains(looking) = False          positi : negati =      1.2 : 1.0
None


# Analysis

In [4]:
def get_sentiment_bayes(sentence):
    """Classify a whitespace-separated sentence with the trained classifier.

    The sentence is split into tokens, turned into features with
    extract_features() and passed to the global `classifier`.
    """
    tokens = sentence.split()
    features = extract_features(tokens)
    return classifier.classify(features)

In [5]:
# Examples to classify: (tokens, expected label).
test_tweets = [
    (['feel', 'happy', 'this', 'morning'], 'positive'),
    (['larry', 'friend'], 'positive'),
    (['not', 'like', 'that', 'man'], 'negative'),
    (['house', 'not', 'great'], 'negative'),
    (['your', 'song', 'annoying'], 'negative'),
]

# For each example print the sentence, then "<expected> - <predicted>".
for words, expected in test_tweets:
    text = " ".join(words)
    predicted = get_sentiment_bayes(text)
    report = "{text}\n\t{polarity} - {result}".format(
        text=text, polarity=expected, result=predicted)
    print(report)

feel happy this morning
	positive - positive
larry friend
	positive - positive
not like that man
	negative - negative
house not great
	negative - negative
your song annoying
	negative - positive


# Simpler Sentiment Analysis

In [6]:
def get_sentiment_simple(text):
    """Guess the sentiment of `text` from the smileys it contains.

    Returns 'positive' if ':)' occurs in the text, otherwise 'negative' if
    ':(' occurs, otherwise 'neutral'. ':)' is checked first, so it wins when
    both smileys are present.
    """
    for smiley, label in ((':)', 'positive'), (':(', 'negative')):
        if smiley in text:
            return label
    return 'neutral'

In [7]:
# Raw sentences for the smiley detector. They get their own name so that the
# token/label training corpus bound to `tweets` earlier in the notebook is not
# overwritten by data of a different shape.
smiley_tweets = [
    'Larry is my friend :)',
    'Larry is a jerk :(',
    "And I don't care",
]
for sentence in smiley_tweets:
    print('{}\n\t{}'.format(sentence, get_sentiment_simple(sentence)))

Larry is my friend :)
	positive
Larry is a jerk :(
	negative
And I don't care
	neutral


# Sentiment Analysis Services

# How do we share this with the world?
* DIY approach
* **NIF**

# DIY Approach
* Pros
    * Flexibility
    * Simplicity
* Cons
    * Lack of interoperability
    * Time-consuming (both to develop and to understand)

# NIF in a nutshell

* Format
    * RDF/JSON-LD
    * Developed for NLP
    * Simple model of *Contexts* and *Strings*
* API
    * HTTP GET
    * Parameters:
        * input (i)
        * informat (f)
        * intype (t)
        * outformat (o)
        * ...
    * Extra parameters
        * algorithm (a)
        * language (l)
        
* More info: http://persistence.uni-leipzig.org/nlp2rdf/specification/api.html

* Pros
    * Standardized format
    * Simple API
    * Interoperability
* Cons
    * Verbosity
    * Steep learning curve (RDF, semantic web)
    * Evolving (NIF 3.0 on its way)

# Something in between?
* Dead easy for developers
* Flexibility
* Common API
* Interoperability
* Multiple formats

# Enter senpy


In [8]:
from IPython.display import IFrame
# Embed the senpy page of the Python Package Index in the notebook.
IFrame(src='https://pypi.python.org/pypi/senpy', width=700, height=350)

# How senpy works

* Abstraction for NIF (or other formats)
* Plugins to implement different algorithms
    * Multiple algorithms, same API
    * Non-blocking (gevent)
* CLI to deploy a server

# Use cases

* Deploying your sentiment analysis as a web service
* Mocking
* Evaluation of different algorithms

# Installation

In [9]:
# NOTE(review): no version is pinned, so the senpy release installed here
# depends on when the cell is run; pin one if the outputs must be reproducible.
!pip install --upgrade senpy

Requirement already up-to-date: senpy in /home/jfernando/git/senpy
Requirement already up-to-date: Flask>=0.10.1 in /home/jfernando/.local/lib/python2.7/site-packages (from senpy)
Requirement already up-to-date: gunicorn>=19.0.0 in /home/jfernando/.local/lib/python2.7/site-packages (from senpy)
Requirement already up-to-date: requests>=2.4.1 in /home/jfernando/.local/lib/python2.7/site-packages (from senpy)
Requirement already up-to-date: GitPython>=0.3.2.RC1 in /home/jfernando/.local/lib/python2.7/site-packages (from senpy)
Requirement already up-to-date: gevent>=1.0.1 in /usr/local/lib/python2.7/dist-packages (from senpy)
Collecting PyLD>=0.6.5 (from senpy)
  Downloading PyLD-0.6.8.tar.gz (42kB)
    100% |████████████████████████████████| 45kB 760kB/s 
Requirement already up-to-date: Flask-Testing>=0.4.2 in /usr/local/lib/python2.7/dist-packages (from senpy)
Collecting Werkzeug>=0.7 (from Flask>=0.10.1->senpy)
  Downloading Werkzeug-0.10.4-py2.py3-none-any.whl (293kB)
[K   

In [10]:
# Show the command-line options of the senpy server.
!python -m senpy --help

usage: __main__.py [-h] [--level logging_level] [--debug] [--host HOST] [--port PORT]
                   [--plugins-folder PLUGINS_FOLDER]

Run a Senpy server

optional arguments:
  -h, --help            show this help message and exit
  --level logging_level, -l logging_level
                        Logging level
  --debug, -d           Run the application in debug mode
  --host HOST           Use 0.0.0.0 to accept requests from any host.
  --port PORT, -p PORT  Port to listen on.
  --plugins-folder PLUGINS_FOLDER, -f PLUGINS_FOLDER
                        Where to look for plugins.


In [12]:
# Starts the server in the foreground: the cell blocks until it is interrupted
# (note the ^C in the recorded output). The requests to http://localhost:5000
# further down need the server to be running, e.g. from a separate terminal.
!python -m senpy

Server running on port 127.0.0.1:5000. Ctrl+C to quit
^CKeyboardInterrupt
Bye!



In [15]:
import json
import requests
def print_response(url, timeout=None):
    """Fetch `url` with an HTTP GET and pretty-print its JSON body.

    A top-level "@context" entry, when present, is dropped before printing so
    the output stays short. The HTTP status is deliberately not checked:
    error responses are printed like any other JSON body.

    Args:
        url: address to request.
        timeout: seconds to wait for the server, forwarded to requests.get.
            The default (None) waits indefinitely, which was the behaviour
            before this parameter existed.

    Raises:
        ValueError: if the response body is not valid JSON.
        requests.exceptions.RequestException: if the request itself fails.
    """
    res = requests.get(url, timeout=timeout).json()
    # Only a JSON object can carry an "@context" key; arrays are printed as is.
    if isinstance(res, dict):
        res.pop("@context", None)
    # Function-call form of print: same output on Python 2, valid on Python 3,
    # and consistent with the print(...) calls used in the other cells.
    print(json.dumps(res, indent=2))

In [17]:
# Request the root endpoint without any parameters; the response recorded
# below reports the missing "input" parameter.
print_response('http://localhost:5000')

{
  "status": 404, 
  "message": "Missing or invalid parameters", 
  "errors": {
    "input": {
      "required": true, 
      "@id": "input", 
      "help": "Input text", 
      "aliases": [
        "i", 
        "input"
      ]
    }
  }, 
  "parameters": {
    "informat": "text", 
    "prefix": "", 
    "intype": "direct", 
    "outformat": "json-ld", 
    "urischeme": "RFC5147String"
  }
}


In [18]:
# List the plugins the server knows about.
print_response('http://localhost:5000/plugins')

{
  "sentiment140": {
    "is_activated": true, 
    "version": "0.1", 
    "@id": "sentiment140_0.1", 
    "name": "sentiment140", 
    "extra_params": {
      "@id": "extra_params_sentiment140_0.1", 
      "language": {
        "required": false, 
        "@id": "lang_sentiment140", 
        "options": [
          "es", 
          "en", 
          "auto"
        ], 
        "aliases": [
          "language", 
          "l"
        ]
      }
    }
  }, 
  "rand": {
    "is_activated": true, 
    "version": "0.1", 
    "@id": "rand_0.1", 
    "name": "rand", 
    "extra_params": {
      "@id": "extra_params_rand_0.1", 
      "language": {
        "required": false, 
        "@id": "lang_rand", 
        "options": [
          "es", 
          "en", 
          "auto"
        ], 
        "aliases": [
          "language", 
          "l"
        ]
      }
    }
  }
}


In [19]:
# Show the description of a single plugin.
print_response('http://localhost:5000/plugins/sentiment140')

{
  "is_activated": true, 
  "version": "0.1", 
  "@id": "sentiment140_0.1", 
  "name": "sentiment140", 
  "extra_params": {
    "@id": "extra_params_sentiment140_0.1", 
    "language": {
      "required": false, 
      "@id": "lang_sentiment140", 
      "options": [
        "es", 
        "en", 
        "auto"
      ], 
      "aliases": [
        "language", 
        "l"
      ]
    }
  }
}


In [20]:
# Ask the server to deactivate the sentiment140 plugin.
print_response('http://localhost:5000/plugins/sentiment140/deactivate')

{
  "message": "Ok"
}


In [21]:
# Query the plugin again; the recorded response now has "is_activated": false.
print_response('http://localhost:5000/plugins/sentiment140')

{
  "is_activated": false, 
  "version": "0.1", 
  "@id": "sentiment140_0.1", 
  "name": "sentiment140", 
  "extra_params": {
    "@id": "extra_params_sentiment140_0.1", 
    "language": {
      "required": false, 
      "@id": "lang_sentiment140", 
      "options": [
        "es", 
        "en", 
        "auto"
      ], 
      "aliases": [
        "language", 
        "l"
      ]
    }
  }
}


# Developing new plugins

# Anatomy of a plugin

In [22]:
# The ignore pattern is quoted so the shell hands it to tree unchanged instead
# of expanding it against any .pyc files in the current directory.
!tree playground/base_plugins/rand -I '*.pyc'

playground/base_plugins/rand
├── rand.py
└── rand.senpy

0 directories, 2 files


The .senpy file contains information about the plugin, including its name (**name**) and where the code can be imported from (**module**).

In [23]:
# Print the definition file of the rand plugin.
!cat playground/base_plugins/rand/rand.senpy

{
    "name": "rand",
    "module": "rand",
    "description": "What my plugin broadly does",
    "author": "@balkian",
    "version": "0.1",
    "extra_params": {
        "language": {
            "@id": "lang_rand",
            "aliases": ["language", "l"],
            "required": false,
            "options": ["es", "en", "auto"]
        }
     },
     "requirements": {},
     "marl:maxPolarityValue": "1",
     "marl:minPolarityValue": "-1"
}


In [24]:
# %load playground/base_plugins/rand/rand.py
import json
import random

from senpy.plugins import SentimentPlugin
from senpy.models import Response, Opinion, Entry


class Sentiment140Plugin(SentimentPlugin):
    """Demo plugin that answers every request with a random polarity.

    NOTE(review): the class is called Sentiment140Plugin although this cell
    is loaded from rand.py -- looks like a copy-paste leftover. Confirm how
    senpy looks the class up before renaming it.
    """

    def analyse(self, **params):
        """Build a Response with one entry holding one random opinion.

        Reads "input" (required), "language" (default "auto") and "prefix"
        (default None) from `params`.
        """
        language = params.get("language", "auto")
        prefix = params.get("prefix", None)

        response = Response(prefix=prefix)

        # Gaussian sample (mu=0.2, sigma=0.2) clamped to the range [-1, 1].
        score = min(1, random.gauss(0.2, 0.2))
        score = max(-1, score)
        if score > 0:
            label = "marl:Positive"
        elif score < 0:
            label = "marl:Negative"
        else:
            label = "marl:Neutral"

        entry = Entry(id="Entry0", text=params["input"], prefix=prefix)
        opinion = Opinion(id="Opinion0", prefix=prefix,
                          hasPolarity=label, polarityValue=score)
        opinion["prov:wasGeneratedBy"] = self.id

        entry.opinions.append(opinion)
        entry.language = language
        response.entries.append(entry)
        return response

In [25]:
# Analyse a sentence, selecting the rand plugin through the `algo` parameter.
print_response('http://localhost:5000?i=let us go&algo=rand')

{
  "@id": "_:b0", 
  "analysis": [
    {
      "minPolarityValue": 0.0, 
      "version": "0.1", 
      "@id": "rand_0.1", 
      "maxPolarityValue": 1.0, 
      "name": "rand"
    }
  ], 
  "entries": [
    {
      "text": "let us go", 
      "@id": "Entry0", 
      "nif:language": "auto", 
      "opinions": [
        {
          "@id": "Opinion0", 
          "prov:wasGeneratedBy": "rand_0.1", 
          "marl:hasPolarityValue": 0.47205581920028605, 
          "marl:hasPolarity": "marl:Positive"
        }
      ]
    }
  ]
}


# A plugin for our dummy service

In [26]:
# The ignore pattern is quoted so the shell hands it to tree unchanged instead
# of expanding it against any .pyc files in the current directory.
!tree playground/plugins -I '*.pyc'

playground/plugins
├── bayes.py
├── bayes.senpy
├── smileys.py
└── smileys.senpy

0 directories, 4 files


In [27]:
# Print the definition file of the smileys plugin.
!cat playground/plugins/smileys.senpy

{
    "name": "smileys",
    "module": "smileys",
    "description": "A simple plugin that detects smileys",
    "author": "@balkian",
    "version": "0.1",
    "requirements": {}
}


In [28]:
# %load playground/plugins/smileys.py
import json
import random

from senpy.plugins import SentimentPlugin
from senpy.models import Response, Opinion, Entry


class SmileysPlugin(SentimentPlugin):
    """Sentiment plugin that only looks for the ':)' and ':(' smileys."""

    def get_sentiment(self, text):
        """Return the marl polarity suggested by the smileys in `text`.

        ':)' is checked first, so a text containing both smileys counts as
        positive; a text with neither smiley is neutral.
        """
        smileys = ((':)', 'marl:Positive'), (':(', 'marl:Negative'))
        for smiley, polarity in smileys:
            if smiley in text:
                return polarity
        return 'marl:Neutral'

    def analyse(self, **params):
        """Wrap the polarity of params['input'] in a one-entry Response."""
        response = Response()
        message = params['input']
        label = self.get_sentiment(message)

        entry = Entry(text=message)
        opinion = Opinion(hasPolarity=label)
        opinion["prov:wasGeneratedBy"] = self.id

        entry.opinions.append(opinion)
        response.entries.append(entry)
        return response


# More sophisticated plugins

# Loading and unloading resources
* Asynchronous load

In [29]:
def activate(self):
    """Skeleton of a plugin's activate method; intentionally does nothing."""

def deactivate(self):
    """Skeleton of a plugin's deactivate method; intentionally does nothing."""

# Example: The Naive Bayes service

In [30]:
# Print the definition file of the naivebayes plugin.
!cat playground/plugins/bayes.senpy

{
    "name": "naivebayes",
    "module": "bayes",
    "description": "Using NLTK for sentiment analysis",
    "author": "@balkian",
    "version": "0.1",
    "requirements": {"nltk": "*"}
}


In [31]:
# %load playground/plugins/bayes.py
import json
import nltk

from senpy.plugins import SentimentPlugin
from senpy.models import Response, Opinion, Entry


class BayesPlugin(SentimentPlugin):
    """Sentiment plugin backed by an NLTK Naive Bayes classifier.

    The classifier is trained in activate() on a small hard-coded corpus;
    get_sentiment() and analyse() rely on the attributes set there.
    """

    def activate(self):
        """Train the classifier and remember the vocabulary it uses.

        Sets `self._word_features` (the words of the corpus) and
        `self._classifier` (the trained NaiveBayesClassifier).
        """
        positive = 'marl:Positive'
        negative = 'marl:Negative'
        tweets = [
            (['love', 'this', 'car'], positive),
            (['this', 'view', 'amazing'], positive),
            (['feel', 'great', 'this', 'morning'], positive),
            (['excited', 'about', 'the', 'concert'], positive),
            (['best', 'friend'], positive),
            (['not', 'like', 'this', 'car'], negative),
            (['this', 'view', 'horrible'], negative),
            (['feel', 'tired', 'this', 'morning'], negative),
            (['not', 'looking', 'forward', 'the', 'concert'], negative),
            (['enemy'], negative),
        ]

        # Flatten the corpus into one word list, then keep its distinct words.
        corpus_words = [word for words, _label in tweets for word in words]
        self._word_features = nltk.FreqDist(corpus_words).keys()

        # extract_features() reads self._word_features, so it has to be set
        # before the training set is built.
        training_set = nltk.classify.apply_features(self.extract_features,
                                                    tweets)
        self._classifier = nltk.NaiveBayesClassifier.train(training_set)

    def get_sentiment(self, text):
        """Return the label the classifier assigns to whitespace-split `text`."""
        features = self.extract_features(text.split())
        return self._classifier.classify(features)

    def extract_features(self, document):
        """Map `document` to boolean 'contains(<word>)' features.

        One entry is produced per word in `self._word_features`; it is True
        when the word occurs in `document`.
        """
        present = set(document)
        return {'contains(%s)' % word: (word in present)
                for word in self._word_features}

    def analyse(self, **params):
        """Wrap the polarity of params['input'] in a one-entry Response."""
        response = Response()
        message = params['input']
        label = self.get_sentiment(message)

        entry = Entry(text=message)
        opinion = Opinion(hasPolarity=label)
        opinion["prov:wasGeneratedBy"] = self.id

        entry.opinions.append(opinion)
        response.entries.append(entry)
        return response


In [35]:
# Analyse a sentence with the naivebayes plugin (spaces are written as %20).
print_response('http://localhost:5000/?i=not%20looking%20forward%20to%20the%20concert&algo=naivebayes')

{
  "@id": "_:b0", 
  "analysis": [
    {
      "minPolarityValue": 0.0, 
      "version": "0.1", 
      "@id": "naivebayes_0.1", 
      "maxPolarityValue": 1.0, 
      "name": "naivebayes"
    }
  ], 
  "entries": [
    {
      "text": "not looking forward to the concert", 
      "@id": "_:b1", 
      "opinions": [
        {
          "prov:wasGeneratedBy": "naivebayes_0.1", 
          "@id": "_:b2", 
          "marl:hasPolarity": "marl:Negative"
        }
      ]
    }
  ]
}


# Questions?

jfernando@dit.upm.es
http://github.com/gsi-upm/senpy