### Overview
We evaluate three methods of Twitter sentiment classification.
    1. Rule based method
    2. SVM with bag-of-words features
    3. SVM with word embeddings
    
### Dataset
Tweets are from SemEval 2016 (Cleaned by Jeremy Barnes):
https://github.com/jbarnesspain/domain_blse/tree/master/datasets/semeval_2016

### Requirement
Python2  
NumPy  
scikit-learn  
NLTK  
overrides  
tqdm  

### Compatibility
All codes are written in Python2 but should be compatible with Python3. Let me know if you meet compatibility issues.

In [1]:
from __future__ import print_function
from __future__ import division

from sys import exit
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.metrics import recall_score, precision_score, f1_score, accuracy_score
from collections import Counter
from svms import svmclsbinary
from overrides import overrides

from helpers import word2vec

import numpy as np

In [2]:
class SentimentClassifier(object):
    """ Our base class for Twitter sentiment classifiers
    Attributes:
        pos_sents: positive sentiment tweets.
        neg_sents: negataive sentiment tweets.
    
    Classmethods:
        readlines: simply read lines in corpus.
        mynumbers: compute classification results given predictions and gold labels.
    """
    def __init__(self, pos_sents, neg_sents):
        self.pos_sents = SentimentClassifier.readlines(pos_sents)
        self.neg_sents = SentimentClassifier.readlines(neg_sents)

    @staticmethod
    def readlines(myfile):
        with open(myfile, "r") as f:
            return [line.strip() for line in f.readlines()]

    @staticmethod
    def mynumbers(pred, y):
        recall = recall_score(y, pred)
        precision = precision_score(y, pred)
        f1 = f1_score(y, pred)
        acc = accuracy_score(y, pred)
        return (recall, precision, f1, acc)

In [3]:
class ruleSentimentClassifier(SentimentClassifier):
    """ Sentiment classifier using rule and lexicon based method. 
    We use implementations from VADER.
    Attributes: vader -- vader classifier
    
    Methods:
    compute_score: given positive and negative tweets, compute their 
    sentiment scores. 
    """
    def __init__(self, pos_sents, neg_sents):
        super(ruleSentimentClassifier, self).__init__(pos_sents, neg_sents)
        self.vader = SentimentIntensityAnalyzer()
        print ("Finish reading sentences and intialize VADER ...")
        self.compute_score()

    def compute_score(self):
        """
        Compund score >= 0.05: positive
        Compund score <= -0.05: negative
        Interpretations of the score: 
        https://stackoverflow.com/questions/40325980/how-is-the-vader-compound-polarity-score-calculated-in-python-nltk
        """
        retriver = lambda x: self.vader.polarity_scores(x)["compound"]
        _pos_preds = map(lambda x: int(x>=0.05), map(retriver, self.pos_sents))
        _neg_preds = map(lambda x: int(x<=-0.05), map(retriver, self.neg_sents))
        y = [1] * len(_pos_preds) + [0] * len(_neg_preds)
        result = ruleSentimentClassifier.mynumbers(_pos_preds+_neg_preds, y)
        print ("-" * 10 + " Summary (rule) " + "-" * 10)
        print ("Classification results:")
        print ("F1: {:.2f}, Recall: {:.2f}, Precision: {:.2f}, Accuracy: {:.2f}".format(
        result[2], result[0], result[1], result[-1]))
        print ("Data distribution: {}".format(dict(Counter(y))))
        print ("-" * 36)

In [4]:
class bowSentimentClassifier(SentimentClassifier):
    """ Sentiment classification using SVM. Each word is represented by a 
    one-hot vector with length |V| where V is vocabulary of TRAINING tweets.
    To represent a tweet, all word vectors are summed up.

    Methods:
        run:
            train the classifier and test performance.
        _compute_feature: 
            preprocess tweets -- downcasing and remove low frequent words.
            compute vector representation of all tweets by calling _feature_lookup.
        _feature_lookup:
            given a tweeet, compute its vectorial representation.    
    """
    def __init__(self, pos_sents, neg_sents, *args):
        super(bowSentimentClassifier, self).__init__(pos_sents, neg_sents)
        self._compute_feature()

    def run(self, test_pos_sents, test_neg_sents):
        """ Run an experiment. The binary SVM is imported from svms. Note that
        to save time we set cross-validation to False. Setting docv=True to run
        a 5-fold cross validation on the training dataset, though time consuming.
        """
        test_pos_X = map(self._feature_lookup,
                         bowSentimentClassifier.readlines(test_pos_sents))
        test_neg_X = map(self._feature_lookup,
                         bowSentimentClassifier.readlines(test_neg_sents))
        testy = [1] * len(test_pos_X) + [-1] * len(test_neg_X)
        self.svm = svmclsbinary(name="BOW",
                                X=self.pos_X+self.neg_X,
                                y=self.y,
                                testX=test_pos_X+test_neg_X,
                                testy=testy,
                                docv=False)

    def _compute_feature(self):
        corpus = ""
        for s in self.pos_sents+self.neg_sents: corpus += s.lower()
        vocab = [k for k, v in dict(Counter(corpus.split(" "))).iteritems() if v > 5]
        self.vocab = {k: idx for idx, k in enumerate(vocab)}
        self.vocab_size = len(self.vocab)
        print ("Finish computing vocab, start computing features ...")
        self.pos_X = map(self._feature_lookup, self.pos_sents)
        self.neg_X = map(self._feature_lookup, self.neg_sents)
        self.y = [1] * len(self.pos_X) + [-1] * len(self.neg_X)
        print ("Finish computing features ... Ready to train SVM ...")

    def _feature_lookup(self, sentence):
        X = []
        words = sentence.split(" ")
        for w in words:
            if w not in self.vocab: continue
            vec = [0.] * self.vocab_size
            vec[self.vocab[w]] = 1. # get the one-hot vector for this word
            X.append(vec)
        if len(X) > 1:
            X = np.sum(X, axis=0)
            assert len(X) == self.vocab_size
            return X
        elif len(X) == 1:
            assert len(X[0]) == self.vocab_size
            return X[0]
        elif len(X) == 0:
            return [0.] * self.vocab_size

In [5]:
class vecSentimentClassifier(bowSentimentClassifier):
    """ Sentiment classification using SVM. Each word its size N word embedding, computed 
    from fastText with large tweets corpora. Note that normally N << |V|.

    Methods:
        _feature_lookup:
            Now we just need to load embeddings from the word2vec function.
            Word embeddings are summed to preduce tweets representation.
    """
    def __init__(self, pos_sents, neg_sents, emb_path):
        self.word2vec = word2vec(emb_path)
        super(vecSentimentClassifier, self).__init__(pos_sents, neg_sents)

    @overrides
    def _compute_feature(self):
        assert self.word2vec is not None
        self.pos_X = map(self._feature_lookup, self.pos_sents)
        self.neg_X = map(self._feature_lookup, self.neg_sents)
        self.y = [1] * len(self.pos_X) + [-1] * len(self.neg_X)
        print ("Finish computing features ... Ready to train SVM ...")

    @overrides
    def _feature_lookup(self, sentence):
        X = []
        words = sentence.split(" ")
        for w in words:
            if w not in self.word2vec: continue
            X.append(self.word2vec[w])
        if len(X) > 1:
            return np.sum(X, axis=0)
        elif len(X) == 1:
            return X[0]
        elif len(X) == 0:
            return [0.] * 200

We then start to run our experiments ...

In [6]:
if __name__ == "__main__":
    """First specify our data path"""
    train_pos_sents = "./dataset/train_pos.txt"
    train_neg_sents = "./dataset/train_neg.txt"
    test_pos_sents = "./dataset/test_pos.txt"
    test_neg_sents = "./dataset/test_neg.txt"
    emb_path = "./embeddings/myemb.vec"

### First -- rule based methods

In [7]:
    cls = ruleSentimentClassifier(test_pos_sents, test_neg_sents)

Finish reading sentences and intialize VADER ...
---------- Summary (rule) ----------
Classification results:
F1: 0.71, Recall: 0.69, Precision: 0.72, Accuracy: 0.60
Data distribution: {0: 2386, 1: 5619}
------------------------------------


### Second -- ML based method with bag-of-words features

In [8]:
    cls = bowSentimentClassifier(train_pos_sents, train_neg_sents)

Finish computing vocab, start computing features ...
Finish computing features ... Ready to train SVM ...


Training finished ... test the trained classifier ...

In [10]:
    cls.run(test_pos_sents, test_neg_sents)

Finish training this svm for BOW...
Word::BOW f1: 0.827435064935, recall: 0.907100907635, precision: 0.760632741382, acc: 0.734415990006. TestSize::8005, TestDist::Counter({1: 5619, -1: 2386})


### Third -- ML based method with word embedding features

In [12]:
    cls = vecSentimentClassifier(train_pos_sents, train_neg_sents, emb_path)

100%|██████████| 113281/113281 [00:03<00:00, 28737.06it/s]


Finish computing features ... Ready to train SVM ...


In [13]:
    cls.run(test_pos_sents, test_neg_sents)

Finish training this svm for BOW...
Word::BOW f1: 0.860953000488, recall: 0.942160526784, precision: 0.792633627789, acc: 0.786383510306. TestSize::8005, TestDist::Counter({1: 5619, -1: 2386})
