Update sentiment benchmark to use sentida pip package
AmaliePauli committed Sep 14, 2020
1 parent 6a9596f commit fc75820
Showing 3 changed files with 20 additions and 57 deletions.
6 changes: 3 additions & 3 deletions docs/models/sentiment_analysis.md
@@ -23,7 +23,7 @@ The tool scores texts with an integer where scores <0 are negative, =0 are neutr

#### Sentida
The tool Sentida [(Lauridsen et al. 2019)](https://tidsskrift.dk/lwo/article/view/115711)
uses a lexicon-based approach to sentiment analysis. The tool scores texts with a continuous value. There exist two versions of the tool, where the second version is a Python implementation: [Sentida](https://github.com/esbenkc/emma); in this documentation we evaluate this second version.
uses a lexicon-based approach to sentiment analysis. The tool scores texts with a continuous value. There exist both an R version and a Python implementation. In this documentation we evaluate the Python version from [sentida](https://github.com/guscode/sentida).
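
A minimal usage sketch of the pip package (the example sentence is arbitrary; the `Sentida().sentida(...)` call with `output='total'` is the one used in the benchmark script below):

```python
from sentida import Sentida

sentida = Sentida()
# 'total' gives one combined, continuous score for the whole text
score = sentida.sentida('Det er rigtig godt!', output='total')
```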

#### :wrench:Bert Emotion

@@ -115,15 +115,15 @@ The tools are benchmarked on the following datasets:

The scores of the LCC and Europarl Sentiment datasets and of the Afinn model are converted in the following way: a score of zero is "neutral", a positive score is "positive" and a negative score is "negative".
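
A sketch of this conversion, corresponding to the `to_label` helper used in the benchmark scripts:

```python
def to_label(score):
    # zero is neutral; otherwise the sign decides the class
    if score == 0:
        return 'neutral'
    return 'positive' if score > 0 else 'negative'
```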

A conversion of the continuous scores of the Sentida tool into three classes is not given, since the 'neutral' class cannot be assumed to be exactly zero; instead we assume it to be an area around zero. We looked for a threshold determining how close to zero a score should be to be interpreted as neutral. A symmetric threshold is found by optimizing the macro-F1 score on a Twitter sentiment corpus with 1327 examples (the corpus is under construction and will be released later on). The threshold is found to be 0.4, which makes our chosen conversion: scores over 0.4 are 'positive', scores under -0.4 are 'negative' and scores in between are 'neutral'.
A conversion of the continuous scores of the Sentida tool into three classes is not given, since the 'neutral' class cannot be assumed to be exactly zero; instead we assume it to be an area around zero. We looked for a threshold determining how close to zero a score should be to be interpreted as neutral. A symmetric threshold is found by optimizing the macro-F1 score on a Twitter sentiment corpus with 1327 examples (the corpus is under construction and will be released later on). The threshold is found to be 0.4, which makes our chosen conversion: scores over 0.4 are 'positive', scores under -0.4 are 'negative' and scores in between are 'neutral'. However, note that the tool was not originally intended for a three-class problem.
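
A sketch of this thresholded conversion, implemented in the benchmark scripts as the `to_label_sentida` helper:

```python
def to_label_sentida(score):
    # symmetric threshold of 0.4, fitted on the Twitter sentiment corpus
    if score > 0.4:
        return 'positive'
    if score < -0.4:
        return 'negative'
    return 'neutral'
```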

The scripts for the benchmarks can be found [here](https://github.com/alexandrainst/danlp/blob/master/examples/benchmarks/). There is one for the Europarl sentiment and LCC sentiment data and another one for the Twitter sentiment. This is because downloading the Twitter data requires logging in to a Twitter API account. The scores below for the Twitter data are reported for all the data, but if tweets have been deleted on Twitter in the meantime, not all of them can be downloaded.
In the table we report the accuracy, with the macro-F1 score in brackets; for the scores per class we refer to our benchmark script.
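
A sketch of how the two numbers are computed, for instance with scikit-learn (assuming gold labels and predictions in the `valence` and `pred` columns, as in the benchmark script):

```python
from sklearn.metrics import accuracy_score, f1_score

acc = accuracy_score(df['valence'], df['pred'])
macro_f1 = f1_score(df['valence'], df['pred'], average='macro')
print(f'{acc:.2f} ({macro_f1:.2f})')
```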

| Tool/Model | Europarl Sentiment | LCC Sentiment | Twitter Sentiment (Polarity) |
| ---- | ------------------ | ------------- | ---- |
| AFINN | 0.68 (0.68) | 0.66 (0.61) | 0.48 (0.46) |
| Sentida | 0.67 (0.65) | 0.58 (0.55) | 0.44 (0.44) |
| Sentida (version 0.5.0) | 0.67 (0.65) | 0.58 (0.55) | 0.44 (0.44) |
| Bert Tone (polarity, version 0.0.1) | **0.79** (0.78) | **0.74** (0.67) | **0.73** (0.70) |
| spaCy sentiment (version 0.0.1) | 0.74 (0.73) | 0.66 (0.61) | 0.66 (0.60) |

36 changes: 9 additions & 27 deletions examples/benchmarks/sentiment_benchmark.py
@@ -1,6 +1,6 @@
"""
Evaluation script for sentiment analysis
**The script downloads scripts from GitHub; it was last tested on 24-03-2020**
The script benchmarks on the following datasets, where scores are converted into a three-class problem: positive, neutral, negative:
- Europarl_sentiment
@@ -11,15 +11,12 @@
the model is integrated in the danlp package
- Afinn:
Requirements:
- afinn
- SentidaV2:
- pip install afinn
- Sentida:
Sentida is converted to a three-class problem by fitting a threshold for neutral on a manually annotated Twitter corpus.
The script downloads files from the sentida GitHub and places them in the cache folder
Requirement:
- Pandas
- NumPy
- NLTK
- pip install sentida
"""

@@ -111,27 +111,12 @@ def afinn_benchmark(datasets):


def sentida_benchmark(datasets):
"The scripts download from github from sentindaV2 and place it in cache folder"
DEFAULT_CACHE_DIR = os.path.join(str(Path.home()), '.danlp')
print(os.getcwd())
workdir = DEFAULT_CACHE_DIR +'/sentida'
print(workdir)
if not os.path.isdir(workdir):
os.mkdir(workdir)
url = "https://raw.githubusercontent.com/esbenkc/emma/master/SentidaV2/"
for file in ['SentidaV2.py','aarup.csv','intensifier.csv']:
urllib.request.urlretrieve(url+file, workdir+'/'+file)



sys.path.insert(1, workdir)
os.chdir(workdir+ '/')
sys.stdout = open(os.devnull, 'w')
from SentidaV2 import sentidaV2
sys.stdout = sys.__stdout__

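# Use the sentida pip package (pip install sentida) instead of downloading sources manually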
from sentida import Sentida
sentida = Sentida()

def sentida_score(sent):
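# 'total' gives one combined, continuous score for the whole sentence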
return sentidaV2(sent, output='total')
return sentida.sentida(sent, output='total')

for dataset in datasets:
if dataset == 'euparlsent':
Expand All @@ -146,7 +128,7 @@ def sentida_score(sent):
df['pred'] = df.text.map(sentida_score).map(to_label_sentida)
df['valence'] = df['valence'].map(to_label)

report(df['valence'], df['pred'], 'SentidaV2', dataset)
report(df['valence'], df['pred'], 'Sentida', dataset)

def bert_sent_benchmark(datasets):
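# Load the danlp BERT Tone model for polarity classification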
model = load_bert_tone_model()
35 changes: 8 additions & 27 deletions examples/benchmarks/sentiment_benchmark_twitter.py
@@ -4,8 +4,6 @@
TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_SECRET
**The script downloads scripts from GitHub; it was last tested on 24-03-2020**
The script tests both polarity (positive, negative and neutral) and analytic (objective, subjective)
The script benchmarks on the following datasets, where scores are converted into a three-class problem: positive, neutral, negative:
@@ -17,21 +15,18 @@
the model is integrated in the danlp package
- Afinn:
Requirements:
- afinn
- pip install afinn
- SentidaV2:
Sentida is converted to a three-class problem by fitting a threshold for neutral on a manually annotated Twitter corpus.
The script downloads files from the sentida GitHub and places them in the cache folder
Requirement:
- Pandas
- NumPy
- NLTK
- pip install sentida==0.5.0
"""

from danlp.datasets import TwitterSent
from danlp.models import load_bert_tone_model, load_spacy_model
from afinn import Afinn
import numpy as np
import tabulate
import os
@@ -102,38 +97,24 @@ def to_label_sentida(score):


def afinn_benchmark():
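# Score each tweet with the Danish AFINN lexicon (emoticons enabled) and map the scores to labels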
from afinn import Afinn
afinn = Afinn(language='da', emoticons=True)
df_val['afinn'] = df_val.text.map(afinn.score).map(to_label)

report(df_val['polarity'], df_val['afinn'], 'Afinn', "twitter_sentiment(val)")


def sentida_benchmark():
"The scripts download from github from sentindaV2 and place it in cache folder"
DEFAULT_CACHE_DIR = os.path.join(str(Path.home()), '.danlp')
print(os.getcwd())
workdir = DEFAULT_CACHE_DIR +'/sentida'
print(workdir)
if not os.path.isdir(workdir):
os.mkdir(workdir)
url = "https://raw.githubusercontent.com/esbenkc/emma/master/SentidaV2/"
for file in ['SentidaV2.py','aarup.csv','intensifier.csv']:
urllib.request.urlretrieve(url+file, workdir+'/'+file)



sys.path.insert(1, workdir)
os.chdir(workdir+ '/')
sys.stdout = open(os.devnull, 'w')
from SentidaV2 import sentidaV2
sys.stdout = sys.__stdout__

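# Use the sentida pip package (pip install sentida==0.5.0) instead of downloading sources manually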
from sentida import Sentida
sentida = Sentida()

def sentida_score(sent):
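# 'total' gives one combined, continuous score for the whole sentence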
return sentidaV2(sent, output='total')
return sentida.sentida(sent, output='total')

df_val['sentida'] = df_val.text.map(sentida_score).map(to_label_sentida)

report(df_val['polarity'], df_val['sentida'], 'SentidaV2', "twitter_sentiment(val)")
report(df_val['polarity'], df_val['sentida'], 'Sentida', "twitter_sentiment(val)")


def bert_sent_benchmark():
