# Lexicon-based Sentiment Analysis

<a href="https://colab.research.google.com/github/entelecheia/ekorpkit-config/blob/main/notebooks/preprocessor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Instantiating a sentiment analyser class

In [4]:
from ekorpkit import eKonf

# Compose the configuration for the `lm` lexicon-based sentiment model.
config_group = "model/sentiment=lm"
cfg = eKonf.compose(config_group=config_group)

# Switch on lemmatization in the NLTK tokenizer settings before the analyser
# is built. (Run `eKonf.pprint(cfg)` here to inspect the composed config.)
cfg.preprocessor.tokenizer.nltk.lemmatize = True
lmsa = eKonf.instantiate(cfg)

# A small hand-made token list; "Good" appears three times.
tokens = ["Fraud"] + ["Good"] * 3 + ["Sound", "uncertain", "beat", "wrong"]

# Last expression of the cell, so the prediction is shown as the cell output.
lmsa.predict(tokens)

Loading data from /workspace/projects/ekorpkit/ekorpkit/resources/lexicons/LM.parquet


{'polarity': 0.199999960000008,
 'subjectivity': 0.6249999218750099,
 'label': 'neutral'}

In [5]:
from pprint import pprint

# Example 1: show the tokens, the per-token lexicon features, and the
# overall prediction for the sentence.
text = "Beyond the improved voice capabilities, customers now have a streamlined way to comply with recalls and other traceability requirements, providing them with a competitive advantage."
tokens = lmsa.tokenize(text)
print(tokens)
pprint(
    lmsa.analyze(
        tokens,
        features=["Negative", "Positive"],
    )
)
print(lmsa.predict(text))

# Example 2: only the overall prediction is printed for this sentence.
text = "Operating loss amounted to EUR 0.7 mn compared to a profit of EUR 0.8 mn in the second quarter of 2005."
print(lmsa.predict(text))


['Beyond', 'the', 'improved', 'voice', 'capability', ',', 'customer', 'now', 'have', 'a', 'streamlined', 'way', 'to', 'comply', 'with', 'recall', 'and', 'other', 'traceability', 'requirement', ',', 'provide', 'them', 'with', 'a', 'competitive', 'advantage', '.']
{'advantage': {'Negative': 0, 'Positive': 2009, 'count': 1},
 'and': {'Negative': 0, 'Positive': 0, 'count': 1},
 'beyond': {'Negative': 0, 'Positive': 0, 'count': 1},
 'capability': {'Negative': 0, 'Positive': 0, 'count': 1},
 'competitive': {'Negative': 0, 'Positive': 0, 'count': 1},
 'comply': {'Negative': 0, 'Positive': 0, 'count': 1},
 'customer': {'Negative': 0, 'Positive': 0, 'count': 1},
 'have': {'Negative': 0, 'Positive': 0, 'count': 1},
 'improved': {'Negative': 0, 'Positive': 2009, 'count': 1},
 'now': {'Negative': 0, 'Positive': 0, 'count': 1},
 'other': {'Negative': 0, 'Positive': 0, 'count': 1},
 'provide': {'Negative': 0, 'Positive': 0, 'count': 1},
 'recall': {'Negative': 2009, 'Positive': 0, 'count': 1},
 'req

In [9]:
from ekorpkit import eKonf

# Compose the configuration for the `hiv4` lexicon-based sentiment model.
config_group = "model/sentiment=hiv4"
cfg = eKonf.compose(config_group=config_group)

# Switch on lemmatization in the NLTK tokenizer settings before the analyser
# is built. (Run `eKonf.pprint(cfg)` here to inspect the composed config.)
cfg.preprocessor.tokenizer.nltk.lemmatize = True
hivsa = eKonf.instantiate(cfg)

# A small hand-made token list; "Good" appears three times.
tokens = ["Fraud"] + ["Good"] * 3 + ["Sound", "uncertain", "beat", "wrong"]

# Last expression of the cell, so the prediction is shown as the cell output.
hivsa.predict(tokens)

Loading data from /workspace/projects/ekorpkit/ekorpkit/resources/lexicons/HIV-4.parquet


{'polarity': -0.9999990000010001,
 'subjectivity': 0.12499998437500197,
 'label': 'negative'}

In [10]:
from pprint import pprint

# Example 1: show the tokens, the per-token lexicon features, and the
# overall prediction for the sentence.
text = "Beyond the improved voice capabilities, customers now have a streamlined way to comply with recalls and other traceability requirements, providing them with a competitive advantage."
tokens = hivsa.tokenize(text)
print(tokens)
pprint(
    hivsa.analyze(
        tokens,
        # This lexicon's category columns are spelled without the final "e".
        features=["Negativ", "Positiv"],
    )
)
print(hivsa.predict(text))

# Example 2: only the overall prediction is printed for this sentence.
text = "Operating loss amounted to EUR 0.7 mn compared to a profit of EUR 0.8 mn in the second quarter of 2005."
print(hivsa.predict(text))


['Beyond', 'the', 'improved', 'voice', 'capability', ',', 'customer', 'now', 'have', 'a', 'streamlined', 'way', 'to', 'comply', 'with', 'recall', 'and', 'other', 'traceability', 'requirement', ',', 'provide', 'them', 'with', 'a', 'competitive', 'advantage', '.']
{'a': {'Negativ': None, 'Positiv': None, 'count': 2},
 'advantage': {'Negativ': None, 'Positiv': 'Positiv', 'count': 1},
 'and': {'Negativ': None, 'Positiv': None, 'count': 1},
 'beyond': {'Negativ': None, 'Positiv': None, 'count': 1},
 'capability': {'Negativ': None, 'Positiv': 'Positiv', 'count': 1},
 'competitive': {'Negativ': 'Negativ', 'Positiv': None, 'count': 1},
 'comply': {'Negativ': None, 'Positiv': None, 'count': 1},
 'customer': {'Negativ': None, 'Positiv': None, 'count': 1},
 'now': {'Negativ': None, 'Positiv': None, 'count': 1},
 'requirement': {'Negativ': None, 'Positiv': None, 'count': 1},
 'the': {'Negativ': None, 'Positiv': None, 'count': 1},
 'them': {'Negativ': None, 'Positiv': None, 'count': 1},
 'with': {'