### Sentiment Analysis

In [1]:
# https://medium.com/@b.terryjack/nlp-pre-trained-sentiment-analysis-1eb52a9d742c

In [7]:
# https://www.nltk.org/api/nltk.sentiment.html

import nltk # install nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

sid = SentimentIntensityAnalyzer()

sid.polarity_scores("horror")

{'compound': -0.5719, 'neg': 1.0, 'neu': 0.0, 'pos': 0.0}

In [2]:
sentences = {"pos_sentence": "Amazing!",
             "neu_sentence": "That is great! But I don't know...maybe a little weird", 
             "neg_sentence": "Bad",
            "negated_words": "I am not happy",
            "neutral":"I will go tomorrow to the mountain"}

for e in sentences:
    print("{}: {}".format(e, sid.polarity_scores(sentences[e])))

neutral: {'pos': 0.0, 'neu': 1.0, 'compound': 0.0, 'neg': 0.0}
neu_sentence: {'pos': 0.487, 'neu': 0.513, 'compound': 0.6901, 'neg': 0.0}
negated_words: {'pos': 0.0, 'neu': 0.4, 'compound': -0.4585, 'neg': 0.6}
pos_sentence: {'pos': 1.0, 'neu': 0.0, 'compound': 0.6239, 'neg': 0.0}
neg_sentence: {'pos': 0.0, 'neu': 0.0, 'compound': -0.5423, 'neg': 1.0}


In [5]:
# https://medium.com/@rahulvaish/textblob-and-sentiment-analysis-python-a687e9fabe96
# https://textblob.readthedocs.io/en/dev/

from textblob import TextBlob # install TextBlob

TextBlob("hate").sentiment

Sentiment(polarity=-0.8, subjectivity=0.9)

In [13]:
for e in sentences:
    print("{}: {}".format(e, TextBlob(sentences[e]).sentiment))

pos_sentence: Sentiment(polarity=0.7500000000000001, subjectivity=0.9)
neu_sentence: Sentiment(polarity=0.10416666666666667, subjectivity=0.75)
neg_sentence: Sentiment(polarity=-0.6999999999999998, subjectivity=0.6666666666666666)
negated_words: Sentiment(polarity=-0.4, subjectivity=1.0)
neutral: Sentiment(polarity=0.0, subjectivity=0.0)


### For the recommendation system

In [30]:
text = 'The planet Earth orbits the Sun.'

In [31]:
# tokenize 
tokens = nltk.word_tokenize(text)
print(tokens)

['The', 'planet', 'Earth', 'orbits', 'the', 'Sun', '.']


In [32]:
# tokenize removing punctuation
from nltk.tokenize import RegexpTokenizer
tokenizer = RegexpTokenizer(r'\w+')
tokens = tokenizer.tokenize(text)
tokens

['The', 'planet', 'Earth', 'orbits', 'the', 'Sun']

In [33]:
# remove stop words

from nltk.corpus import stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
stop_words

tokens_clean = [e for e in tokens if e not in stop_words]
tokens_clean

[nltk_data] Downloading package stopwords to /Users/bego/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


['The', 'planet', 'Earth', 'orbits', 'Sun']

In [34]:
# other recommender systems
# https://www.kernix.com/blog/recommender-system-based-on-natural-language-processing_p10
