# Sentiment Analysis

In [13]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import warnings
warnings.filterwarnings("ignore")

In [14]:
# # Let's come up with a list of positive and negative words we might observe.

# positive_words = ['improvement', 'recommends', 'delight', 'good', 'great', 'awesome', 'tremendous', 'fabulous',
#                  'amazing', 'stellar', 'fantastic', 'super']
# negative_words = ['adverse', 'unlikely', 'ugly', 'bad', 'disgusting', 'terrible', 'gross', 'awful', 'worst']

In [68]:
merc_df = pd.read_csv('./data/merc_news.csv')
merc_df['date'] = pd.to_datetime(merc_df['date'])
merc_df.set_index('date', inplace=True)
merc_df.sort_index(inplace=True)
merc_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 492 entries, 2009-08-17 12:14:00+00:00 to 2020-06-11 14:22:00+00:00
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   headline   492 non-null    object
 1   sentiment  492 non-null    object
dtypes: object(2)
memory usage: 11.5+ KB


In [69]:
# def simple_sentiment(text):
#     # Instantiate tokenizer.
#     tokenizer = RegexpTokenizer(r'\w+')
    
#     # Tokenize text.
#     tokens = tokenizer.tokenize(text.lower())
    
#     # Instantiate stemmer.
#     p_stemmer = PorterStemmer()
    
#     # Stem words.
#     stemmed_words = [p_stemmer.stem(i) for i in tokens]
    
#     # Stem our positive/negative words.
#     positive_stems = [p_stemmer.stem(i) for i in positive_words]
#     negative_stems = [p_stemmer.stem(i) for i in negative_words]

#     # Count "positive" words.
#     positive_count = sum([1 for i in stemmed_words if i in positive_stems])
    
#     # Count "negative" words
#     negative_count = sum([1 for i in stemmed_words if i in negative_stems])
    
#     # Calculate Sentiment Percentage 
#     # (Positive Count - Negative Count) / (Total Count)

#     return round((positive_count - negative_count) / len(tokens), 2)

In [70]:
# Define Merck text.
merc_df.head()
text = merc_df.iloc[244]['headline']
print(text)

The Stocks That Moved The S&P, Dow And Nasdaq Today


In [71]:
# Instantiate Sentiment Intensity Analyzer
sia = SentimentIntensityAnalyzer()
merc_df.columns

Index(['headline', 'sentiment'], dtype='object')

In [72]:
# Calculate sentiment of text
sia.polarity_scores(text)

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

In [73]:
merc_df['neg_sentiment'] = merc_df['headline'].apply(lambda x: sia.polarity_scores(x)['neg'])
merc_df['neu_sentiment'] = merc_df['headline'].apply(lambda x: sia.polarity_scores(x)['neu'])
merc_df['pos_sentiment'] = merc_df['headline'].apply(lambda x: sia.polarity_scores(x)['pos'])
merc_df['compound_sentiment'] = merc_df['headline'].apply(lambda x: sia.polarity_scores(x)['compound'])

In [74]:
merc_df.head()

Unnamed: 0_level_0,headline,sentiment,neg_sentiment,neu_sentiment,pos_sentiment,compound_sentiment
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-08-17 12:14:00+00:00,BenchmarkJournal.com Free Analyst Review for A...,neutral,0.0,0.769,0.231,0.5106
2009-09-18 07:38:00+00:00,Sanofi-aventis (SNY) Completes Acquisition of ...,neutral,0.0,1.0,0.0,0.0
2009-11-04 20:38:00+00:00,"Fitch Downgrades Merck, Outlook Stable",mildly bearish,0.0,0.645,0.355,0.296
2009-11-04 21:02:00+00:00,Merck Says It Won't Seek Another Larger Merger,mildly bullish,0.0,1.0,0.0,0.0
2009-11-13 16:32:00+00:00,Merck (MRK) Drugs Under Scanner Of Cholesterol...,neutral,0.0,1.0,0.0,0.0


In [83]:
# set column width
pd.set_option('display.max_colwidth', 130)

In [85]:
merc_df[merc_df['headline'].str.contains('Downgrades')]['headline']

date
2009-11-04 20:38:00+00:00                                                  Fitch Downgrades Merck, Outlook Stable
2010-07-30 18:11:00+00:00                             Hapoalim Securities Downgrades Merck to Hold from Buy (MRK)
2012-08-24 11:13:00+00:00             Bank of America Downgrades Merck & from Buy to Neutral, Announces PO of $45
2012-08-24 16:05:00+00:00    UPDATE: Bank of America Merrill Lynch Downgrades Merck to Neutral, Reiterates $45 PT
2013-10-14 10:52:00+00:00       Bernstein Downgrades Merck & Company, Inc. to Market Perform, Lowers PT to $50.00
2013-10-14 11:46:00+00:00                                                               Benzinga's Top Downgrades
2016-07-18 10:29:00+00:00                                     BMO Capital Downgrades Merck & Co to Market Perform
2017-10-30 10:40:00+00:00                                    Morgan Stanley Downgrades Merck & Co to Equal-Weight
2017-10-30 13:22:00+00:00                                Benzinga's Top Upgrades, D

In [88]:
merc_df.to_csv('./data/merc_news_sentiment.csv')

In [89]:
from transformers import pipeline

In [90]:
sia_pipeline = pipeline('sentiment-analysis')

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [91]:
sia_pipeline("Bank of America Merrill Lynch Downgrades Merck to Neutral, Reiterates $45 PT")

[{'label': 'NEGATIVE', 'score': 0.9975982308387756}]

In [92]:
sia.polarity_scores("Bank of America Merrill Lynch Downgrades Merck to Neutral, Reiterates $45 PT")

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}