# Sentiment Analysis

In [18]:
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import warnings
warnings.filterwarnings("ignore")

In [19]:
merc_df = pd.read_csv('./data/merck_news.csv')
merc_df['date'] = pd.to_datetime(merc_df['date'])
merc_df.set_index('date', inplace=True)
merc_df.sort_index(inplace=True)
merc_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 492 entries, 2009-08-17 12:14:00+00:00 to 2020-06-11 14:22:00+00:00
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   headline   492 non-null    object
 1   sentiment  492 non-null    object
dtypes: object(2)
memory usage: 11.5+ KB


In [20]:
# Define Merck text.
merc_df.head()
text = merc_df.iloc[244]['headline']
print(text)

The Stocks That Moved The S&P, Dow And Nasdaq Today


In [21]:
# Instantiate Sentiment Intensity Analyzer
sia = SentimentIntensityAnalyzer()
merc_df.columns

Index(['headline', 'sentiment'], dtype='object')

In [22]:
# Calculate sentiment of text
sia.polarity_scores(text)

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

In [23]:
merc_df['neg_sentiment'] = merc_df['headline'].apply(lambda x: sia.polarity_scores(x)['neg'])
merc_df['neu_sentiment'] = merc_df['headline'].apply(lambda x: sia.polarity_scores(x)['neu'])
merc_df['pos_sentiment'] = merc_df['headline'].apply(lambda x: sia.polarity_scores(x)['pos'])
merc_df['compound_sentiment'] = merc_df['headline'].apply(lambda x: sia.polarity_scores(x)['compound'])

In [24]:
merc_df.head()

Unnamed: 0_level_0,headline,sentiment,neg_sentiment,neu_sentiment,pos_sentiment,compound_sentiment
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009-08-17 12:14:00+00:00,"BenchmarkJournal.com Free Analyst Review for ABK, EQR, KMB, BBD, FIG and MRK",neutral,0.0,0.769,0.231,0.5106
2009-09-18 07:38:00+00:00,Sanofi-aventis (SNY) Completes Acquisition of Merial,neutral,0.0,1.0,0.0,0.0
2009-11-04 20:38:00+00:00,"Fitch Downgrades Merck, Outlook Stable",mildly bearish,0.0,0.645,0.355,0.296
2009-11-04 21:02:00+00:00,Merck Says It Won't Seek Another Larger Merger,mildly bullish,0.0,1.0,0.0,0.0
2009-11-13 16:32:00+00:00,Merck (MRK) Drugs Under Scanner Of Cholesterol Study,neutral,0.0,1.0,0.0,0.0


In [25]:
# set column width
pd.set_option('display.max_colwidth', 130)

In [26]:
merc_df[merc_df['headline'].str.contains('Downgrades')]['headline']

date
2009-11-04 20:38:00+00:00                                                  Fitch Downgrades Merck, Outlook Stable
2010-07-30 18:11:00+00:00                             Hapoalim Securities Downgrades Merck to Hold from Buy (MRK)
2012-08-24 11:13:00+00:00             Bank of America Downgrades Merck & from Buy to Neutral, Announces PO of $45
2012-08-24 16:05:00+00:00    UPDATE: Bank of America Merrill Lynch Downgrades Merck to Neutral, Reiterates $45 PT
2013-10-14 10:52:00+00:00       Bernstein Downgrades Merck & Company, Inc. to Market Perform, Lowers PT to $50.00
2013-10-14 11:46:00+00:00                                                               Benzinga's Top Downgrades
2016-07-18 10:29:00+00:00                                     BMO Capital Downgrades Merck & Co to Market Perform
2017-10-30 10:40:00+00:00                                    Morgan Stanley Downgrades Merck & Co to Equal-Weight
2017-10-30 13:22:00+00:00                                Benzinga's Top Upgrades, D

In [27]:
merc_df.to_csv('./data/merc_news_sentiment.csv')

In [28]:
from transformers import pipeline

In [29]:
sia_pipeline = pipeline('sentiment-analysis')

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [30]:
sia_pipeline("Bank of America Merrill Lynch Downgrades Merck to Neutral, Reiterates $45 PT")

[{'label': 'NEGATIVE', 'score': 0.9975982308387756}]

In [31]:
sia.polarity_scores("Bank of America Merrill Lynch Downgrades Merck to Neutral, Reiterates $45 PT")

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

In [32]:
# select only april 2021 to may 2021
# merc_df_apr = merc_df.loc['2021-02-01':'2021-06-31']
# use datetime index to filter
merc_df_apr = merc_df[(merc_df.index >= '2019-05-31')]
merc_df_apr.head(30)

Unnamed: 0_level_0,headline,sentiment,neg_sentiment,neu_sentiment,pos_sentiment,compound_sentiment
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-05-31 10:46:00+00:00,Merck Reports KEYTRUDA Showed Improved Overall Survival as First-Line Treatment of Recurrent or Metastatic Head and Neck Squam...,mildly bullish,0.0,0.897,0.103,0.4767
2019-06-01 11:36:00+00:00,Merck Announces Five-Year Survival Data for KEYTRUDA in Advanced Non-Small Cell Lung Cancer from First KEYNOTE Trial at #ASCO19,mildly bullish,0.188,0.726,0.085,-0.5267
2019-06-03 20:15:00+00:00,Merck Reports LYNPARZA Phase 3 SOLO3 Trial Showed 72% Objective Response Rate in Patients with Platinum-Sensitive Relapsed Ger...,mildly bullish,0.136,0.802,0.062,-0.5267
2019-06-04 11:45:00+00:00,"The Daily Biotech Pulse: ASCO Presentations From Roche, Celgene & More, Edwards Recommends Rejection Of Mini-Tender Offer",neutral,0.189,0.702,0.109,-0.3788
2019-06-04 17:07:00+00:00,Shares of several healthcare companies are trading higher with the overall US market. Recent comments out of China and Mexico ...,mildly bullish,0.0,0.82,0.18,0.6908
2019-06-11 13:14:00+00:00,"FDA Greenlights Merck's Blockbuster Drug Keytruda For First-Line Treatment Of Head, Neck Cancer",mildly bullish,0.228,0.57,0.202,-0.128
2019-06-18 08:27:00+00:00,"AstraZeneca, Merck Announce Lynparza Approved in the EU for 1st-line Maintenance Treatment Of BRCA-mutated Advanced Ovarian Ca...",mildly bullish,0.19,0.603,0.207,-0.1531
2019-06-19 08:58:00+00:00,"AstraZeneca,Merck Announce Lynparza Approved in Japan for 1st-line Maintenance Therapy in BRCA-mutated Advanced Ovarian Cancer",mildly bullish,0.208,0.566,0.226,-0.1531
2019-06-19 19:43:00+00:00,Report: Merck Searching Internally For Next CEO,neutral,0.0,1.0,0.0,0.0
2019-06-22 19:07:00+00:00,"The Week Ahead In Biotech: Conferences, PDUFA Dates, Clinical Trial Readouts And IPOs",neutral,0.0,1.0,0.0,0.0


In [33]:
merc_df_apr = merc_df[(merc_df.index >= '2019-01-01')]
merc_df_apr.head(30)

Unnamed: 0_level_0,headline,sentiment,neg_sentiment,neu_sentiment,pos_sentiment,compound_sentiment
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-14 22:02:00+00:00,"Merck Reports KEYTRUDA Reduced Risk Of Death By 31% Vs Chemotherapy In Previously-Treated Patients With Advanced Esophageal, E...",mildly bullish,0.207,0.724,0.069,-0.6124
2019-02-01 09:46:00+00:00,"12 Stocks To Watch For February 1, 2019",neutral,0.0,1.0,0.0,0.0
2019-02-01 14:10:00+00:00,"The Daily Biotech Pulse: Merck Earnings, Positive Relaunch Of Titan's Opioid Disorder Treatment Implant",mildly bullish,0.148,0.656,0.197,0.2263
2019-02-05 11:49:00+00:00,FDA Accepts For Review New Drug Application For Merck's Investigational Combination Of Imipenem/Cilastatin And Relebactam; Acc...,mildly bullish,0.0,0.892,0.108,0.3182
2019-02-05 13:39:00+00:00,Codexis shares are trading higher after the company reported a multi-year agreement with Merck to perform technology upgrades.,mildly bullish,0.0,0.735,0.265,0.6597
2019-02-21 13:09:00+00:00,31 Stocks Moving In Thursday's Pre-Market Session,neutral,0.0,1.0,0.0,0.0
2019-02-22 10:02:00+00:00,65 Biggest Movers From Yesterday,neutral,0.0,1.0,0.0,0.0
2019-02-25 13:56:00+00:00,"Pharma M&A Wave: Spark Therapeutics, Clementia Pharma Among The Rare Disease Biotechs Acquired",mildly bullish,0.0,0.863,0.137,0.2263
2019-02-26 09:01:00+00:00,"AstraZeneca, Merck Announce Phase III POLO Trial Met its Primary Endpoint of Progression-free Survival",mildly bullish,0.0,1.0,0.0,0.0
2019-03-22 12:16:00+00:00,"The Daily Biotech Pulse: Immutep Gets US Patent, Conatus NASH Disappointment, Eisai Moves On After Aducanumab Setback",neutral,0.171,0.829,0.0,-0.5106


In [None]:
## Project is based on lesson 6.03 and 6.04 NLP and NLTK