# Sentiment Analysis of News

In [130]:
from nltk.tokenize import treebank
from nltk.corpus import opinion_lexicon
from nltk.sentiment import SentimentIntensityAnalyzer

import plotly.express as px
import pandas as pd
import numpy as np

In [131]:
nyt = pd.read_csv('csv_files/nyt.csv')

# pub_date looks like '1998-12-31T05:00:00+0000'; strip the time/offset suffix so
# only the calendar date is parsed. regex=True is passed explicitly because
# pandas >= 2.0 treats the pattern as a literal string by default, which would
# silently leave the suffix in place.
nyt['date'] = pd.to_datetime(
    nyt['pub_date'].str.replace(r'T\d{2}:\d{2}:\d{2}\+\d{4}', '', regex=True)
)

# Monday of the week containing each date (dayofweek: Monday == 0).
nyt['week'] = nyt['date'] - pd.to_timedelta(nyt['date'].dt.dayofweek, unit='d')

# First day of the month / year containing each date. Going through a Period
# keeps dates that already fall on the 1st in their own month; subtracting
# MonthBegin(1) would push them back to the previous month. It also avoids
# building the year via string concatenation.
nyt['month'] = nyt['date'].dt.to_period('M').dt.to_timestamp()
nyt['year'] = nyt['date'].dt.to_period('Y').dt.to_timestamp()

# print_page may carry letters next to the page number; drop ASCII letters
# before converting. ('[aA-zZ]' also matched the punctuation characters that
# sit between 'Z' and 'a' in ASCII.)
nyt['print_page'] = pd.to_numeric(
    nyt['print_page'].str.replace(r'[A-Za-z]', '', regex=True)
)

# Importance weight: low page numbers score close to 1, the highest page
# number in the data scores 0.
nyt['importance'] = 1 - nyt['print_page'] / nyt['print_page'].max()

nyt.head(2)

Unnamed: 0,snippet,abstract,pub_date,_id,section_name,uri,type_of_material,source,print_section,lead_paragraph,...,web_url,document_type,print_page,headline,author,date,week,month,year,importance
0,Mexican lawmakers reach compromise on country'...,Mexican lawmakers reach compromise on country'...,1998-12-31T05:00:00+0000,nyt://article/6238be98-a87a-5fbd-8a5d-20b15341...,World,nyt://article/6238be98-a87a-5fbd-8a5d-20b15341...,News,The New York Times,A,After weeks of tense impasse and with less tha...,...,https://www.nytimes.com/1998/12/31/world/just-...,article,9.0,"Just Hours Before Deadline, Mexicans Draft an ...",By Ginger Thompson,1998-12-31,1998-12-28,1998-12-01,1998-01-01,0.948276
1,Three new studies report new technique to pinp...,Three new studies report new technique to pinp...,1998-12-29T05:00:00+0000,nyt://article/bd31e0fe-4053-55f8-9e24-86b10281...,Science,nyt://article/bd31e0fe-4053-55f8-9e24-86b10281...,News,The New York Times,F,"Every winter, after flying south from the East...",...,https://www.nytimes.com/1998/12/29/science/on-...,article,5.0,"On the Trail of the Monarch, With the Aid of C...",By Carol Kaesuk Yoon,1998-12-29,1998-12-28,1998-12-01,1998-01-01,0.971264


## Dictionary-based Approach

In [132]:

# Positive / negative word lists from the NLTK opinion lexicon, held as sets
# so that per-token membership checks are cheap.
pos_list, neg_list = (
    set(words)
    for words in (opinion_lexicon.positive(), opinion_lexicon.negative())
)

# Shared tokenizer instance used by sentiment().
tokenizer = treebank.TreebankWordTokenizer()

def sentiment(sentence):
  """Return the net opinion-lexicon score of a text.

  The text is tokenized with the module-level ``tokenizer``; every lower-cased
  token found in ``pos_list`` adds 1 and every token found in ``neg_list``
  subtracts 1.

  Parameters:
    sentence: text to score. Non-string values (e.g. ``None`` or the NaN that
      pandas uses for a missing cell) are treated as empty text.

  Returns:
    int: number of positive tokens minus number of negative tokens; 0 for
    non-string input.
  """
  # A missing abstract is not a str; score it as neutral instead of letting
  # the tokenizer raise inside Series.apply.
  if not isinstance(sentence, str):
    return 0
  senti = 0
  for token in tokenizer.tokenize(sentence):
    word = token.lower()
    if word in pos_list:
      senti += 1
    elif word in neg_list:
      senti -= 1
  return senti

# Raw lexicon score (positive minus negative tokens) for every abstract.
nyt['sentiment'] = nyt['abstract'].apply(sentiment)

# Rescale: non-negative scores are divided by the largest score, negative
# scores by the magnitude of the smallest one, so that (when max > 0 > min)
# the result runs from -1 to 1.
nyt['dict_sentiment'] = np.where(
    nyt['sentiment'].ge(0),
    nyt['sentiment'].div(nyt['sentiment'].max()),
    (-nyt['sentiment']).div(nyt['sentiment'].min()),
)

# Score weighted by the article's importance.
nyt['dict_sentiment_imp'] = nyt['dict_sentiment'].mul(nyt['importance'])

In [133]:

# Distribution of the rescaled dictionary-based score across all articles.
fig = px.histogram(data_frame=nyt, x="dict_sentiment")
fig.show()

In [134]:
# Importance-weighted mean dictionary score per day:
# sum(score * importance) / sum(importance) within each date.
date_nyt = (
    nyt.groupby('date')['dict_sentiment_imp'].sum()
    .div(nyt.groupby('date')['importance'].sum())
    .rename('Mexico News Sentiment Score')
    .reset_index()
)
fig = px.line(date_nyt, x='date', y='Mexico News Sentiment Score')
fig.show()

In [135]:
# Importance-weighted mean dictionary score per week:
# sum(score * importance) / sum(importance) within each week.
week_nyt = (
    nyt.groupby('week')['dict_sentiment_imp'].sum()
    / nyt.groupby('week')['importance'].sum()
).reset_index()

# The unnamed ratio column is called 0 after reset_index(). Plot it under the
# same descriptive label the daily and monthly charts use; week_nyt itself
# keeps its original column name.
fig = px.line(
    week_nyt.rename(columns={0: 'Mexico News Sentiment Score'}),
    x='week',
    y='Mexico News Sentiment Score',
)
fig.show()

In [138]:
# Importance-weighted mean dictionary score per month:
# sum(score * importance) / sum(importance) within each month.
month_nyt = (
    nyt.groupby('month')['dict_sentiment_imp'].sum()
    .div(nyt.groupby('month')['importance'].sum())
    .rename('Mexico News Sentiment Score')
    .reset_index()
)
fig = px.line(month_nyt, x='month', y='Mexico News Sentiment Score')
fig.show()

In [137]:
# Importance-weighted mean dictionary score per year:
# sum(score * importance) / sum(importance) within each year.
year_nyt = (
    nyt.groupby('year')['dict_sentiment_imp'].sum()
    / nyt.groupby('year')['importance'].sum()
).reset_index()

# The unnamed ratio column is called 0 after reset_index(). Plot it under the
# same descriptive label the daily and monthly charts use; year_nyt itself
# keeps its original column name.
fig = px.line(
    year_nyt.rename(columns={0: 'Mexico News Sentiment Score'}),
    x='year',
    y='Mexico News Sentiment Score',
)
fig.show()

## VADER Approach

In [152]:
_vader_analyzer = None  # shared SentimentIntensityAnalyzer, built on first use


def _vader_compound(text):
    """Return the VADER 'compound' score for one text.

    Non-string values (e.g. None or a NaN from a missing cell) are scored as
    0.0 instead of being passed to the analyzer.
    """
    global _vader_analyzer
    if not isinstance(text, str):
        return 0.0
    # Build the analyzer once and reuse it; constructing a new one for every
    # element repeats its set-up work for each row.
    if _vader_analyzer is None:
        _vader_analyzer = SentimentIntensityAnalyzer()
    return _vader_analyzer.polarity_scores(text)['compound']


# Element-wise version: accepts an array-like of texts and returns a float
# array of compound scores. Declaring otypes stops np.vectorize from calling
# the function an extra time on the first element to infer the output type,
# and makes empty input return an empty array instead of raising.
vader_sentiment = np.vectorize(_vader_compound, otypes=[float])

In [159]:
# VADER compound score for every abstract, then the importance-weighted score.
nyt['vader_sentiment'] = vader_sentiment(nyt['abstract'])
nyt['vader_sentiment_imp'] = nyt['vader_sentiment'].mul(nyt['importance'])

In [160]:
# Distribution of the VADER compound score across all articles.
fig = px.histogram(data_frame=nyt, x="vader_sentiment")
fig.show()

In [161]:
# Importance-weighted mean VADER score per day:
# sum(score * importance) / sum(importance) within each date.
date_nyt = (
    nyt.groupby('date')['vader_sentiment_imp'].sum()
    .div(nyt.groupby('date')['importance'].sum())
    .rename('Mexico News Sentiment Score')
    .reset_index()
)
fig = px.line(date_nyt, x='date', y='Mexico News Sentiment Score')
fig.show()

In [162]:
# Importance-weighted mean VADER score per week:
# sum(score * importance) / sum(importance) within each week.
week_nyt = (
    nyt.groupby('week')['vader_sentiment_imp'].sum()
    / nyt.groupby('week')['importance'].sum()
).reset_index()

# The unnamed ratio column is called 0 after reset_index(). Plot it under the
# same descriptive label the daily and monthly charts use; week_nyt itself
# keeps its original column name.
fig = px.line(
    week_nyt.rename(columns={0: 'Mexico News Sentiment Score'}),
    x='week',
    y='Mexico News Sentiment Score',
)
fig.show()

In [163]:
# Importance-weighted mean VADER score per month:
# sum(score * importance) / sum(importance) within each month.
month_nyt = (
    nyt.groupby('month')['vader_sentiment_imp'].sum()
    .div(nyt.groupby('month')['importance'].sum())
    .rename('Mexico News Sentiment Score')
    .reset_index()
)
fig = px.line(month_nyt, x='month', y='Mexico News Sentiment Score')
fig.show()

In [164]:
# Importance-weighted mean VADER score per year:
# sum(score * importance) / sum(importance) within each year.
year_nyt = (
    nyt.groupby('year')['vader_sentiment_imp'].sum()
    / nyt.groupby('year')['importance'].sum()
).reset_index()

# The unnamed ratio column is called 0 after reset_index(). Plot it under the
# same descriptive label the daily and monthly charts use; year_nyt itself
# keeps its original column name.
fig = px.line(
    year_nyt.rename(columns={0: 'Mexico News Sentiment Score'}),
    x='year',
    y='Mexico News Sentiment Score',
)
fig.show()