# VADER Sentiment Analysis

In [2]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [11]:
# Load the raw article export; the next cell reads its 'Article Title' and
# 'Date and Time' columns.
df = pd.read_csv("data/4.12.15-5.11.24_articles.csv")

In [12]:
# Reduce the raw frame to the two fields the analysis needs: the headline and
# the date it was published on. The date is whatever precedes the first 'T' in
# 'Date and Time' (presumably an ISO-style timestamp -- the displayed result
# shows plain YYYY-MM-DD values).
# Vectorised .str access replaces a row-by-row iterrows() loop; a missing
# timestamp now yields NaN here instead of raising AttributeError.
df = pd.DataFrame({
    'title': df['Article Title'],
    'publishedAt': df['Date and Time'].str.split('T').str[0],
}).reset_index(drop=True)  # fresh 0..n-1 index, as a newly built frame would have

In [14]:
# Inspect the reduced frame: one row per article with its title and
# publication date.
df

Unnamed: 0,title,publishedAt
0,Why Apple Watch Is Just 'Noise' And Apple Inve...,2015-04-12
1,Analyst: Market Is Wrong On Threat To Garmin L...,2015-04-11
2,Weekly Tech Highlights: Apple's TV Service Is ...,2015-04-11
3,"6% Of US Adults Plan To Purchase Apple Watch, ...",2015-04-15
4,"6% of US Adults Plan to Purchase Apple Watch, ...",2015-04-15
...,...,...
16822,Apple's iPad Sales To Get A 'Nice Boost' This ...,2024-05-08
16823,Jim Cramer Advises Investors To Brace For Econ...,2024-05-08
16824,Will The Apple Pencil Pro Work With Your iPad?...,2024-05-08
16825,Apple Inches Closer To Choosing Sam Altman-Led...,2024-05-11


In [15]:
# Initialize VADER: create one analyzer instance at module level so that
# get_sentiment can reuse it for every headline.
analyzer = SentimentIntensityAnalyzer()

# Headline scorer backed by the shared module-level analyzer
def get_sentiment(text):
    """Return VADER's polarity scores for ``text``.

    The value is passed through unchanged from ``analyzer.polarity_scores``;
    the rest of this notebook reads its 'neg', 'neu', 'pos' and 'compound'
    entries.
    """
    scores = analyzer.polarity_scores(text)
    return scores

# Apply sentiment analysis: score every headline once and expand the resulting
# dicts into one column per score. Building a DataFrame from the list of dicts
# avoids constructing a pd.Series for every row (what `.apply(pd.Series)`
# does); passing df.index keeps each score row aligned with its title.
sentiment_scores = pd.DataFrame(df['title'].map(get_sentiment).tolist(), index=df.index)

# Drop score columns left over from a previous run of this cell so that
# re-executing it replaces them instead of appending duplicate columns.
df = pd.concat(
    [df.drop(columns=sentiment_scores.columns, errors='ignore'), sentiment_scores],
    axis=1,
)

In [16]:
# Inspect the frame again, now with the neg / neu / pos / compound score
# columns appended to each article.
df

Unnamed: 0,title,publishedAt,neg,neu,pos,compound
0,Why Apple Watch Is Just 'Noise' And Apple Inve...,2015-04-12,0.000,1.000,0.000,0.0000
1,Analyst: Market Is Wrong On Threat To Garmin L...,2015-04-11,0.394,0.606,0.000,-0.7579
2,Weekly Tech Highlights: Apple's TV Service Is ...,2015-04-11,0.350,0.650,0.000,-0.7906
3,"6% Of US Adults Plan To Purchase Apple Watch, ...",2015-04-15,0.000,0.882,0.118,0.4215
4,"6% of US Adults Plan to Purchase Apple Watch, ...",2015-04-15,0.000,1.000,0.000,0.0000
...,...,...,...,...,...,...
16822,Apple's iPad Sales To Get A 'Nice Boost' This ...,2024-05-08,0.000,0.819,0.181,0.4118
16823,Jim Cramer Advises Investors To Brace For Econ...,2024-05-08,0.000,0.788,0.212,0.6249
16824,Will The Apple Pencil Pro Work With Your iPad?...,2024-05-08,0.000,1.000,0.000,0.0000
16825,Apple Inches Closer To Choosing Sam Altman-Led...,2024-05-11,0.000,1.000,0.000,0.0000


In [17]:
# Collapse the per-article frame to one row per publication date.
#
# `df` already carries the neg / neu / pos / compound score of every headline
# (added when the sentiment analysis was applied above), so the daily averages
# are taken from those columns rather than running VADER on each title a second
# time. groupby sorts by the key, so rows come out in ascending date order.
#
# Named aggregation is spelled with a dict because the output column
# 'article titles' contains a space and cannot be written as a keyword.
df_aggregated = (
    df.groupby('publishedAt')
    .agg(**{
        'article titles': ('title', list),   # all headlines published that day
        'pos': ('pos', 'mean'),
        'neu': ('neu', 'mean'),
        'neg': ('neg', 'mean'),
        'compound': ('compound', 'mean'),
    })
    .rename_axis('date')   # the group key becomes the 'date' column
    .reset_index()
)

In [18]:
# Each 'article titles' cell holds that date's list of headlines, so its
# length is the number of articles published on that date.
df_aggregated['number of articles'] = df_aggregated['article titles'].map(len)

In [20]:
# Write the per-date sentiment table to CSV, omitting the row index.
df_aggregated.to_csv(
    'data/4.12.15-5.11.24_sentiments.csv',
    index=False,
    encoding='utf-8',
)