In [25]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [26]:
import pandas as pd
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import nltk
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/taohidshadat/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [27]:
# load data: speeches, then the ^IXIC (nasdaq) and ^GSPC (sp500) price files
speeches, nasdaq, sp500 = (
    pd.read_csv(csv_path)
    for csv_path in ('speeches.csv', '^IXIC.csv', '^GSPC.csv')
)

In [28]:
speeches.head()

Unnamed: 0,Date,Link,speech
0,2/11/2020,https://www.federalreserve.gov/newsevents/test...,Chair Powell submitted identical remarks to th...
1,4/9/2020,https://www.federalreserve.gov/newsevents/test...,Good morning. The challenge we face today is d...
2,5/12/2020,https://www.federalreserve.gov/newsevents/test...,"Chairman Crapo, Ranking Member Brown, members ..."
3,5/13/2020,https://www.federalreserve.gov/newsevents/spee...,The coronavirus has left a devastating human a...
4,5/19/2020,https://www.federalreserve.gov/newsevents/test...,"Chairman Crapo, Ranking Member Brown, and othe..."


In [29]:
nasdaq.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2/10/2020,9493.63,9628.66,9493.63,9628.39,9628.39,2187520000
1,2/11/2020,9680.89,9714.74,9617.21,9638.94,9638.94,2450070000
2,2/12/2020,9688.6,9728.77,9666.69,9725.96,9725.96,2366510000
3,4/8/2020,7975.720215,8114.430176,7901.939941,8090.899902,8090.899902,3487440000
4,4/9/2020,8169.009766,8227.910156,8072.319824,8153.580078,8153.580078,4145460000


In [30]:
sp500.head()

Unnamed: 0,Date,Open,High,Low,Close*,Adj Close,Volume
0,2/10/2020,3318.28,3352.26,3317.77,3352.09,3352.09,3462730000
1,2/11/2020,3365.87,3375.63,3352.72,3357.75,3357.75,3762940000
2,2/12/2020,3370.5,3381.47,3369.72,3379.45,3379.45,3930910000
3,4/8/2020,2685.0,2760.75,2663.3,2749.98,2749.98,5875710000
4,4/9/2020,2776.99,2818.57,2762.36,2789.82,2789.82,7899550000


In [31]:
# Parse the 'Date' column of each frame into pandas datetimes
speeches = speeches.assign(Date=pd.to_datetime(speeches['Date']))
sp500 = sp500.assign(Date=pd.to_datetime(sp500['Date']))
nasdaq = nasdaq.assign(Date=pd.to_datetime(nasdaq['Date']))

In [32]:
# stripping all commas
# Every string cell has its ',' characters removed; non-string cells are
# returned unchanged (presumably the commas are thousands separators in the
# price/volume text — confirm against the CSV).
# NOTE(review): DataFrame.applymap is reportedly deprecated in newer pandas
# releases in favour of DataFrame.map — confirm against the targeted version.
nasdaq = nasdaq.applymap(lambda x: x.replace(',', '') if isinstance(x, str) else x)

In [33]:
# stripping all commas
# Every string cell has its ',' characters removed; non-string cells are
# returned unchanged (presumably the commas are thousands separators in the
# price/volume text — confirm against the CSV).
# NOTE(review): DataFrame.applymap is reportedly deprecated in newer pandas
# releases in favour of DataFrame.map — confirm against the targeted version.
sp500 = sp500.applymap(lambda x: x.replace(',', '') if isinstance(x, str) else x)

In [34]:
# converting every remaining object-dtype (text) column to float type
# `astype` with a column -> dtype mapping returns a frame whose columns really
# carry the float dtype. Assigning through `.loc[:, cols] = ...` writes the
# values in place on pandas 2.x and leaves the columns as `object`.
nasdaq = nasdaq.astype(
    {col: float for col in nasdaq.select_dtypes(include=[object]).columns}
)

In [35]:
# converting every remaining object-dtype (text) column to float type
# `astype` with a column -> dtype mapping returns a frame whose columns really
# carry the float dtype. Assigning through `.loc[:, cols] = ...` writes the
# values in place on pandas 2.x and leaves the columns as `object`.
sp500 = sp500.astype(
    {col: float for col in sp500.select_dtypes(include=[object]).columns}
)

In [36]:
# percent change from 'Open' to 'Adj Close' for each row
nasdaq['Pct_Change'] = (
    nasdaq['Adj Close']
    .sub(nasdaq['Open'])
    .div(nasdaq['Open'])
    .mul(100)
)

In [37]:
# percent change from 'Open' to 'Adj Close' for each row
sp500['Pct_Change'] = (
    sp500['Adj Close']
    .sub(sp500['Open'])
    .div(sp500['Open'])
    .mul(100)
)

In [38]:
# Prefix every non-'Date' column with its index name; 'Date' stays unprefixed
# (and ends up as the first column) so it can serve as the join key.
sp500_prefix = sp500.drop(columns='Date').add_prefix('sp500_')
sp500 = pd.concat([sp500[['Date']], sp500_prefix], axis='columns')

nasdaq_prefix = nasdaq.drop(columns='Date').add_prefix('nasdaq_')
nasdaq = pd.concat([nasdaq[['Date']], nasdaq_prefix], axis='columns')

In [39]:
# initializing the VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

In [40]:
# Score each speech with VADER; every cell holds the full polarity dict
speeches['Speech_Scores'] = speeches['speech'].apply(analyzer.polarity_scores)

In [41]:
speeches.head()

Unnamed: 0,Date,Link,speech,Speech_Scores
0,2020-02-11,https://www.federalreserve.gov/newsevents/test...,Chair Powell submitted identical remarks to th...,"{'neg': 0.051, 'neu': 0.796, 'pos': 0.153, 'co..."
1,2020-04-09,https://www.federalreserve.gov/newsevents/test...,Good morning. The challenge we face today is d...,"{'neg': 0.08, 'neu': 0.747, 'pos': 0.172, 'com..."
2,2020-05-12,https://www.federalreserve.gov/newsevents/test...,"Chairman Crapo, Ranking Member Brown, members ...","{'neg': 0.106, 'neu': 0.745, 'pos': 0.149, 'co..."
3,2020-05-13,https://www.federalreserve.gov/newsevents/spee...,The coronavirus has left a devastating human a...,"{'neg': 0.126, 'neu': 0.754, 'pos': 0.12, 'com..."
4,2020-05-19,https://www.federalreserve.gov/newsevents/test...,"Chairman Crapo, Ranking Member Brown, and othe...","{'neg': 0.078, 'neu': 0.756, 'pos': 0.166, 'co..."


In [42]:
# function to get sentiment of speech
def get_sentiment(speech):
    """Label a text from its VADER compound score.

    The text is scored with the module-level ``analyzer`` and the
    ``'compound'`` value is bucketed:

    * ``>= 0.5``            -> ``'Positive'``
    * ``>= -0.5`` and ``< 0.5`` -> ``'Neutral'``
    * anything else         -> ``'Negative'``
    """
    compound = analyzer.polarity_scores(speech)['compound']

    # Guard clauses, checked from the most positive bucket downwards.
    if compound >= 0.5:
        return 'Positive'
    if compound >= -0.5:
        return 'Neutral'
    return 'Negative'
    
# Label every speech as Positive / Neutral / Negative
speeches['Speech_Sentiment'] = speeches['speech'].map(get_sentiment)

In [43]:
speeches.head()

Unnamed: 0,Date,Link,speech,Speech_Scores,Speech_Sentiment
0,2020-02-11,https://www.federalreserve.gov/newsevents/test...,Chair Powell submitted identical remarks to th...,"{'neg': 0.051, 'neu': 0.796, 'pos': 0.153, 'co...",Positive
1,2020-04-09,https://www.federalreserve.gov/newsevents/test...,Good morning. The challenge we face today is d...,"{'neg': 0.08, 'neu': 0.747, 'pos': 0.172, 'com...",Positive
2,2020-05-12,https://www.federalreserve.gov/newsevents/test...,"Chairman Crapo, Ranking Member Brown, members ...","{'neg': 0.106, 'neu': 0.745, 'pos': 0.149, 'co...",Positive
3,2020-05-13,https://www.federalreserve.gov/newsevents/spee...,The coronavirus has left a devastating human a...,"{'neg': 0.126, 'neu': 0.754, 'pos': 0.12, 'com...",Negative
4,2020-05-19,https://www.federalreserve.gov/newsevents/test...,"Chairman Crapo, Ranking Member Brown, and othe...","{'neg': 0.078, 'neu': 0.756, 'pos': 0.166, 'co...",Positive


In [44]:
# define function to find the most positive and negative sentences in a speech
def find_most_positive_and_negative_sentences(speech):
    """Return the highest- and lowest-scoring sentences of a speech.

    The speech is split with ``nltk.sent_tokenize`` and every sentence is
    scored with the ``'compound'`` value produced by the module-level
    ``analyzer``.

    The maximum and the minimum are searched independently, so a sentence
    that is the most positive one seen so far is still eligible to be the
    most negative one (e.g. when a speech starts with its most negative
    sentence, or contains a single sentence).

    Parameters
    ----------
    speech : str
        Full text of the speech.

    Returns
    -------
    tuple of (str, str)
        ``(most_positive_sentence, most_negative_sentence)``. Both are ``''``
        when the tokenizer yields no sentences. On ties the earliest sentence
        wins.
    """
    # split the speech into sentences
    sentences = nltk.sent_tokenize(speech)
    if not sentences:
        return '', ''

    # score every sentence once, keeping (score, sentence) pairs in order
    scored = [
        (analyzer.polarity_scores(sentence)['compound'], sentence)
        for sentence in sentences
    ]

    # max()/min() return the first extreme element, so ties go to the
    # earliest sentence.
    def by_score(pair):
        return pair[0]

    most_positive = max(scored, key=by_score)
    most_negative = min(scored, key=by_score)

    # return the most positive and negative sentences
    return most_positive[1], most_negative[1]

# Expand the (positive, negative) tuples into two new columns, row-aligned
# with the speeches frame.
speeches[['Most positive sentence', 'Most negative sentence']] = pd.DataFrame(
    speeches['speech'].apply(find_most_positive_and_negative_sentences).tolist(),
    index=speeches.index,
)

In [45]:
speeches.head()

Unnamed: 0,Date,Link,speech,Speech_Scores,Speech_Sentiment,Most positive sentence,Most negative sentence
0,2020-02-11,https://www.federalreserve.gov/newsevents/test...,Chair Powell submitted identical remarks to th...,"{'neg': 0.051, 'neu': 0.796, 'pos': 0.153, 'co...",Positive,This low interest rate environment may limit t...,Residential investment turned up in the second...
1,2020-04-09,https://www.federalreserve.gov/newsevents/test...,Good morning. The challenge we face today is d...,"{'neg': 0.08, 'neu': 0.747, 'pos': 0.172, 'com...",Positive,"Even more importantly, we have acted to safegu...",The coronavirus has spread quickly around the ...
2,2020-05-12,https://www.federalreserve.gov/newsevents/test...,"Chairman Crapo, Ranking Member Brown, members ...","{'neg': 0.106, 'neu': 0.745, 'pos': 0.149, 'co...",Positive,"Over the past two months, the Federal Reserve ...","More than a decade ago, U.S. banking organizat..."
3,2020-05-13,https://www.federalreserve.gov/newsevents/spee...,The coronavirus has left a devastating human a...,"{'neg': 0.126, 'neu': 0.754, 'pos': 0.12, 'com...",Negative,"And fourth, temporary regulatory adjustments t...",The scope and speed of this downturn are witho...
4,2020-05-19,https://www.federalreserve.gov/newsevents/test...,"Chairman Crapo, Ranking Member Brown, and othe...","{'neg': 0.078, 'neu': 0.756, 'pos': 0.166, 'co...",Positive,"In addition to monetary policy, we took forcef...","By these measures and many others, the scope a..."


In [49]:
# using sumy to summarize the speech to understand it better
#!pip install sumy
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.text_rank import TextRankSummarizer

In [50]:
def summarize_speech(text, num_sentences=1):
    """Build an extractive summary of ``text`` with sumy's TextRank summarizer.

    Parameters
    ----------
    text : str
        Plain text to summarize.
    num_sentences : int, optional
        Number of sentences requested from the summarizer. Defaults to 1,
        which matches the previously hard-coded value.

    Returns
    -------
    str
        The selected sentences, converted with ``str`` and joined by single
        spaces.
    """
    # Parse the text using the English tokenizer
    parser = PlaintextParser.from_string(text, Tokenizer("english"))

    # Initialize the TextRank summarizer
    summarizer = TextRankSummarizer()

    # Generate the summary and join the sentences
    selected = summarizer(parser.document, num_sentences)
    return " ".join(str(sentence) for sentence in selected)

In [51]:
# Summarize every speech with summarize_speech and store the result in 'Summary'
speeches['Summary'] = speeches['speech'].apply(summarize_speech)

In [52]:
# Label each summary with get_sentiment (same labelling as used for the full speech)
speeches['Summary_Sentiment'] = speeches['Summary'].apply(get_sentiment)

In [53]:
# Attach each index's percent change to the speeches by date.
# Left joins keep every speech even when no price row exists for its date.
df = (
    speeches
    .merge(nasdaq[['Date', 'nasdaq_Pct_Change']], on='Date', how='left')
    .merge(sp500[['Date', 'sp500_Pct_Change']], on='Date', how='left')
)

In [54]:
df

Unnamed: 0,Date,Link,speech,Speech_Scores,Speech_Sentiment,Most positive sentence,Most negative sentence,Summary,Summary_Sentiment,nasdaq_Pct_Change,sp500_Pct_Change
0,2020-02-11,https://www.federalreserve.gov/newsevents/test...,Chair Powell submitted identical remarks to th...,"{'neg': 0.051, 'neu': 0.796, 'pos': 0.153, 'co...",Positive,This low interest rate environment may limit t...,Residential investment turned up in the second...,"Over the second half of 2019, the FOMC shifted...",Neutral,-0.433328,-0.241245
1,2020-04-09,https://www.federalreserve.gov/newsevents/test...,Good morning. The challenge we face today is d...,"{'neg': 0.08, 'neu': 0.747, 'pos': 0.172, 'com...",Positive,"Even more importantly, we have acted to safegu...",The coronavirus has spread quickly around the ...,Many of the programs we are undertaking to sup...,Positive,-0.188881,0.462011
2,2020-05-12,https://www.federalreserve.gov/newsevents/test...,"Chairman Crapo, Ranking Member Brown, members ...","{'neg': 0.106, 'neu': 0.745, 'pos': 0.149, 'co...",Positive,"Over the past two months, the Federal Reserve ...","More than a decade ago, U.S. banking organizat...",We advised institutions that working construct...,Positive,-2.412975,-2.360265
3,2020-05-13,https://www.federalreserve.gov/newsevents/spee...,The coronavirus has left a devastating human a...,"{'neg': 0.126, 'neu': 0.754, 'pos': 0.12, 'com...",Negative,"And fourth, temporary regulatory adjustments t...",The scope and speed of this downturn are witho...,Long stretches of unemployment can damage or e...,Negative,-1.586488,-1.600218
4,2020-05-19,https://www.federalreserve.gov/newsevents/test...,"Chairman Crapo, Ranking Member Brown, and othe...","{'neg': 0.078, 'neu': 0.756, 'pos': 0.166, 'co...",Positive,"In addition to monetary policy, we took forcef...","By these measures and many others, the scope a...","In addition to monetary policy, we took forcef...",Positive,-0.459065,-0.869907
5,2020-05-21,https://www.federalreserve.gov/newsevents/spee...,Good afternoon. I just want to say a few words...,"{'neg': 0.034, 'neu': 0.844, 'pos': 0.122, 'co...",Positive,But all of us have our own decisions to make a...,"And while the burden is widespread, it is not ...","From an economic perspective, we hope to learn...",Neutral,-0.963293,-0.721898
6,2020-06-16,https://www.federalreserve.gov/newsevents/test...,Chair Powell submitted identical remarks to th...,"{'neg': 0.064, 'neu': 0.782, 'pos': 0.154, 'co...",Positive,"To support the small business sector, we estab...",I want to end by acknowledging the tragic even...,To provide stability to the financial system a...,Positive,-0.541823,-0.199936
7,2020-06-19,https://www.federalreserve.gov/newsevents/spee...,"Thank you, President Mester and Treye Johnson,...","{'neg': 0.109, 'neu': 0.759, 'pos': 0.132, 'co...",Positive,"But given the opportunity, I'll always bet on ...",A particular cruelty of the pandemic has been ...,And employers' input has influenced work acros...,Neutral,-0.95607,-1.35497
8,2020-06-30,https://www.federalreserve.gov/newsevents/test...,"Chairwoman Waters, Ranking Member McHenry, and...","{'neg': 0.059, 'neu': 0.765, 'pos': 0.176, 'co...",Positive,"In addition to these steps, we took forceful m...",In contrast to the 2008 crisis when banks pull...,"In addition to these steps, we took forceful m...",Positive,1.857966,1.642187
9,2020-08-27,https://www.federalreserve.gov/newsevents/spee...,"Thank you, Esther, for that introduction, and ...","{'neg': 0.059, 'neu': 0.826, 'pos': 0.115, 'co...",Positive,This emphasis on transparency reflected what w...,Having declined significantly in the five year...,"For the past year and a half, my colleagues an...",Neutral,-0.537727,-0.016929
