In [1]:
import pandas as pd
import altair as alt

In [2]:
# import scraped csv to pandas df
briefings_df = pd.read_csv('../data/cleaned_briefings.csv')
briefings_df

Unnamed: 0,date,timestamp,speaker,text
0,2020-02-26,05:39,Donald Trump,Thank you very much everybody. Thank you very ...
1,2020-02-26,06:59,Donald Trump,A lot of people thought we shouldn’t have done...
2,2020-02-26,07:51,Donald Trump,We have a total of 15. We took in some from Ja...
3,2020-02-26,09:58,Donald Trump,China you know about. Where it started. I spok...
4,2020-02-26,10:52,Donald Trump,"We’re bringing in a specialist, a very highly ..."
...,...,...,...,...
9674,2020-04-27,01:01:23,Unnamed (Reporter),[crosstalk 00:13:23].
9675,2020-04-27,01:01:24,Donald Trump,"Let’s do one more. Please, in the back."
9676,2020-04-27,01:01:26,Unnamed,If an American president loses more Americans ...
9677,2020-04-27,01:01:36,Donald Trump,"So yeah, we’ve lost a lot of people, but if yo..."


### Emotion Analysis using the NRC Emotion Lexicon

- Rather than using just the simple TextBlob or Vader packages for sentiment analysis, I thought it would be interesting to explore emotional tone using
- Let's see what we can uncover using the popular open-sourced emotion lexicon published by the NRC (National Research Council Canada).
- In addition to 'positive' and 'negative', we have word associations for 8 overarching emotion categories.
- For simplicity, let's remove words without scores as well as those that are associated with 8 or more of the 10 categories 

In [53]:
# read in raw emotion lexicon
filepath = "../NRC-Sentiment-Emotion-Lexicons/NRC-Emotion-Lexicon-v0.92/NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
emolex_df = pd.read_csv(filepath,  names=["word", "emotion", "association"], skiprows=1, sep='\t')

# pivot df so we have one row per word, one column per emotion
emolex_df = emolex_df.pivot(index='word', columns='emotion', values='association').reset_index()

# rename df column
emolex_df.columns.name = 'index'

# filter out words without scores and with more than 7 scores
emolex_df = emolex_df[emolex_df.sum(axis=1)>0].reset_index(drop=True)
emolex_df = emolex_df[emolex_df.sum(axis=1)<7].reset_index(drop=True)
emolex_df

index,word,anger,anticipation,disgust,fear,joy,negative,positive,sadness,surprise,trust
0,abacus,0,0,0,0,0,0,0,0,0,1
1,abandon,0,0,0,1,0,1,0,1,0,0
2,abandoned,1,0,0,1,0,1,0,1,0,0
3,abandonment,1,0,0,1,0,1,0,1,1,0
4,abba,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
6437,zany,0,0,0,0,0,0,0,0,1,0
6438,zeal,0,1,0,0,1,0,1,0,1,1
6439,zealous,0,0,0,0,1,0,1,0,0,1
6440,zest,0,1,0,0,1,0,1,0,0,1


#### Using this lexicon, we can now we can now easily lookup all words from a single paragraph of the corpus:

In [39]:
paragraph_words = briefings_df.text[500].split()
emolex_df[pd.DataFrame(emolex_df.word.tolist()).isin(paragraph_words).any(1)]

index,word,anger,anticipation,disgust,fear,joy,nrc_negative,nrc_positive,sadness,surprise,trust
1238,big,0,0,0,0,0,0,0,0,0,0
1811,care,0,0,0,0,0,0,0,0,0,0
3088,cut,0,0,0,0,0,0,0,0,0,0
5094,force,1,0,0,1,0,1,0,0,0,0
6334,important,0,0,0,0,0,0,1,0,0,1
7759,matter,0,0,0,0,0,0,0,0,0,0
7814,meeting,0,0,0,0,0,0,0,0,0,0
9956,public,0,1,0,0,0,0,1,0,0,0
12549,task,0,0,0,0,0,0,1,0,0,0
12563,tax,0,0,0,0,0,1,0,1,0,0


#### Let's calculate and store aggregate emotion scores for each paragraph in the corpus:

In [5]:
# create empty df to store aggregated emotion calcs
data = pd.DataFrame([])

for text in briefings_df['text']:
    paragraph_words = text.split()
    paragraph_emos = emolex_df[pd.DataFrame(emolex_df.word.tolist()).isin(paragraph_words).any(1)].mean()
    data = data.append(paragraph_emos, ignore_index=True)
    
# combine aggregated emotion scores with transcript df
briefings_df = briefings_df.join(data)

# drop empty 'word' column, fill NaNs with zero
briefings_df = briefings_df.drop(columns=['word'])
briefings_df = briefings_df.fillna(0)

briefings_df

Unnamed: 0,date,timestamp,speaker,text,anger,anticipation,disgust,fear,joy,nrc_negative,nrc_positive,sadness,surprise,trust
0,2020-02-26,05:39,Donald Trump,Thank you very much everybody. Thank you very ...,0.136364,0.136364,0.090909,0.181818,0.090909,0.272727,0.181818,0.136364,0.090909,0.090909
1,2020-02-26,06:59,Donald Trump,A lot of people thought we shouldn’t have done...,0.055556,0.222222,0.111111,0.166667,0.055556,0.166667,0.222222,0.055556,0.055556,0.166667
2,2020-02-26,07:51,Donald Trump,We have a total of 15. We took in some from Ja...,0.000000,0.130435,0.000000,0.043478,0.173913,0.000000,0.391304,0.043478,0.086957,0.217391
3,2020-02-26,09:58,Donald Trump,China you know about. Where it started. I spok...,0.142857,0.214286,0.000000,0.142857,0.214286,0.214286,0.357143,0.000000,0.071429,0.214286
4,2020-02-26,10:52,Donald Trump,"We’re bringing in a specialist, a very highly ...",0.000000,0.153846,0.000000,0.076923,0.076923,0.230769,0.153846,0.076923,0.000000,0.153846
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9674,2020-04-27,01:01:23,Unnamed (Reporter),[crosstalk 00:13:23].,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9675,2020-04-27,01:01:24,Donald Trump,"Let’s do one more. Please, in the back.",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9676,2020-04-27,01:01:26,Unnamed,If an American president loses more Americans ...,0.333333,0.333333,0.000000,0.000000,0.000000,0.000000,0.666667,0.000000,0.000000,0.666667
9677,2020-04-27,01:01:36,Donald Trump,"So yeah, we’ve lost a lot of people, but if yo...",0.071429,0.142857,0.000000,0.071429,0.071429,0.142857,0.214286,0.071429,0.071429,0.071429


In [6]:
# save scores df to csv
briefings_df.to_csv("../data/scored_briefings.csv",index=False)

In [36]:
briefings_df[briefings_df.sum(axis=1) > 5]

Unnamed: 0,date,timestamp,speaker,text,anger,anticipation,disgust,fear,joy,nrc_negative,nrc_positive,sadness,surprise,trust
48,2020-02-26,33:52,Donald Trump,"Our consumers are incredible, they’re incredib...",1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0
1749,2020-03-19,39:28,John (Reporter),You can say that none of this money can go for...,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0
1910,2020-03-19,01:12:19,Donald Trump,They treat me very nicely. Go ahead.,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2845,2020-03-23,56:03,Donald Trump,"Death from the death, you’re talking about dea...",0.5,1.0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
3867,2020-03-29,01:10:03,Unnamed,"Thank you, sir. I’d also like to ask you about...",1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4126,2020-03-31,37:03,Unnamed,"But doctor, when we look at the curve, it goes...",0.5,1.0,0.5,0.5,0.0,0.5,0.5,0.5,1.0,0.5
4937,2020-04-02,01:22:02,Journalist 5,What do you mean by cash payments?,1.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0
5590,2020-04-04,01:42:42,Donald Trump,"Both. Some states are doing when they land, th...",1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0
6352,2020-04-07,54:09,Donald Trump,You just said death counts?,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0
6551,2020-04-08,33:08,Donald Trump,I had a feeling it was done. Is that what he s...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
