### Analyzing User responses

In [None]:
import pandas as pd
import numpy as np
import sys

import matplotlib.pyplot as plt
# from bertopic import BERTopic
pd.set_option('display.max_colwidth', None)
from scipy import stats

In [3]:
user_resp = pd.read_csv('results/user_response.csv')
news_tweets = pd.read_csv('results/news_tweets.csv')
# news_tweets = pd.read_csv('results/older_sentiment/news_tweets.csv')

In [4]:
user_resp.publication.value_counts()

CNN                    332227
The Washington Post    219282
Fox News               119202
Breitbart News          67333
USA Today               17859
Business Insider         9893
Name: publication, dtype: int64

In [5]:
user_resp.shape

(765796, 43)

In [6]:
user_resp['publish_date'] = pd.to_datetime(user_resp['created_at'])
user_resp['day'] = user_resp.publish_date.dt.day
user_resp['month'] = user_resp.publish_date.dt.month
user_resp['year'] = user_resp.publish_date.dt.year
user_resp['timestamp'] = user_resp.publish_date.dt.time

In [7]:
user_resp.shape

(765796, 48)

In [8]:
user_resp.conversation_id.nunique(), news_tweets.conversation_id.nunique()

(2911, 24584)

In [9]:
R_user_resp = user_resp[((user_resp['publication'] == 'Breitbart News') | (user_resp['publication'] == 'Fox News'))]
L_user_resp = user_resp[((user_resp['publication'] == 'CNN') | (user_resp['publication'] == 'The Washington Post'))]
C_user_resp = user_resp[((user_resp['publication'] == 'Business Insider') | (user_resp['publication'] ==  'USA Today'))]

In [26]:
R_news_tweets = news_tweets[((news_tweets['publication'] == 'Breitbart News') | (news_tweets['publication'] == 'Fox News'))]
L_news_tweets = news_tweets[((news_tweets['publication'] == 'CNN') | (news_tweets['publication'] == 'The Washington Post'))]
C_news_tweets = news_tweets[((news_tweets['publication'] == 'Business Insider') | (news_tweets['publication'] ==  'USA Today'))]

In [27]:
R_news_tweets.shape, C_news_tweets.shape, L_news_tweets.shape

((4115, 49), (7362, 49), (13107, 49))

### Average number of user responses per news tweet

In [11]:
R_user_resp.shape[0]/R_user_resp.conversation_id.nunique(), L_user_resp.shape[0]/L_user_resp.conversation_id.nunique(), C_user_resp.shape[0]/C_user_resp.conversation_id.nunique()

(396.8829787234043, 313.5355315520182, 40.69208211143695)

In [12]:
cols = ['topics', 'like_count', 'quote_count', 'reply_count', 'retweet_count', 'anger', 'joy', 'optimism', 'sadness', 'trump_pos', 'trump_neg', 'trump_neu', 'biden_pos', 'biden_neg', 'biden_neu']

In [13]:
data = []
for top, grp in R_user_resp.groupby('topics'):
    num_resp = grp.shape[0]/grp.conversation_id.nunique()
    
    data.append((top, num_resp))
df_resp_pt_R = pd.DataFrame(data, columns = ['topics', 'resp_per_tweet'])

In [14]:
data = []
for top, grp in L_user_resp.groupby('topics'):
    num_resp = grp.shape[0]/grp.conversation_id.nunique()
    
    data.append((top, num_resp))
df_resp_pt_L = pd.DataFrame(data, columns = ['topics', 'resp_per_tweet'])

In [36]:
# for top in set(news_tweets.topics):
    
#     print(top, R_news_tweets[R_news_tweets.topics == top].Trump_flag.value_counts())

In [38]:
news_tweets[news_tweets.topics == 'capitol']

Unnamed: 0.7,Unnamed: 0.6,Unnamed: 0.5,Unnamed: 0.4,Unnamed: 0,Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,tweet_id,...,trump_pos_new,trump_neg_new,trump_neu_new,biden_pos_new,biden_neg_new,biden_neu_new,Trump_flag,Biden_flag,Trump_Biden_flag,topic_id
8,8,8,8,8,8,8,8,8,8,1355803043120480256,...,0.362204,0.424258,0.213537,,,,True,False,False,17
68,77,77,82,90,90,90,90,90,90,1354288962957938692,...,0.003119,0.990542,0.006339,,,,True,False,False,17
94,104,104,112,122,122,122,122,122,122,1353994874656346112,...,,,,0.416678,0.489520,0.093802,False,True,False,17
150,175,175,191,210,210,210,210,210,210,1352813578613620736,...,,,,0.387591,0.262706,0.349703,False,True,False,17
175,201,201,221,247,247,247,247,247,247,1352447411084472324,...,,,,0.032023,0.135405,0.832572,False,True,False,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23593,25418,25418,33346,37283,37283,52087,52087,52087,52087,1287861933048311808,...,0.027021,0.053664,0.919315,,,,True,False,False,17
23746,25571,25571,33579,37538,37538,52342,52342,52342,52342,1279268195791253505,...,0.103578,0.052376,0.844046,,,,True,False,False,17
23889,25715,25715,33777,37756,37756,52560,52560,52560,52560,1271805293245222912,...,0.025168,0.090636,0.884196,,,,True,False,False,17
24390,26218,26218,34505,38577,38577,53381,53381,53381,53381,1239320470270476290,...,0.027032,0.857785,0.115184,,,,True,False,False,17


In [47]:
user_resp[user_resp.conversation_id.isin(news_tweets[news_tweets.topics == 'conspiracy_theory'].conversation_id)].subtopic.value_counts()

fact-check-general      9225
others-general          7005
democarts-kamala         800
republican-trump         528
social_media-general     320
republican-general       121
covid-mask               112
security-general          42
white_house-general        8
Name: subtopic, dtype: int64

In [41]:
user_resp[user_resp.topics == 'fact-check'].subtopic.value_counts()

fact-check-general       13204
fact-check-misleading     2351
Name: subtopic, dtype: int64

In [266]:
data = []
for top, grp in C_user_resp.groupby('topics'):
    num_resp = grp.shape[0]/grp.conversation_id.nunique()
    
    data.append((top, num_resp))
df_resp_pt_C = pd.DataFrame(data, columns = ['topics', 'resp_per_tweet'])

In [277]:
df_resp_pt_L.merge(df_resp_pt_C, on = 'topics').merge(df_resp_pt_R, on = 'topics').round()

Unnamed: 0,topics,resp_per_tweet_x,resp_per_tweet_y,resp_per_tweet
0,america,233.0,77.0,192.0
1,blm,297.0,33.0,230.0
2,capitol,266.0,35.0,623.0
3,covid,267.0,67.0,608.0
4,democarts,226.0,52.0,332.0
5,economy,208.0,17.0,421.0
6,election,268.0,40.0,547.0
7,fact-check,467.0,31.0,243.0
8,immigration,125.0,3.0,96.0
9,international,391.0,17.0,45.0


In [502]:
C_user_resp.shape[0]/C_user_resp.conversation_id.nunique(), L_user_resp.shape[0]/L_user_resp.conversation_id.nunique(), R_user_resp.shape[0]/R_user_resp.conversation_id.nunique()

(40.69208211143695, 313.5355315520182, 396.8829787234043)

In [226]:
# R_user_resp[cols].describe()

In [213]:
# L_user_resp[cols].describe()

In [214]:
# C_user_resp[cols].describe()

In [286]:
cols = ['topics', 'like_count', 'quote_count', 'reply_count', 'retweet_count', 'anger', 'joy', 'optimism', 'sadness', 'trump_pos', 'trump_neg', 'trump_neu', 'biden_pos', 'biden_neg', 'biden_neu']

### Compare mean emotion values (L vs R vs C) across topics

In [287]:
R_user_resp_topic_mean = R_user_resp[cols].groupby('topics').mean()
L_user_resp_topic_mean = L_user_resp[cols].groupby('topics').mean()
C_user_resp_topic_mean = C_user_resp[cols].groupby('topics').mean()

In [289]:
R_user_resp_topic_mean.to_csv('results/user_resp_analysis/R_mean_emotions.csv')
L_user_resp_topic_mean.to_csv('results/user_resp_analysis/L_mean_emotions.csv')
C_user_resp_topic_mean.to_csv('results/user_resp_analysis/C_mean_emotions.csv')

#### Comparison based on Positive or negative mentions of Trump and Biden

In [367]:
def get_pos_neg_mentions(df, entity):
    
    entity_mentions = df[df[entity.title() + '_flag'] == True]
    pos_mean = entity_mentions[entity + '_pos'].mean()
    neu_mean = entity_mentions[entity + '_neu'].mean()
    neg_mean = entity_mentions[entity + '_neg'].mean()
    pos_entity_mentions = entity_mentions[((entity_mentions[entity + '_pos'] > pos_mean))]
    neg_entity_mentions = entity_mentions[((entity_mentions[entity + '_neg'] > neg_mean))]
    
    return pos_entity_mentions, neg_entity_mentions

In [368]:
T_pos_mentions_R,  T_neg_mentions_R = get_pos_neg_mentions(R_user_resp, "trump")
B_pos_mentions_R,  B_neg_mentions_R = get_pos_neg_mentions(R_user_resp, "biden")

T_pos_mentions_L,  T_neg_mentions_L = get_pos_neg_mentions(L_user_resp, "trump")
B_pos_mentions_L,  B_neg_mentions_L = get_pos_neg_mentions(L_user_resp, "biden")

In [369]:
# import seaborn as sns
# sns.displot(R_user_resp.trump_pos)

In [370]:
T_pos_mentions_R.shape, T_neg_mentions_R.shape, B_pos_mentions_R.shape, B_neg_mentions_R.shape

((21919, 48), (57444, 48), (47445, 48), (38275, 48))

In [371]:
T_pos_mentions_L.shape, T_neg_mentions_L.shape, B_pos_mentions_L.shape, B_neg_mentions_L.shape

((112717, 48), (246742, 48), (73686, 48), (56858, 48))

In [377]:
def get_mean_emotions_for_pos_neg_entity_mentions(df1, df2, ent, cols):
    
    return pd.concat((pd.DataFrame(df1[cols].mean(), columns = [ent + '_R']), df2[cols].mean()), axis = 1).rename(columns={0: ent + "_L"})
    # return pd.concat((pd.DataFrame(df1[cols].mean(), columns = [ent + '_pos']), df2[cols].mean()), axis = 1).rename(columns={0: ent + "_neg"})

_df1 = get_mean_emotions_for_pos_neg_entity_mentions(T_pos_mentions_R, T_pos_mentions_L, ent = 'Trump_pos', cols = ['anger', 'joy', 'optimism', 'sadness'])
_df2 = get_mean_emotions_for_pos_neg_entity_mentions(T_neg_mentions_R, T_neg_mentions_L, ent = 'Trump_neg', cols = ['anger', 'joy', 'optimism', 'sadness'])
_df3 = get_mean_emotions_for_pos_neg_entity_mentions(B_pos_mentions_R, B_pos_mentions_L, ent = 'Biden_pos', cols = ['anger', 'joy', 'optimism', 'sadness'])
_df4 = get_mean_emotions_for_pos_neg_entity_mentions(B_neg_mentions_R, B_neg_mentions_L, ent = 'Biden_neg', cols = ['anger', 'joy', 'optimism', 'sadness'])

# _df1 = get_mean_emotions_for_pos_neg_entity_mentions(T_pos_mentions_R, T_neg_mentions_R, ent = 'Trump_R', cols = ['anger', 'joy', 'optimism', 'sadness'])
# _df2 = get_mean_emotions_for_pos_neg_entity_mentions(T_pos_mentions_L, T_neg_mentions_L, ent = 'Trump_L', cols = ['anger', 'joy', 'optimism', 'sadness'])
# _df3 = get_mean_emotions_for_pos_neg_entity_mentions(B_pos_mentions_R, B_neg_mentions_R, ent = 'Biden_R', cols = ['anger', 'joy', 'optimism', 'sadness'])
# _df4 = get_mean_emotions_for_pos_neg_entity_mentions(B_pos_mentions_L, B_neg_mentions_L, ent = 'Biden_L', cols = ['anger', 'joy', 'optimism', 'sadness'])

comb_df = pd.concat((_df1, _df2, _df3, _df4), axis = 1)
comb_df

# comb_df.to_csv('results/user_resp_analysis/mean_emotions_for_pos_vs_neg_mentions.csv')

Unnamed: 0,Trump_pos_R,Trump_pos_L,Trump_neg_R,Trump_neg_L,Biden_pos_R,Biden_pos_L,Biden_neg_R,Biden_neg_L
anger,0.653085,0.64108,0.683769,0.663073,0.621833,0.551197,0.67502,0.672712
joy,0.107816,0.112407,0.109157,0.109547,0.136809,0.152836,0.110584,0.104745
optimism,0.121151,0.12076,0.099699,0.110665,0.115785,0.166967,0.099052,0.108752
sadness,0.11795,0.125754,0.107374,0.116715,0.125575,0.129,0.115349,0.113788


In [386]:
cols = ['anger', 'joy', 'optimism', 'sadness']

In [402]:
pd.concat((B_pos_mentions_L.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_B-L-pos', 'joy': 'joy_B-L-pos', 'optimism': 'optimism_B-L-pos', 'sadness': 'sadness_B-L-pos'}),
           B_neg_mentions_L.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_B-L-neg', 'joy': 'joy_B-L-neg', 'optimism': 'optimism_B-L-neg', 'sadness': 'sadness_B-L-neg'}),
           B_pos_mentions_R.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_B-R-pos', 'joy': 'joy_B-R-pos', 'optimism': 'optimism_B-R-pos', 'sadness': 'sadness_B-R-pos'}),
           B_neg_mentions_R.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_B-R-neg', 'joy': 'joy_B-R-neg', 'optimism': 'optimism_B-R-neg', 'sadness': 'sadness_B-R-neg'}),
           T_pos_mentions_L.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_T-L-pos', 'joy': 'joy_T-L-pos', 'optimism': 'optimism_T-L-pos', 'sadness': 'sadness_T-L-pos'}),
           T_neg_mentions_L.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_T-L-neg', 'joy': 'joy_T-L-neg', 'optimism': 'optimism_T-L-neg', 'sadness': 'sadness_T-L-neg'}),
           T_pos_mentions_R.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_T-R-pos', 'joy': 'joy_T-R-pos', 'optimism': 'optimism_T-R-pos', 'sadness': 'sadness_T-R-pos'}),
           T_neg_mentions_R.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_T-R-neg', 'joy': 'joy_T-R-neg', 'optimism': 'optimism_T-R-neg', 'sadness': 'sadness_T-R-neg'})), 
           axis = 1).to_csv('results/user_resp_analysis/mean_emotions_pos_neg_men_topic_wise.csv')

  pd.concat((B_pos_mentions_L.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_B-L-pos', 'joy': 'joy_B-L-pos', 'optimism': 'optimism_B-L-pos', 'sadness': 'sadness_B-L-pos'}),
  B_neg_mentions_L.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_B-L-neg', 'joy': 'joy_B-L-neg', 'optimism': 'optimism_B-L-neg', 'sadness': 'sadness_B-L-neg'}),
  B_pos_mentions_R.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_B-R-pos', 'joy': 'joy_B-R-pos', 'optimism': 'optimism_B-R-pos', 'sadness': 'sadness_B-R-pos'}),
  B_neg_mentions_R.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_B-R-neg', 'joy': 'joy_B-R-neg', 'optimism': 'optimism_B-R-neg', 'sadness': 'sadness_B-R-neg'}),
  T_pos_mentions_L.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_T-L-pos', 'joy': 'joy_T-L-pos', 'optimism': 'optimism_T-L-pos', 'sadness': 'sadness_T-L-pos'}),
  T_neg_mentions_L.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_T-L-n

In [398]:
B_neg_mentions_R.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_B-R-neg', 'joy': 'joy_B-R-neg', 'optimism': 'optimism_B-R-neg', 'sadness': 'sadness_B-R-neg'})

  B_neg_mentions_R.groupby('topics').mean()[cols].rename(columns = {'anger': 'anger_B-R-neg', 'joy': 'joy_B-R-neg', 'optimism': 'optimism_B-R-neg', 'sadness': 'sadness_B-R-neg'})


Unnamed: 0_level_0,anger_B-R-neg,joy_B-R-neg,optimism_B-R-neg,sadness_B-R-neg
topics,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
america,0.599,0.077057,0.127186,0.196886
blm,0.658134,0.15712,0.087778,0.096962
capitol,0.76905,0.068542,0.070319,0.092088
covid,0.646907,0.098998,0.105707,0.148396
democarts,0.684946,0.100074,0.102336,0.112639
election,0.603425,0.132098,0.135438,0.129051
fact-check,0.732947,0.080573,0.082953,0.103545
immigration,0.714724,0.072782,0.094827,0.117631
international,0.561167,0.218722,0.068722,0.151556
others,0.670422,0.115699,0.098352,0.115534


In [375]:
comb_df

Unnamed: 0,Trump_R_pos,Trump_R_neg,Trump_L_pos,Trump_L_neg,Biden_R_pos,Biden_R_neg,Biden_L_pos,Biden_L_neg
anger,0.653085,0.683769,0.64108,0.663073,0.621833,0.67502,0.551197,0.672712
joy,0.107816,0.109157,0.112407,0.109547,0.136809,0.110584,0.152836,0.104745
optimism,0.121151,0.099699,0.12076,0.110665,0.115785,0.099052,0.166967,0.108752
sadness,0.11795,0.107374,0.125754,0.116715,0.125575,0.115349,0.129,0.113788


((112717, 48), (246742, 48), (73686, 48), (56858, 48))

In [304]:
L_user_resp.shape

(551509, 48)

In [458]:
def get_normalized_dist(dist):
    
    return (dist - dist.min()) / (dist.max() - dist.min())

def get_normalized_emotion_scores(df):
    
    df['anger_norm'] = get_normalized_dist(df.anger)
    df['optimism_norm'] = get_normalized_dist(df.optimism)
    df['sadness_norm'] = get_normalized_dist(df.sadness)
    df['joy_norm'] = get_normalized_dist(df.joy)
    
    return df

In [472]:
# T_pos_mentions_R = get_normalized_emotion_scores(T_pos_mentions_R)
# B_neg_mentions_R = get_normalized_emotion_scores(B_neg_mentions_R)
# T_neg_mentions_R = get_normalized_emotion_scores(T_neg_mentions_R)
# B_pos_mentions_R = get_normalized_emotion_scores(B_pos_mentions_R)

# T_pos_mentions_L = get_normalized_emotion_scores(T_pos_mentions_L)
# B_neg_mentions_L = get_normalized_emotion_scores(B_neg_mentions_L)
# T_neg_mentions_L = get_normalized_emotion_scores(T_neg_mentions_L)
# B_pos_mentions_L = get_normalized_emotion_scores(B_pos_mentions_L)

In [473]:
# R_fav = pd.concat((T_pos_mentions_R, B_neg_mentions_R), axis = 0)
# R_unfav = pd.concat((T_neg_mentions_R, B_pos_mentions_R), axis = 0)
# R_fav.shape, R_unfav.shape

# L_fav = pd.concat((T_neg_mentions_L, B_pos_mentions_L), axis = 0)
# L_unfav = pd.concat((T_pos_mentions_L, B_neg_mentions_L), axis = 0)
# L_fav.shape, L_unfav.shape

((320428, 52), (169575, 52))

In [488]:
def get_pos_and_neg_mentions(df, pub, ent):

    pos_mentions = df[((df['publication'] == pub) & (df[ent + '_pos'] > df[ent + '_pos'].mean()))]
    neg_mentions = df[((df['publication'] == pub) & (df[ent + '_neg'] > df[ent + '_neg'].mean()))]
    
    return pos_mentions, neg_mentions

In [489]:
T_pos_men_CNN, T_neg_men_CNN = get_pos_and_neg_mentions(user_resp, 'CNN', 'trump')
B_pos_men_CNN, B_neg_men_CNN = get_pos_and_neg_mentions(user_resp, 'CNN', 'biden')

T_pos_men_TWP, T_neg_men_TWP = get_pos_and_neg_mentions(user_resp, 'The Washington Post', 'trump')
B_pos_men_TWP, B_neg_men_TWP = get_pos_and_neg_mentions(user_resp, 'The Washington Post', 'biden')

T_pos_men_UST, T_neg_men_UST = get_pos_and_neg_mentions(user_resp, 'USA Today', 'trump')
B_pos_men_UST, B_neg_men_UST = get_pos_and_neg_mentions(user_resp, 'USA Today', 'biden')

T_pos_men_BI, T_neg_men_BI = get_pos_and_neg_mentions(user_resp, 'Business Insider', 'trump')
B_pos_men_BI, B_neg_men_BI = get_pos_and_neg_mentions(user_resp, 'Business Insider', 'biden')

T_pos_men_FN, T_neg_men_FN = get_pos_and_neg_mentions(user_resp, 'Fox News', 'trump')
B_pos_men_FN, B_neg_men_FN = get_pos_and_neg_mentions(user_resp, 'Fox News', 'biden')

T_pos_men_BN, T_neg_men_BN = get_pos_and_neg_mentions(user_resp, 'Breitbart News', 'trump')
B_pos_men_BN, B_neg_men_BN = get_pos_and_neg_mentions(user_resp, 'Breitbart News', 'biden')

In [498]:
R_fav = pd.concat((T_pos_men_FN, T_pos_men_BN, B_neg_men_FN, B_neg_men_BN), axis = 0)
R_unfav = pd.concat((T_neg_men_FN, T_neg_men_BN, B_pos_men_FN, B_pos_men_BN), axis = 0)
print(R_fav.shape, R_unfav.shape)

L_fav = pd.concat((B_pos_men_CNN, B_pos_men_TWP, T_neg_men_CNN, T_neg_men_TWP), axis = 0)
L_unfav = pd.concat((T_pos_men_CNN, T_pos_men_TWP, B_neg_men_CNN, B_neg_men_TWP,), axis = 0)
L_fav.shape, L_unfav.shape

(60316, 48) (101963, 48)


((322635, 48), (167388, 48))

In [494]:
cols = ['anger', 'optimism', 'sadness', 'joy']
pd.DataFrame(L_fav[cols].mean())

Unnamed: 0,0
anger,0.637492
optimism,0.123575
sadness,0.119448
joy,0.119485


In [495]:
pd.DataFrame(L_unfav[cols].mean())

Unnamed: 0,0
anger,0.65108
optimism,0.116906
sadness,0.121676
joy,0.110337


In [496]:
pd.DataFrame(R_fav[cols].mean())

Unnamed: 0,0
anger,0.666838
optimism,0.107213
sadness,0.116315
joy,0.109639


In [497]:
pd.DataFrame(R_unfav[cols].mean())

Unnamed: 0,0
anger,0.655879
optimism,0.106865
sadness,0.115183
joy,0.122073


In [474]:
def get_mean_emotions_for_pos_neg_entity_mentions(df1, df2, ent, cols):
    
    return pd.concat((pd.DataFrame(df1[cols].mean(), columns = [ent + '_R']), df2[cols].mean()), axis = 1).rename(columns={0: ent + "_L"})

_df1 = get_mean_emotions_for_pos_neg_entity_mentions(R_fav, R_unfav, ent = 'Right', cols = ['anger', 'joy', 'optimism', 'sadness'])
_df2 = get_mean_emotions_for_pos_neg_entity_mentions(L_fav, L_unfav, ent = 'Left', cols = ['anger', 'joy', 'optimism', 'sadness'])
# _df3 = get_mean_emotions_for_pos_neg_entity_mentions(T_pos_mentions_L, T_neg_mentions_L, ent = 'Trump_L', cols = ['anger', 'joy', 'optimism', 'sadness'])
# _df4 = get_mean_emotions_for_pos_neg_entity_mentions(B_pos_mentions_L, B_neg_mentions_L, ent = 'Biden_L', cols = ['anger', 'joy', 'optimism', 'sadness'])
comb_df = pd.concat((_df1, _df2), axis = 1)
comb_df

Unnamed: 0,Right_R,Right_L,Left_R,Left_L
anger,0.667032,0.655753,0.637346,0.651686
joy,0.109576,0.121665,0.119502,0.109838
optimism,0.107099,0.106975,0.123612,0.116734
sadness,0.116296,0.115607,0.11954,0.121742


In [468]:
R_fav.columns

Index(['Unnamed: 0.5', 'Unnamed: 0.4', 'Unnamed: 0.3', 'Unnamed: 0.2',
       'Unnamed: 0.1', 'Unnamed: 0', 'tweet_id', 'conversation_id',
       'author_id', 'created_at', 'geo', 'lang', 'like_count', 'quote_count',
       'reply_count', 'retweet_count', 'source', 'text', 'anger', 'joy',
       'optimism', 'sadness', 'publication', 'topic_labels', 'theme',
       'pos_senti', 'neu_senti', 'neg_senti', 'date', 'week', 'flag',
       'Trump_flag', 'Biden_flag', 'Trump_Biden_flag', 'trump_pos',
       'trump_neg', 'trump_neu', 'biden_pos', 'biden_neg', 'biden_neu',
       'topics', 'topic_ids', 'subtopic', 'publish_date', 'day', 'month',
       'year', 'timestamp', 'anger_norm', 'optimism_norm', 'sadness_norm',
       'joy_norm'],
      dtype='object')

In [475]:
def get_mean_emotions_for_pos_neg_entity_mentions(df1, df2, ent, cols):
    
    return pd.concat((pd.DataFrame(df1[cols].mean(), columns = [ent + '_R']), df2[cols].mean()), axis = 1).rename(columns={0: ent + "_L"})

_df1 = get_mean_emotions_for_pos_neg_entity_mentions(R_fav, R_unfav, ent = 'Right', cols = ['anger_norm', 'optimism_norm', 'sadness_norm', 'joy_norm'])
_df2 = get_mean_emotions_for_pos_neg_entity_mentions(L_fav, L_unfav, ent = 'Left', cols = ['anger_norm', 'optimism_norm', 'sadness_norm', 'joy_norm'])
# _df3 = get_mean_emotions_for_pos_neg_entity_mentions(T_pos_mentions_L, T_neg_mentions_L, ent = 'Trump_L', cols = ['anger', 'joy', 'optimism', 'sadness'])
# _df4 = get_mean_emotions_for_pos_neg_entity_mentions(B_pos_mentions_L, B_neg_mentions_L, ent = 'Biden_L', cols = ['anger', 'joy', 'optimism', 'sadness'])
comb_df = pd.concat((_df1, _df2), axis = 1)
comb_df

Unnamed: 0,Right_R,Right_L,Left_R,Left_L
anger_norm,0.675298,0.663067,0.644326,0.659211
optimism_norm,0.111365,0.110252,0.126447,0.120314
sadness_norm,0.114471,0.11371,0.118381,0.119854
joy_norm,0.109963,0.122335,0.120923,0.111058


In [471]:
cols = ['anger_norm', 'optimism_norm', 'sadness_norm', 'joy_norm']
R_fav[cols]

Unnamed: 0,anger_norm,optimism_norm,sadness_norm,joy_norm
21396,0.028542,0.787781,0.112130,0.114907
21397,0.272171,0.090032,0.638124,0.008282
21398,0.919470,0.031083,0.040775,0.015528
21399,0.961264,0.010718,0.022426,0.011387
21400,0.525994,0.026795,0.443425,0.008282
...,...,...,...,...
760809,,,,
760810,,,,
760811,,,,
760812,,,,


['anger', 'joy', 'optimism', 'sadness']

### Mean emotions across topics (L vs R)

In [305]:
df = pd.DataFrame()
cols = ['anger', 'joy', 'optimism', 'sadness']
df = pd.concat((df, L_user_resp[cols].describe().loc['mean']), axis = 1)
df = pd.concat((df, C_user_resp[cols].describe().loc['mean']), axis = 1)
df = pd.concat((df, R_user_resp[cols].describe().loc['mean']), axis = 1)
# df.T.to_csv('results/user_resp_analysis/mean_emotions.csv')
df.T

Unnamed: 0,anger,joy,optimism,sadness
mean,0.637153,0.118623,0.120524,0.123699
mean,0.617871,0.129014,0.123748,0.129362
mean,0.654856,0.11877,0.108667,0.117708


### Fav-vs-UnFav

In [None]:
# # df_L_T = L_user_resp[L_user_resp.Trump_flag == True]

# L_unfav1, L_fav1 = get_pos_neg_mentions(L_user_resp[L_user_resp.Trump_flag == True], 'trump')
# L_fav2, L_unfav2 = get_pos_neg_mentions(L_user_resp[L_user_resp.Biden_flag == True], 'biden')

# L_fav = pd.concat((L_fav1, L_fav2), axis = 0)
# L_unfav = pd.concat((L_unfav1, L_unfav2), axis = 0)

# # df_L_T = L_user_resp[L_user_resp.Trump_flag == True]

# R_fav1, R_unfav1 = get_pos_neg_mentions(R_user_resp[R_user_resp.Trump_flag == True], 'trump')
# R_unfav2, R_fav2 = get_pos_neg_mentions(R_user_resp[R_user_resp.Biden_flag == True], 'biden')

# R_fav = pd.concat((R_fav1, R_fav2), axis = 0)
# R_unfav = pd.concat((R_unfav1, R_unfav2), axis = 0)