# Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from wordcloud import WordCloud
import geopandas
pd.options.display.max_columns = 100
plt.style.use(['default'])

# Constants

In [3]:
themes = [
    'Authorities & Politics', 'Cases and deaths', 'Economic impact', 'Educational impact', 'People stories',
       'Preventive measures', 'Vaccines and vaccination', 'Virus spreading'
]

subthemes = [
    'subtheme11', 'subtheme12', 'subtheme21', 'subtheme22', 'subtheme41', 'subtheme42', 'subtheme43', 
    'subtheme44', 'subtheme51', 'subtheme52', 'subtheme53', 'subtheme61', 'subtheme62', 'subtheme63', 
    'subtheme64', 'subtheme65', 'subtheme66', 'subtheme71', 'subtheme72', 'subtheme73', 'subtheme74'
]

emotions = [
    'anger', 'sadness', 'optimism', 'joy'
]

keywords = [
    'remote working', 'homeschooling', 'panic buying', 'sars-cov-2', 'wearing masks', 'ncov', 'wuhan', 
    'social distancing', 'vaccination', 'quarantine', 'outbreak', 'vaccine', 'lockdown', 'pandemic', 
    'coronavirus', 'covid'
]
keywords2 = [w.replace(' ', '_').replace('-', '_') for w in keywords]

# Functions

In [4]:
def millions(x, pos):
    'The two args are the value and tick position'
    return '%1.1fM' % (x * 1e-6)

def thousands(x, pos):
    'The two args are the value and tick position'
    return '%1.fm' % (x * 1e-3)

# Load and process lookup tables

In [5]:
# Load lookup tables
media_list = pd.read_csv('./../data/external/MediaList.csv', sep=";")
theme_desc = pd.read_csv('./../data/external/theme_desc.csv')
subtheme_desc = pd.read_csv('./../data/external/subtheme_desc.csv')
#topics = pd.read_parquet('./news_tweets_topics.parquet')
topics = pd.read_parquet('./../data/processed/news_tweets_topics_v4.parquet')
news_accounts = pd.read_parquet('./../data/raw/news_accounts.parquet')
users = pd.read_parquet('./../data/raw/users.parquet')

# Process lookup tables
topics['tweetId'] = topics.tweetId.astype('float64')
news_accounts = news_accounts.merge(media_list)
news_accounts['userId'] = news_accounts.userId.astype('float64')

# Load and process News Tweets

In [6]:
# Load news tweets
news_tweets = pd.read_parquet('./../data/raw/news_tweets_with_em_scores.parquet')

# Define prevalent emotion and emotion score
news_tweets['emotion_score'] = news_tweets[['anger','joy','optimism','sadness']].max(axis=1)
news_tweets['prevalent_emotion'] = news_tweets[['anger','joy','optimism','sadness']].idxmax(axis=1)
news_tweets['prevalent_emotion'] = np.where(news_tweets.emotion_score>0.5, news_tweets.prevalent_emotion, 'undefined')

# Include country information and filter valid news accountsw
news_tweets = news_tweets.merge(news_accounts[['userId','country']])
news_tweets = news_tweets[~news_tweets[emotions].isnull().any(axis=1)]
news_tweets = news_tweets.sort_values('date').drop_duplicates('conversationId', keep='first')
news_tweets = news_tweets[news_tweets.lang=='en']
print('Number of valid news tweets:', news_tweets.shape[0])

# Include theme and subtheme
news_tweets = news_tweets[~news_tweets.conversationId.isnull()].merge(topics[['theme'] + subthemes + ['tweetId']], how='left', on='tweetId')
news_tweets['subtheme'] = news_tweets[subthemes].idxmax(axis=1)
news_tweets['subtheme'] = news_tweets.subtheme.apply(lambda x: int(x[-2:]))
news_tweets['aux'] = news_tweets[subthemes].sum(axis=1)
news_tweets.loc[news_tweets.aux==0,'subtheme'] = news_tweets.loc[news_tweets.aux==0,'theme']*10
news_tweets = news_tweets.drop(subthemes + ['aux'], axis=1)

# Include date and week features
news_tweets['ds'] = news_tweets.date.dt.date
news_tweets['dsw'] = (news_tweets['date'] + pd.offsets.Week(weekday=6)).dt.date

Number of valid news tweets: 1678217


# Load and process Comments

In [7]:
# Load valid comments
plain_comments = pd.read_parquet('./../data/raw/comments.parquet')
comments = pd.read_parquet('./../data/raw/user_tweets_with_em_scores.parquet')
comments = comments[comments.tweetId.isin(plain_comments.tweetId)]; del plain_comments
comments = comments[comments.conversationId.isin(news_tweets.conversationId)]
comments = comments[~comments[emotions].isnull().any(axis=1)]
comments = comments[comments.lang=='en']
print('Number of valid comments:', comments.shape[0])

# Include news tweet data
comments = comments.merge(news_tweets.rename(columns={
    'date':'news_date', 'userId':'news_id', 'prevalent_emotion':'news_prevalent_emotion', 'emotion_score':'news_emotion_score',
    'anger':'news_anger', 'sadness':'news_sadness', 'optimism':'news_optimism', 'joy':'news_joy'
})[[
    'conversationId', 'news_date', 'news_id', 'news_prevalent_emotion', 'news_emotion_score', 
    'news_anger', 'news_sadness', 'news_optimism', 'news_joy', 'theme', 'subtheme', 'country'
]], how='left')

# Define prevalent emotion and emotion score
comments['emotion_score'] = comments[['anger','joy','optimism','sadness']].max(axis=1)
comments['prevalent_emotion'] = comments[['anger','joy','optimism','sadness']].idxmax(axis=1)
comments['prevalent_emotion'] = np.where(comments.emotion_score>0.5, comments.prevalent_emotion, 'undefined')

# Define continent information
comments = comments.merge(pd.DataFrame({
    'country':['AU', 'UK', 'US', 'CA', 'NZ', 'ZA', 'KE', 'NG', 'IN', 'PH', 'MY','IE'],
    'continent':['Oceania', 'Europe', 'America', 'America', 'Oceania', 'Africa', 'Africa', 'Africa', 'Asia', 'Asia', 'Asia', 'Europe']
}))

# Include date, week and month features
comments['ds'] = comments.news_date.dt.date
comments['dsw'] = (comments['news_date'] + pd.offsets.Week(weekday=6)).dt.date
comments['dsm'] = pd.to_datetime(comments.news_date.dt.year.astype(str) + '-' + comments.news_date.dt.month.astype(str) + '-1')

Number of valid comments: 17620904


In [8]:
news_tweets.userId.nunique()

275

# Tables

### General emotion mean

In [9]:
g = comments[emotions].mean()

### Table: News Tweets Engagement

In [10]:
np.round(news_tweets.groupby('prevalent_emotion')[['replyCount', 'retweetCount', 'likeCount', 'quoteCount']].mean(),1)

Unnamed: 0_level_0,replyCount,retweetCount,likeCount,quoteCount
prevalent_emotion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
anger,20.7,36.8,113.2,11.0
joy,8.0,22.5,97.7,7.5
optimism,11.0,22.6,87.8,6.7
sadness,9.5,24.9,66.7,6.6
undefined,10.6,22.4,73.0,7.2


### Table: Emotion over Themes

In [11]:
eot = comments.groupby(['theme']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    eot['lift_' + col] = np.round(eot[col] / g[col], 2)
    
eot = eot[['theme'] + ['lift_' + col for col in emotions]]
eot

Unnamed: 0,theme,lift_anger,lift_sadness,lift_optimism,lift_joy
0,1,0.94,1.19,1.02,0.94
1,2,1.01,0.95,1.03,1.0
2,3,0.95,1.07,1.06,1.06
3,4,0.93,1.2,0.97,1.02
4,5,1.07,0.89,0.93,0.96
5,6,0.99,1.0,0.98,1.07
6,7,1.01,1.01,1.0,0.93
7,8,0.93,0.98,1.21,1.09


### Table: Top News Accounts per Emotion over Themes

In [120]:
accounts_emotions = comments.groupby(['continent', 'country', 'news_id']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    accounts_emotions['lift_' + col] = accounts_emotions[col] / g[col]

accounts_emotions[['lift_'+col for col in emotions]].max()

engaged_news = news_tweets.groupby('userId').tweetId.count()
engaged_news = list(engaged_news[engaged_news > 484].index)

accounts_emotions = accounts_emotions[
    (accounts_emotions.news_id.isin(engaged_news)) & (accounts_emotions.tweetId > 484)
].merge(news_accounts.drop('country',axis=1), left_on='news_id', right_on='userId', how='left')

In [121]:
top_news = pd.DataFrame(columns=accounts_emotions.columns)
for emotion in emotions:
    tmp = accounts_emotions[~accounts_emotions.news_id.isin(top_news.news_id.values)].sort_values(emotion).tail(5)
    tmp['main_emotion'] = emotion
    tmp['rank_emotion'] = range(1,6)
    tmp['score_emotion'] = tmp[emotion].values
    top_news = pd.concat([top_news, tmp])

In [131]:
account_emotions_themes = news_tweets[
    news_tweets.userId.isin(top_news.userId.values)
].groupby('userId').theme.value_counts(normalize=True).rename('prop').reset_index().merge(theme_desc)

account_emotions_themes = account_emotions_themes.pivot('userId', 'theme_desc', 'prop').reset_index()

account_emotions_themes = account_emotions_themes.merge(
    news_accounts[['userId', 'displayname', 'description', 'country', 'followersCount']]
).merge(
    top_news[['userId', 'main_emotion', 'rank_emotion', 'score_emotion']]
)[['displayname', 'country', 'followersCount', 'description', 'main_emotion', 'rank_emotion', 'score_emotion'] + themes]

account_emotions_themes.iloc[:,-9:] = np.round(account_emotions_themes.iloc[:,-9:].values * 100,1)
account_emotions_themes.to_csv('./../data/processed/account_emotions_themes.csv', index=False, sep=';')
account_emotions_themes

Unnamed: 0,displayname,country,followersCount,description,main_emotion,rank_emotion,score_emotion,Authorities & Politics,Cases and deaths,Economic impact,Educational impact,People stories,Preventive measures,Vaccines and vaccination,Virus spreading
0,Fox News,US,20185319,"Follow America's #1 cable news network, delive...",anger,4.0,61.8,30.4,4.3,21.2,4.0,9.5,14.7,9.2,6.7
1,MSNBC,US,4219331,"The place for in-depth analysis, political com...",anger,2.0,60.5,28.5,7.8,24.5,5.0,7.8,10.9,6.6,8.7
2,Business Line,IN,81101,Business Daily from The Hindu group of newspap...,optimism,2.0,30.8,15.2,10.7,35.6,2.0,5.1,13.4,8.4,9.7
3,CNN Politics,US,4136271,"Political news, campaign stories and Washingto...",anger,5.0,61.8,33.3,4.4,29.0,2.9,7.2,11.3,3.7,8.0
4,Manila Bulletin News,PH,858950,Breaking news and stories from different sides...,optimism,1.0,27.0,11.4,15.0,23.9,2.9,10.6,22.0,8.2,6.2
5,BellaNaija,NG,1787670,"We LOVE Everything Fab & African - Fashion, Mu...",joy,3.0,27.1,20.9,4.6,39.0,7.0,4.3,16.0,1.9,6.4
6,BusinessMirror,PH,36128,A broader look at today’s business,optimism,5.0,39.6,7.0,5.3,39.9,4.0,3.8,27.0,6.0,7.1
7,Oneindia News,IN,64578,oneindia.com is a gamut of online resources. F...,sadness,2.0,27.4,25.1,13.8,15.3,2.0,9.5,20.8,7.7,5.8
8,BBC Radio 4,UK,530100,Your friendly lockdown companion - documentari...,joy,5.0,38.2,30.1,2.1,24.5,5.1,9.1,18.0,4.1,7.0
9,Mid Day,IN,670388,All things #MadeinMumbai \nNews | Entertainmen...,joy,4.0,33.7,23.1,9.1,18.8,3.8,9.2,26.7,4.5,4.8


In [158]:
grouped_account_emotions_themes = pd.DataFrame()
for emotion in emotions:
    tmp = news_tweets[
        news_tweets.userId.isin(top_news[top_news.main_emotion==emotion].userId.values)
    ].theme.value_counts(normalize=True).reset_index()
    tmp.columns = ['theme', 'prop']
    tmp = tmp.merge(theme_desc)
    tmp['main_emotion'] = emotion
    grouped_account_emotions_themes = pd.concat([grouped_account_emotions_themes, tmp])
grouped_account_emotions_themes = grouped_account_emotions_themes.pivot('main_emotion', 'theme_desc', 'prop').reset_index()
grouped_account_emotions_themes.iloc[:,-8:] = np.round(grouped_account_emotions_themes.iloc[:,-8:]*100,1)
grouped_account_emotions_themes

theme_desc,main_emotion,Authorities & Politics,Cases and deaths,Economic impact,Educational impact,People stories,Preventive measures,Vaccines and vaccination,Virus spreading
0,anger,29.0,6.8,25.4,4.1,7.3,13.8,5.0,8.5
1,joy,25.8,18.0,17.4,2.5,7.6,17.4,6.6,4.8
2,optimism,10.1,11.0,29.0,3.0,7.3,25.7,7.3,6.6
3,sadness,22.7,13.4,18.3,2.8,9.8,20.7,6.4,5.8


In [170]:
global_emotions_themes = news_tweets.theme.value_counts(normalize=True).reset_index()
global_emotions_themes.columns = ['theme', 'prop']
global_emotions_themes = global_emotions_themes.merge(theme_desc).drop(columns='theme')
global_emotions_themes = global_emotions_themes.set_index('theme_desc').transpose()
np.round(global_emotions_themes[themes] * 100, 2)

theme_desc,Authorities & Politics,Cases and deaths,Economic impact,Educational impact,People stories,Preventive measures,Vaccines and vaccination,Virus spreading
prop,17.49,11.58,24.14,3.65,8.65,20.01,6.02,8.46


### Table: Subthemes per Top News Accounts

In [198]:
global_subtheme = news_tweets.subtheme.value_counts(normalize=True).reset_index()
global_subtheme.columns = ['subtheme', 'global']
global_subtheme['global'] = np.round(global_subtheme['global'] * 100, 2)
global_subtheme = global_subtheme.merge(subtheme_desc).sort_values('subtheme')

subtheme_top_accounts = pd.DataFrame()
for emotion in emotions:
    tmp = news_tweets[
        news_tweets.userId.isin(top_news[top_news.main_emotion==emotion].userId.values)
    ].subtheme.value_counts(normalize=True).reset_index()
    tmp.columns = ['subtheme', 'prop']
    tmp = tmp.merge(subtheme_desc)
    tmp['main_emotion'] = emotion
    subtheme_top_accounts = pd.concat([subtheme_top_accounts, tmp])
subtheme_top_accounts = subtheme_top_accounts.pivot('subtheme_desc', 'main_emotion', 'prop').reset_index()
subtheme_top_accounts.iloc[:,-4:] = np.round(subtheme_top_accounts.iloc[:,-4:]*100,2)
subtheme_top_accounts = subtheme_top_accounts.merge(global_subtheme[['subtheme_desc', 'global']])
subtheme_top_accounts.to_csv('./../data/processed/subthemes_top_news.csv', index=False, sep=';')
subtheme_top_accounts

Unnamed: 0,subtheme_desc,anger,joy,optimism,sadness,global
0,Authorities & Politics (default),13.54,18.2,7.52,16.75,12.81
1,Cases and deaths (default),3.89,11.34,7.28,7.42,7.24
2,Cases and deaths decreasing,0.51,0.47,0.4,0.63,0.49
3,Cases and deaths increasing,2.43,6.15,3.34,5.38,3.85
4,Easing restrictions,0.06,0.04,0.13,0.06,0.08
5,Economic impact (default),12.85,10.18,13.89,10.64,14.15
6,Economic impact over national economies and bi...,6.9,4.03,6.1,3.6,4.64
7,Economic plans to support jobs and food programs,5.62,3.22,9.0,4.07,5.35
8,Educational impact (default),4.13,2.46,2.98,2.84,3.65
9,Elections,0.48,0.12,0.11,0.23,0.19


# Tweets examples

In [12]:
news_tweets.sample(25, random_state=99).merge(news_accounts, on='userId')

Unnamed: 0,tweetId,conversationId,userId,date,content,lang,sourceLabel,replyCount,retweetCount,likeCount,quoteCount,longitude,latitude,place,anger,joy,optimism,sadness,emotion_score,prevalent_emotion,country_x,theme,subtheme,ds,dsw,username,displayname,description,rawDescription,verified,created,followersCount,friendsCount,statusesCount,favouritesCount,listedCount,mediaCount,location,protected,linkUrl,linkTcourl,profileImageUrl,profileBannerUrl,country_y,media_outlet,media_category
0,1.24127e+18,1.24127e+18,36327410.0,2020-03-21 07:45:28+00:00,#Coronavirus #CoronaCrisis | State-wise break...,en,TweetDeck,1.0,3.0,31.0,0.0,,,,0.089,0.015,0.027,0.868,0.868,sadness,IN,1,11,2020-03-21,2020-03-22,htTweets,Hindustan Times,One of India's largest media companies. Latest...,One of India's largest media companies. Latest...,True,2009-04-29 10:11:34+00:00,8078225,132,899060,2929,7501,355049,India,False,http://www.hindustantimes.com,https://t.co/yEyv2GeRpS,https://pbs.twimg.com/profile_images/130014533...,https://pbs.twimg.com/profile_banners/36327407...,IN,Hindustan Times,Newspaper
1,1.239467e+18,1.239467e+18,34713360.0,2020-03-16 08:23:58+00:00,Philippine President Rodrigo Duterte widens a ...,en,SocialFlow,3.0,41.0,60.0,5.0,,,,0.351,0.032,0.113,0.503,0.503,sadness,US,7,71,2020-03-16,2020-03-22,business,Bloomberg,The first word in business news.,The first word in business news.,True,2009-04-23 20:05:17+00:00,7324238,1321,680486,1594,61538,283694,New York and the World,False,http://www.bloomberg.com,http://t.co/YFISwy1upH,https://pbs.twimg.com/profile_images/991818020...,https://pbs.twimg.com/profile_banners/34713362...,US,Bloomberg,Website
2,1.318492e+18,1.318492e+18,7.117605e+17,2020-10-20 10:00:00+00:00,Doctors &amp; researchers around the world are...,en,Twitter Media Studio,1.0,3.0,21.0,1.0,,,,0.058,0.027,0.295,0.62,0.62,sadness,IN,7,72,2020-10-20,2020-10-25,WIONews,WION,#WION: World Is One | Welcome to India’s first...,#WION: World Is One | Welcome to India’s first...,True,2016-03-21 03:44:54+00:00,341172,96,176490,7482,1221,58429,India,False,http://www.wionews.com,https://t.co/mmzWsrtj7H,https://pbs.twimg.com/profile_images/875597226...,https://pbs.twimg.com/profile_banners/71176046...,IN,Zee Media,Newspaper
3,1.287214e+18,1.287214e+18,7.117605e+17,2020-07-26 02:30:00+00:00,This is the first time that the country has ac...,en,TweetDeck,0.0,4.0,30.0,0.0,,,,0.184,0.083,0.091,0.642,0.642,sadness,IN,6,60,2020-07-26,2020-08-02,WIONews,WION,#WION: World Is One | Welcome to India’s first...,#WION: World Is One | Welcome to India’s first...,True,2016-03-21 03:44:54+00:00,341172,96,176490,7482,1221,58429,India,False,http://www.wionews.com,https://t.co/mmzWsrtj7H,https://pbs.twimg.com/profile_images/875597226...,https://pbs.twimg.com/profile_banners/71176046...,IN,Zee Media,Newspaper
4,1.229537e+18,1.229537e+18,759251.0,2020-02-17 22:45:01+00:00,Apple has warned investors that the ongoing co...,en,TweetDeck,47.0,79.0,229.0,21.0,,,,0.243,0.008,0.043,0.705,0.705,sadness,US,2,22,2020-02-17,2020-02-23,CNN,CNN,It’s our job to #GoThere & tell the most diffi...,It’s our job to #GoThere & tell the most diffi...,True,2007-02-09 00:35:02+00:00,53851246,1101,340498,1410,148471,170412,,False,http://www.cnn.com,http://t.co/IaghNW8Xm2,https://pbs.twimg.com/profile_images/127825916...,https://pbs.twimg.com/profile_banners/759251/1...,US,CNN,Television
5,1.318225e+18,1.318225e+18,13850420.0,2020-10-19 16:20:05+00:00,"Dr. Anthony Fauci said he is ""absolutely not"" ...",en,SocialFlow,79.0,118.0,498.0,8.0,,,,0.098,0.15,0.601,0.151,0.601,optimism,US,5,51,2020-10-19,2020-10-25,CNNPolitics,CNN Politics,"Political news, campaign stories and Washingto...","Political news, campaign stories and Washingto...",True,2008-02-23 03:12:49+00:00,4136271,344,200996,4,20943,152532,"Washington, DC",False,http://cnn.com/politics,https://t.co/KWFMkrEjdY,https://pbs.twimg.com/profile_images/918899077...,https://pbs.twimg.com/profile_banners/13850422...,US,CNN,Television
6,1.238283e+18,1.238283e+18,13850420.0,2020-03-13 01:58:03+00:00,Fact check: President Trump incorrectly sugges...,en,SocialFlow,25.0,49.0,77.0,2.0,,,,0.744,0.022,0.076,0.158,0.744,anger,US,4,40,2020-03-13,2020-03-15,CNNPolitics,CNN Politics,"Political news, campaign stories and Washingto...","Political news, campaign stories and Washingto...",True,2008-02-23 03:12:49+00:00,4136271,344,200996,4,20943,152532,"Washington, DC",False,http://cnn.com/politics,https://t.co/KWFMkrEjdY,https://pbs.twimg.com/profile_images/918899077...,https://pbs.twimg.com/profile_banners/13850422...,US,CNN,Television
7,1.383692e+18,1.383692e+18,240649800.0,2021-04-18 07:59:47+00:00,#NewsAlert | DMK chief MK Stalin writes to PM ...,en,Twitter Media Studio - LiveCut,3.0,2.0,26.0,1.0,,,,0.155,0.095,0.528,0.222,0.528,optimism,IN,8,80,2021-04-18,2021-04-25,TimesNow,TIMES NOW,TIMES NOW is India’s most watched English news...,TIMES NOW is India’s most watched English news...,True,2011-01-20 12:17:23+00:00,10059534,377,674240,4,5449,361394,India,False,http://www.timesnownews.com,https://t.co/1r9xZjg7do,https://pbs.twimg.com/profile_images/135407453...,https://pbs.twimg.com/profile_banners/24064981...,IN,TIMES NOW,Television
8,1.286285e+18,1.286285e+18,19903360.0,2020-07-23 13:00:32+00:00,Data collection in new Covid-19 app ‘troubling...,en,dlvr.it,0.0,1.0,4.0,1.0,,,,0.102,0.013,0.067,0.819,0.819,sadness,IE,2,20,2020-07-23,2020-07-26,irishexaminer,Irish Examiner,Trusted coverage from irishexaminer.com | Shar...,Trusted coverage from https://t.co/KisLhDt4hz ...,True,2009-02-02 12:01:29+00:00,218041,598,398735,587,1678,242594,Ireland,False,http://www.irishexaminer.com,https://t.co/i7c8FvXflv,https://pbs.twimg.com/profile_images/127760298...,https://pbs.twimg.com/profile_banners/19903360...,IE,Irish Examiner,Newspaper
9,1.374331e+18,1.374331e+18,16887180.0,2021-03-23 12:04:13+00:00,UK falls silent to remember Covid victims on a...,en,TweetDeck,3.0,1.0,9.0,1.0,,,,0.023,0.006,0.012,0.959,0.959,sadness,UK,4,40,2021-03-23,2021-03-28,DailyMirror,The Mirror,The official Mirror Twitter account \n(Daily M...,The official Mirror Twitter account \n(Daily M...,True,2008-10-21 14:58:45+00:00,1258436,4144,774917,1369,6348,461217,UK,False,http://www.mirror.co.uk/,http://t.co/QVBfMK244U,https://pbs.twimg.com/profile_images/132681839...,https://pbs.twimg.com/profile_banners/16887175...,UK,Daily Mirror,Newspaper


In [13]:
news_tweets.columns

Index(['tweetId', 'conversationId', 'userId', 'date', 'content', 'lang',
       'sourceLabel', 'replyCount', 'retweetCount', 'likeCount', 'quoteCount',
       'longitude', 'latitude', 'place', 'anger', 'joy', 'optimism', 'sadness',
       'emotion_score', 'prevalent_emotion', 'country', 'theme', 'subtheme',
       'ds', 'dsw'],
      dtype='object')

In [14]:
news_tweets.sample(25, random_state=99).merge(news_accounts, on=('userId','country')).iloc[[4,5,8,18]]

Unnamed: 0,tweetId,conversationId,userId,date,content,lang,sourceLabel,replyCount,retweetCount,likeCount,quoteCount,longitude,latitude,place,anger,joy,optimism,sadness,emotion_score,prevalent_emotion,country,theme,subtheme,ds,dsw,username,displayname,description,rawDescription,verified,created,followersCount,friendsCount,statusesCount,favouritesCount,listedCount,mediaCount,location,protected,linkUrl,linkTcourl,profileImageUrl,profileBannerUrl,media_outlet,media_category
4,1.229537e+18,1.229537e+18,759251.0,2020-02-17 22:45:01+00:00,Apple has warned investors that the ongoing co...,en,TweetDeck,47.0,79.0,229.0,21.0,,,,0.243,0.008,0.043,0.705,0.705,sadness,US,2,22,2020-02-17,2020-02-23,CNN,CNN,It’s our job to #GoThere & tell the most diffi...,It’s our job to #GoThere & tell the most diffi...,True,2007-02-09 00:35:02+00:00,53851246,1101,340498,1410,148471,170412,,False,http://www.cnn.com,http://t.co/IaghNW8Xm2,https://pbs.twimg.com/profile_images/127825916...,https://pbs.twimg.com/profile_banners/759251/1...,CNN,Television
5,1.318225e+18,1.318225e+18,13850420.0,2020-10-19 16:20:05+00:00,"Dr. Anthony Fauci said he is ""absolutely not"" ...",en,SocialFlow,79.0,118.0,498.0,8.0,,,,0.098,0.15,0.601,0.151,0.601,optimism,US,5,51,2020-10-19,2020-10-25,CNNPolitics,CNN Politics,"Political news, campaign stories and Washingto...","Political news, campaign stories and Washingto...",True,2008-02-23 03:12:49+00:00,4136271,344,200996,4,20943,152532,"Washington, DC",False,http://cnn.com/politics,https://t.co/KWFMkrEjdY,https://pbs.twimg.com/profile_images/918899077...,https://pbs.twimg.com/profile_banners/13850422...,CNN,Television
8,1.286285e+18,1.286285e+18,19903360.0,2020-07-23 13:00:32+00:00,Data collection in new Covid-19 app ‘troubling...,en,dlvr.it,0.0,1.0,4.0,1.0,,,,0.102,0.013,0.067,0.819,0.819,sadness,IE,2,20,2020-07-23,2020-07-26,irishexaminer,Irish Examiner,Trusted coverage from irishexaminer.com | Shar...,Trusted coverage from https://t.co/KisLhDt4hz ...,True,2009-02-02 12:01:29+00:00,218041,598,398735,587,1678,242594,Ireland,False,http://www.irishexaminer.com,https://t.co/i7c8FvXflv,https://pbs.twimg.com/profile_images/127760298...,https://pbs.twimg.com/profile_banners/19903360...,Irish Examiner,Newspaper
18,1.309171e+18,1.309171e+18,1.066973e+18,2020-09-24 16:39:54+00:00,No refund on lockdown flights originating outs...,en,ht-twitter-handler,0.0,0.0,1.0,0.0,,,,0.754,0.008,0.025,0.213,0.754,anger,IN,6,61,2020-09-24,2020-09-27,HindustanTimes,Hindustan Times,One of India's largest media companies. Latest...,One of India's largest media companies. Latest...,True,2018-11-26 08:30:58+00:00,42987,12,339368,9,220,228,"New Delhi, India",False,https://www.hindustantimes.com/,https://t.co/WtJj2BvIpU,https://pbs.twimg.com/profile_images/130014548...,,Hindustan Times,Newspaper


In [15]:
n = news_tweets.sample(25, random_state=99).merge(news_accounts, on=('userId','country'))
n['engagement'] = n[['replyCount', 'retweetCount', 'likeCount', 'quoteCount']].sum(axis=1)
n.iloc[[4,5,8,18]][['content', 'theme', 'subtheme', 'country']]

Unnamed: 0,content,theme,subtheme,country
4,Apple has warned investors that the ongoing co...,2,22,US
5,"Dr. Anthony Fauci said he is ""absolutely not"" ...",5,51,US
8,Data collection in new Covid-19 app ‘troubling...,2,20,IE
18,No refund on lockdown flights originating outs...,6,61,IN


In [16]:
list(news_tweets.sample(25, random_state=99).merge(news_accounts, on='userId').content)

['#Coronavirus #CoronaCrisis |  State-wise breakup of Covid-19 cases in India, total climbs to 258\n\nhttps://t.co/VU03kvmnAt https://t.co/GNP1eXQrIt',
 'Philippine President Rodrigo Duterte widens a lockdown to the entire Luzon island to contain an outbreak of the novel coronavirus https://t.co/SDyko6Lr0M',
 'Doctors &amp; researchers around the world are trying to study the dangerous link between diabetes &amp; COVID-19. Many experts believe that COVID-19 can trigger the onset of diabetes even in some adults &amp; children who do not have traditional risk factors. @Shobhit10Mittal tells you more https://t.co/TXi9BW27rF',
 'This is the first time that the country has acknowledged that the virus may have crossed into the country\n\n#NorthKorea #Coronavirus\n\nhttps://t.co/GaBtrm1jmZ',
 'Apple has warned investors that the ongoing coronavirus outbreak is hurting its business more than previously expected by limiting how many devices it can make and sell in China https://t.co/gQVs7E3XeQ'

In [17]:
theme_desc

Unnamed: 0,theme,theme_desc
0,1,Cases and deaths
1,2,Economic impact
2,3,Educational impact
3,4,People stories
4,5,Authorities & Politics
5,6,Preventive measures
6,7,Virus spreading
7,8,Vaccines and vaccination


In [18]:
list(news_tweets.sample(25, random_state=73).merge(news_accounts, on='userId').content)

['https://t.co/By0KBr5HQV | Hairdressers lose court bid to return to work during lockdown https://t.co/Hq5A45ndaD https://t.co/AZoPZrN9Dj',
 'Just weeks after becoming the first man to walk on the moon, NASA astronaut Neil Armstrong celebrated his 39th birthday in quarantine https://t.co/5RsBHGXczl',
 'HAPPENING NOW: The WHO gives a media briefing on the COVID-19 pandemic. https://t.co/Pyl256QAJy',
 'In their line of work, they might be the last pockets of people that are still unaware of coronavirus. https://t.co/pSErtUkaxo',
 'Ontario long-term care home identifies teen staff member who died of COVID-19 https://t.co/KeB0ZLhEKL https://t.co/Awwg2SURJO',
 "EU agency insists it has the 'most appropriate' vaccine regulation process after UK approves Pfizer jab https://t.co/fttXsv6ptZ",
 'PRC expects approval this week of saliva-based testing for COVID-19\nhttps://t.co/xy7tU2r1jN',
 "Coronavirus restrictions need to be lifted 'sooner rather than later' - senior FF TD https://t.co/IB8DknLT

In [19]:
list(news_tweets.sample(25, random_state=13).merge(news_accounts, on='userId').iloc[[3,9,16,21]].content)

['LOOK: Coronavirus-wary animal owners in the Philippines had their pets blessed via a drive-through ceremony on Sunday to mark World Animal Day and the feast of Saint Francis of Assisi, the patron saint of animals. 📷 Reuters\n\nFULL STORY: https://t.co/Cfnljiseh1 https://t.co/w1KTf2YVlg',
 'Join @riatrillo for an in-depth conversation with Cabinet Secretary Karlo Nograles | Watch #TheSource here https://t.co/CaczwF9CtH\n\n• Readiness of PH in case of a coronavirus outbreak\n• Fate of the ABS-CBN franchise\n• Impact of the Visiting Forces Agreement with US https://t.co/XVGc7lsL0G',
 'Govt took unprecedented measure for labour welfare &amp; employment generation during Covid-19 pandemic: Labour and Employment Minister\n\nhttps://t.co/xGIVXMez6Y',
 '#Breaking | SENSATIONAL incident caught in camera in Kolkata.\n\nA COVID-19 suspect tried to escape from a hospital. \n\nDetails by Tamal Saha. https://t.co/ZLQkSgE7Uk']

In [20]:
comments['clen'] = comments.content.apply(lambda x: len(x))

In [21]:
c = pd.concat([
    comments[(comments.prevalent_emotion=='anger') & (comments.clen>100)].sample(5, random_state=13),
    comments[(comments.prevalent_emotion=='sadness') & (comments.clen>100)].sample(5, random_state=13),
    comments[(comments.prevalent_emotion=='optimism') & (comments.clen>100)].sample(5, random_state=13),
    comments[(comments.prevalent_emotion=='joy') & (comments.clen>100)].sample(5, random_state=13),
])
c

Unnamed: 0,tweetId,conversationId,userId,date,content,lang,sourceLabel,replyCount,retweetCount,likeCount,quoteCount,longitude,latitude,place,anger,joy,optimism,sadness,news_date,news_id,news_prevalent_emotion,news_emotion_score,news_anger,news_sadness,news_optimism,news_joy,theme,subtheme,country,emotion_score,prevalent_emotion,continent,ds,dsw,dsm,clen
9032671,1.275978e+18,1.275967e+18,1.097999e+18,2020-06-25 02:22:24+00:00,"@ABC Because his racism, pathological lying, s...",en,Twitter for iPhone,0.0,0.0,0.0,0.0,,,,0.957,0.004,0.01,0.028,2020-06-25 01:41:01+00:00,28785486.0,undefined,0.472,0.336,0.157,0.472,0.035,5,51,US,0.957,anger,America,2020-06-25,2020-06-28,2020-06-01,280
4635318,1.243226e+18,1.243187e+18,316244700.0,2020-03-26 17:19:16+00:00,@nuttysaham @CNNPolitics Left has nothing to d...,en,Twitter Web App,0.0,0.0,0.0,0.0,,,,0.846,0.007,0.081,0.066,2020-03-26 14:45:04+00:00,13850422.0,undefined,0.283,0.283,0.241,0.261,0.215,4,40,US,0.846,anger,America,2020-03-26,2020-03-29,2020-03-01,189
3725824,1.328131e+18,1.328123e+18,388507500.0,2020-11-16 00:20:41+00:00,"@DocWadeson @BreitbartNews Yup nothing else, m...",en,Twitter for iPad,0.0,0.0,0.0,0.0,,,,0.958,0.005,0.007,0.03,2020-11-15 23:47:55+00:00,457984599.0,undefined,0.432,0.097,0.432,0.399,0.073,5,51,US,0.958,anger,America,2020-11-15,2020-11-22,2020-11-01,140
12829163,1.256143e+18,1.256112e+18,30746510.0,2020-05-01 08:47:08+00:00,@nzherald First Amendment to the US constituti...,en,Twitter for iPhone,0.0,0.0,1.0,0.0,,,,0.788,0.012,0.075,0.125,2020-05-01 06:41:34+00:00,14765253.0,anger,0.942,0.942,0.036,0.019,0.004,4,40,NZ,0.788,anger,Oceania,2020-05-01,2020-05-03,2020-05-01,170
3392306,1.334096e+18,1.334043e+18,95475780.0,2020-12-02 11:25:09+00:00,@hannah62687 @guinness162 @BBCBreakfast End of...,en,Twitter for iPhone,2.0,0.0,0.0,0.0,,,,0.906,0.005,0.052,0.037,2020-12-02 07:55:00+00:00,143415291.0,optimism,0.852,0.046,0.047,0.852,0.055,5,51,UK,0.906,anger,Europe,2020-12-02,2020-12-06,2020-12-01,284
12229280,1.31724e+18,1.317163e+18,1.204838e+18,2020-10-16 23:02:56+00:00,"@Smurph512 @camelhumps @CNN yep, no one said j...",en,Twitter Web App,1.0,0.0,0.0,0.0,,,,0.134,0.103,0.256,0.507,2020-10-16 18:00:09+00:00,759251.0,anger,0.617,0.617,0.281,0.084,0.018,5,52,US,0.507,sadness,America,2020-10-16,2020-10-18,2020-10-01,106
5601275,1.387506e+18,1.387497e+18,1.068931e+18,2021-04-28 20:38:02+00:00,@nytimes And more shocking is the fact that we...,en,Twitter for Android,0.0,0.0,2.0,0.0,,,,0.275,0.008,0.037,0.68,2021-04-28 20:00:17+00:00,807095.0,sadness,0.932,0.037,0.932,0.025,0.006,1,10,US,0.68,sadness,America,2021-04-28,2021-05-02,2021-04-01,284
3207715,1.238393e+18,1.238391e+18,227433100.0,2020-03-13 09:12:52+00:00,@BBCr4today We need to slow the virus down so ...,en,Twitter Web App,0.0,0.0,6.0,0.0,,,,0.128,0.006,0.047,0.818,2020-03-13 09:04:49+00:00,8170292.0,optimism,0.818,0.04,0.086,0.818,0.056,2,21,UK,0.818,sadness,Europe,2020-03-13,2020-03-15,2020-03-01,138
3077806,1.359245e+18,1.359116e+18,2984031000.0,2021-02-09 20:57:50+00:00,@liam54344527 @jonnymain2011 @MillieWiller @BB...,en,Twitter for iPhone,0.0,0.0,1.0,0.0,,,,0.05,0.006,0.02,0.923,2021-02-09 12:24:52+00:00,15687507.0,joy,0.566,0.036,0.077,0.321,0.566,1,11,UK,0.923,sadness,Europe,2021-02-09,2021-02-14,2021-02-01,145
3974950,1.297649e+18,1.297588e+18,293126100.0,2020-08-23 21:36:06+00:00,"@t2hearn @TRUMP2TERM1 @Tuulmaker @business ""Br...",en,Twitter Web App,1.0,0.0,0.0,0.0,,,,0.209,0.056,0.105,0.63,2020-08-23 17:34:45+00:00,34713362.0,optimism,0.58,0.162,0.164,0.58,0.094,2,22,US,0.63,sadness,America,2020-08-23,2020-08-30,2020-08-01,165


In [22]:
[str(cnt) + ' | ' + text for cnt, text in enumerate(c.content)]

['0 | @ABC Because his racism, pathological lying, sexual assault and the fact he does nothing but Rage Tweet all day isn’t bad enough, +120 THOUSAND are dead, millions are unemployed, so he once again thinks if he denies something, we won’t movie the people literally dying around us 🙄',
 '1 | @nuttysaham @CNNPolitics Left has nothing to do with dividing the country by holding up the bill to put bullshit it that has nothing to do with the crisis. Why don’t you read the two bills.',
 '2 | @DocWadeson @BreitbartNews Yup nothing else, masks haven’t stopped COVID , the flue, heart attacks, strokes , come on people wake da fuhg up',
 '3 | @nzherald First Amendment to the US constitution says *nothing* about “freedom of worship.” \n\nShame she didn’t read that like she didn’t read anything about epidemiology.',
 '4 | @hannah62687 @guinness162 @BBCBreakfast End of the day like I say no vaccine has 100 percent uptake u don’t even need it to be 100 percent to get wiped out so I suggest u stop p

3 "(...) He doesn’t respect the reporters neither the people that are at the frontline fighting for this virus (...)"
7 "(...) so many doctors in Europe who have to decide who gets a respirator and who has to die (...)"
12 "(...) I do hope you and the rest of the 1743 family are well (...)"
17 "(...) I heard this as well via a reputable news source. You're not imagining it. :) (...)"

In [23]:
c['engagement'] = c[['replyCount', 'retweetCount', 'likeCount', 'quoteCount']].sum(axis=1)
c.iloc[[3,7,12,17]][['content', 'prevalent_emotion', 'emotion_score', 'country']]

Unnamed: 0,content,prevalent_emotion,emotion_score,country
12829163,@nzherald First Amendment to the US constituti...,anger,0.788,NZ
3207715,@BBCr4today We need to slow the virus down so ...,sadness,0.818,UK
12138138,@HGiamarco @KlongName @washingtonpost There ar...,optimism,0.881,US
734232,@dailytelegraph Good on them! Wish we had a si...,joy,0.963,AU


N/C | Content | Prevalent Emotion | Score | Theme | Subtheme | Engagement

# Analysis

In [24]:
g = comments[emotions].mean()
g

anger       0.538530
sadness     0.194305
optimism    0.143847
joy         0.123318
dtype: float64

In [25]:
c0 = comments.groupby(['country', 'theme', 'subtheme', 'news_prevalent_emotion']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c0['lift_' + col] = c0[col] / g[col]

c0[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.786345
lift_sadness     4.760552
lift_optimism    5.109592
lift_joy         7.760428
dtype: float64

In [26]:
c1 = comments.groupby(['country', 'theme', 'subtheme']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c1['lift_' + col] = c1[col] / g[col]

c1[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.491097
lift_sadness     3.427597
lift_optimism    4.317084
lift_joy         1.853006
dtype: float64

In [27]:
c2 = comments.groupby(['country', 'theme']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c2['lift_' + col] = c2[col] / g[col]

c2[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.119947
lift_sadness     1.521808
lift_optimism    1.860433
lift_joy         1.400155
dtype: float64

In [28]:
c3 = comments.groupby(['country']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c3['lift_' + col] = c3[col] / g[col]

c3[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.043756
lift_sadness     1.198348
lift_optimism    1.596903
lift_joy         1.185103
dtype: float64

In [29]:
c31 = comments.groupby(['continent', 'country', 'dsm']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c31['lift_' + col] = c31[col] / g[col]

c31[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.185067
lift_sadness     1.505042
lift_optimism    2.055944
lift_joy         1.462452
dtype: float64

In [30]:
c4 = comments.groupby(['theme']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c4['lift_' + col] = c4[col] / g[col]

c4[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.066736
lift_sadness     1.199857
lift_optimism    1.209570
lift_joy         1.085468
dtype: float64

In [31]:
c41 = comments.groupby(['theme', 'country']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c41['lift_' + col] = c41[col] / g[col]

c41[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.119947
lift_sadness     1.521808
lift_optimism    1.860433
lift_joy         1.400155
dtype: float64

In [32]:
c42 = comments.groupby(['theme', 'dsw']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c42['lift_' + col] = c42[col] / g[col]

c42[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.203078
lift_sadness     3.329813
lift_optimism    2.581840
lift_joy         2.389487
dtype: float64

In [33]:
c5 = comments.groupby(['subtheme']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c5['lift_' + col] = c5[col] / g[col]

c5[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.132297
lift_sadness     1.686525
lift_optimism    1.209570
lift_joy         1.156571
dtype: float64

In [34]:
c51 = comments.groupby(['subtheme', 'country']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c51['lift_' + col] = c51[col] / g[col]

c51[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.491097
lift_sadness     3.427597
lift_optimism    4.317084
lift_joy         1.853006
dtype: float64

In [35]:
c52 = comments.groupby(['subtheme', 'news_prevalent_emotion']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c52['lift_' + col] = c52[col] / g[col]

c52[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.340687
lift_sadness     1.708586
lift_optimism    1.679799
lift_joy         2.612611
dtype: float64

In [36]:
c6 = comments.groupby(['news_prevalent_emotion']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c6['lift_' + col] = c6[col] / g[col]

c6[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.156111
lift_sadness     1.162797
lift_optimism    1.174983
lift_joy         1.617622
dtype: float64

In [37]:
c61 = comments.groupby(['news_prevalent_emotion', 'country']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c61['lift_' + col] = c61[col] / g[col]

c61[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.188647
lift_sadness     1.410172
lift_optimism    1.948979
lift_joy         2.032794
dtype: float64

In [38]:
c7 = comments.groupby(['dsw']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c7['lift_' + col] = c7[col] / g[col]

c7[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.104865
lift_sadness     1.342397
lift_optimism    1.230990
lift_joy         1.108797
dtype: float64

In [39]:
c8 = comments.groupby(['continent']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c8['lift_' + col] = c8[col] / g[col]

c8[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.037932
lift_sadness     1.106693
lift_optimism    1.273738
lift_joy         1.173479
dtype: float64

In [40]:
c81 = comments.groupby(['continent', 'dsm']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c81['lift_' + col] = c81[col] / g[col]

c81[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.112780
lift_sadness     1.418362
lift_optimism    1.540471
lift_joy         1.364792
dtype: float64

In [41]:
c9 = comments.groupby(['continent', 'country', 'news_id']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c9['lift_' + col] = c9[col] / g[col]

c9[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.148386
lift_sadness     2.312084
lift_optimism    5.575365
lift_joy         5.817188
dtype: float64

In [42]:
engaged_news = news_tweets.groupby('userId').tweetId.count()
engaged_news = list(engaged_news[engaged_news > 484].index)

In [43]:
c91 = c9[c9.news_id.isin(engaged_news)].merge(news_accounts.drop('country',axis=1), left_on='news_id', right_on='userId', how='left')

In [None]:
c9 = comments.groupby(['continent', 'country', 'news_id']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c9['lift_' + col] = c9[col] / g[col]

c9[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

engaged_news = news_tweets.groupby('userId').tweetId.count()
engaged_news = list(engaged_news[engaged_news > 484].index)

c91 = c9[c9.news_id.isin(engaged_news)].merge(news_accounts.drop('country',axis=1), left_on='news_id', right_on='userId', how='left')

# Conclusions

#### T1: joyful news reflect in more joyful comments, mainly in IE, IN and PH

In [44]:
c6

Unnamed: 0,news_prevalent_emotion,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
0,anger,0.6226,0.149087,0.122178,0.106134,3114617,1.156111,0.767283,0.849361,0.860654
1,joy,0.470751,0.186017,0.14375,0.199482,739089,0.87414,0.957346,0.999328,1.617622
2,optimism,0.51807,0.18201,0.169018,0.130902,2679863,0.962008,0.936722,1.174983,1.061499
3,sadness,0.514258,0.225938,0.141646,0.118159,6931133,0.954929,1.162797,0.984699,0.958166
4,undefined,0.541251,0.184841,0.147544,0.126365,4156202,1.005052,0.951292,1.025698,1.024706


In [45]:
c6[c6.lift_joy>1.5]

Unnamed: 0,news_prevalent_emotion,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
1,joy,0.470751,0.186017,0.14375,0.199482,739089,0.87414,0.957346,0.999328,1.617622


In [46]:
c61[c61.news_prevalent_emotion=='joy'].sort_values('lift_joy')

Unnamed: 0,news_prevalent_emotion,country,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
13,joy,CA,0.492252,0.198934,0.146259,0.162554,56064,0.914067,1.023821,1.01677,1.318169
12,joy,AU,0.509158,0.184852,0.137281,0.1687,22123,0.94546,0.951349,0.954353,1.368008
23,joy,ZA,0.451983,0.218784,0.146514,0.182723,13690,0.83929,1.125979,1.018538,1.481721
16,joy,KE,0.423523,0.214678,0.172986,0.188811,6342,0.786443,1.104848,1.202569,1.531088
22,joy,US,0.4926,0.178055,0.135752,0.193592,415149,0.914713,0.916367,0.943726,1.569865
17,joy,MY,0.425265,0.216059,0.163382,0.195305,2614,0.789677,1.111956,1.135802,1.583751
18,joy,NG,0.431903,0.192928,0.162623,0.212556,32514,0.802004,0.992911,1.130527,1.723644
19,joy,NZ,0.441967,0.196915,0.146838,0.214294,3819,0.820691,1.013434,1.02079,1.737738
21,joy,UK,0.448647,0.194351,0.139845,0.217158,114511,0.833097,1.000236,0.972177,1.760958
15,joy,IN,0.397255,0.191865,0.174086,0.236793,43915,0.737665,0.987442,1.210215,1.920181


#### News about vaccines generate more positive feelings (optmism and joy), mainly in PH, In and NZ

In [47]:
c4

Unnamed: 0,theme,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
0,1,0.506412,0.23094,0.147051,0.115599,1406324,0.94036,1.188541,1.02227,0.937406
1,2,0.542575,0.18515,0.148538,0.123736,3714198,1.007511,0.952884,1.032613,1.003393
2,3,0.509572,0.206941,0.15268,0.130807,642184,0.946229,1.065031,1.061402,1.060732
3,4,0.500465,0.233138,0.140145,0.126252,1698593,0.929317,1.199857,0.974262,1.023794
4,5,0.574469,0.172661,0.13447,0.118399,4205557,1.066736,0.888608,0.934815,0.960111
5,6,0.532879,0.193646,0.141385,0.132089,3191397,0.989507,0.99661,0.982887,1.071127
6,7,0.545316,0.197116,0.143411,0.114157,1873826,1.012601,1.014467,0.996968,0.925714
7,8,0.502045,0.190106,0.173993,0.133858,888825,0.932251,0.978386,1.20957,1.085468


In [48]:
c41[c41.theme==8]

Unnamed: 0,theme,country,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
84,8,AU,0.512658,0.205884,0.167758,0.113706,28978,0.951958,1.059593,1.166222,0.922053
85,8,CA,0.517181,0.196171,0.170122,0.116524,101318,0.960358,1.0096,1.182659,0.944909
86,8,IE,0.510192,0.204542,0.163804,0.121463,28474,0.94738,1.052686,1.138734,0.984961
87,8,IN,0.465553,0.19143,0.191187,0.151831,73073,0.864489,0.985203,1.329099,1.231214
88,8,KE,0.480735,0.207728,0.182952,0.128597,9863,0.892681,1.06908,1.271848,1.042811
89,8,MY,0.483734,0.202505,0.183849,0.12991,3641,0.898249,1.042198,1.278088,1.053457
90,8,NG,0.476901,0.187174,0.179449,0.156476,20149,0.885562,0.9633,1.247501,1.26888
91,8,NZ,0.492696,0.201095,0.177863,0.128357,3110,0.914892,1.034942,1.236475,1.040864
92,8,PH,0.412078,0.196301,0.239409,0.152216,26483,0.765191,1.01027,1.664331,1.234337
93,8,UK,0.49277,0.202883,0.17105,0.133297,154782,0.915027,1.044145,1.189108,1.080925


#### Comments in first weeks of 2020 had more sadness

In [49]:
c7[c7.lift_sadness>1.3]

Unnamed: 0,dsw,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
0,2020-01-05,0.490615,0.256561,0.177074,0.075777,148,0.911026,1.320401,1.23099,0.614485
2,2020-01-19,0.473413,0.260835,0.15333,0.112425,1604,0.879085,1.342397,1.065926,0.911669
6,2020-02-16,0.466971,0.256691,0.150745,0.125598,33225,0.867121,1.32107,1.047952,1.01849


In [50]:
c7.tail(16)

Unnamed: 0,dsw,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
54,2021-01-17,0.519599,0.200316,0.150819,0.129264,232450,0.964847,1.030937,1.04847,1.048214
55,2021-01-24,0.530551,0.194009,0.147228,0.128214,218399,0.985185,0.998474,1.023502,1.039705
56,2021-01-31,0.539757,0.189899,0.147215,0.12313,257592,1.002278,0.977323,1.023411,0.998474
57,2021-02-07,0.51406,0.205689,0.153552,0.126698,192202,0.954562,1.058585,1.067464,1.027411
58,2021-02-14,0.533315,0.193288,0.147008,0.126389,182413,0.990316,0.994766,1.021974,1.024903
59,2021-02-21,0.527288,0.191913,0.150863,0.129937,179161,0.979124,0.987687,1.048774,1.053676
60,2021-02-28,0.527865,0.197351,0.150858,0.123926,181998,0.980197,1.015674,1.048742,1.00493
61,2021-03-07,0.537935,0.18118,0.148929,0.131958,204437,0.998895,0.932451,1.035326,1.070061
62,2021-03-14,0.521773,0.190466,0.156458,0.131303,156749,0.968884,0.980243,1.087668,1.064755
63,2021-03-21,0.531838,0.197153,0.148465,0.122549,162834,0.987574,1.014657,1.032102,0.993762


#### News and comments related to 'Nursing homes and elderly victims', 'Mental health impact' & 'Family stories'

In [51]:
c5[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].min()

lift_anger       0.789800
lift_sadness     0.784112
lift_optimism    0.828727
lift_joy         0.776553
dtype: float64

In [52]:
c5.sort_values('lift_sadness')

Unnamed: 0,subtheme,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
19,63,0.607061,0.152357,0.121853,0.118731,73459,1.127257,0.784112,0.847104,0.962803
15,53,0.589802,0.152616,0.143268,0.11431,70938,1.095207,0.785444,0.995977,0.926952
14,52,0.609775,0.153795,0.125772,0.110656,1866272,1.132297,0.791515,0.874344,0.897325
13,51,0.585349,0.17143,0.135461,0.10776,291746,1.08694,0.88227,0.9417,0.873837
5,22,0.555032,0.178984,0.150142,0.115841,757010,1.030643,0.921149,1.043759,0.939366
4,21,0.544031,0.183828,0.150414,0.121728,896591,1.010215,0.946076,1.045651,0.987107
17,61,0.55304,0.184632,0.135098,0.127231,1230894,1.026944,0.950216,0.939175,1.031728
20,64,0.545927,0.186758,0.144945,0.122369,74713,1.013736,0.96116,1.007634,0.992302
3,20,0.537365,0.187991,0.147133,0.127511,2060597,0.997837,0.967504,1.022845,1.034001
18,62,0.542893,0.188317,0.144433,0.124358,151688,1.008102,0.96918,1.004075,1.008435


In [53]:
c52[(c52.subtheme>40) & (c52.subtheme<50)]

Unnamed: 0,subtheme,news_prevalent_emotion,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
40,41,anger,0.593393,0.207722,0.119758,0.079142,1434,1.101876,1.069052,0.832537,0.641774
41,41,joy,0.350961,0.179362,0.182826,0.28684,2466,0.651701,0.923093,1.270978,2.326019
42,41,optimism,0.491667,0.212234,0.187043,0.109056,909,0.912979,1.092273,1.30029,0.884349
43,41,sadness,0.52054,0.270143,0.121786,0.087532,16672,0.966595,1.390304,0.846638,0.709807
44,41,undefined,0.524344,0.222994,0.135684,0.116959,2260,0.973659,1.147649,0.943249,0.948433
45,42,anger,0.722,0.0726,0.0634,0.142,5,1.340687,0.373639,0.440746,1.151495
46,42,optimism,0.40561,0.248854,0.241634,0.103732,41,0.75318,1.280736,1.679799,0.841173
47,42,sadness,0.424059,0.331987,0.119575,0.124375,1242,0.787438,1.708586,0.831264,1.008573
48,42,undefined,0.6064,0.1646,0.2008,0.0284,5,1.126029,0.847121,1.395927,0.230299
49,43,anger,0.589436,0.169798,0.113121,0.127658,5204,1.094528,0.873872,0.786397,1.035194


In [54]:
c5[c5.lift_sadness>1]

Unnamed: 0,subtheme,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
0,10,0.497744,0.234452,0.148067,0.119739,787044,0.924264,1.206616,1.029336,0.970979
1,11,0.519743,0.226479,0.144606,0.109174,555634,0.965114,1.165583,1.005272,0.885302
2,12,0.497221,0.226454,0.155827,0.120498,63646,0.923293,1.165455,1.083285,0.977131
6,30,0.509572,0.206941,0.15268,0.130807,642184,0.946229,1.065031,1.061402,1.060732
7,40,0.501536,0.229889,0.141452,0.127123,1547721,0.931306,1.183131,0.983351,1.030857
8,41,0.506583,0.250238,0.131826,0.111353,23741,0.940677,1.287859,0.916429,0.902974
9,42,0.425331,0.327701,0.123542,0.123418,1293,0.7898,1.686525,0.858844,1.000808
10,43,0.550403,0.21189,0.11921,0.118506,22516,1.022047,1.090502,0.828727,0.960979
11,44,0.473069,0.281337,0.127241,0.118353,103322,0.878445,1.447915,0.884558,0.959736
16,60,0.513189,0.202762,0.146555,0.137495,1624751,0.952944,1.043523,1.018824,1.11496


In [55]:
c51[c51.lift_sadness>1.5]

Unnamed: 0,subtheme,country,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
8,10,PH,0.343175,0.295439,0.209877,0.151513,27268,0.637244,1.520488,1.459031,1.228637
20,11,PH,0.334781,0.29639,0.21238,0.156458,14954,0.621658,1.525382,1.476431,1.268738
32,12,PH,0.324498,0.293192,0.250143,0.132163,1357,0.602563,1.508927,1.73895,1.071725
96,41,AU,0.499131,0.335143,0.09228,0.073486,321,0.92684,1.724829,0.641517,0.595907
98,41,IE,0.46905,0.31699,0.124457,0.089513,1555,0.870982,1.631401,0.865201,0.725873
101,41,MY,0.417,0.54,0.032,0.011,1,0.77433,2.779133,0.222458,0.0892
102,41,NG,0.3085,0.666,0.01475,0.011,4,0.572856,3.427597,0.102539,0.0892
108,42,CA,0.254529,0.508529,0.141647,0.095206,34,0.472638,2.617168,0.984706,0.772036
113,42,UK,0.431303,0.329913,0.11526,0.12352,1118,0.80089,1.697912,0.801269,1.001636
115,42,ZA,0.539,0.404,0.05,0.007,1,1.000873,2.079203,0.347591,0.056764


#### Emotions per continent
- America and Oceania have more anger
- Asia is more optimistic
- Europe has more sadness than average
- Africa have more joy and sadness

In [56]:
c8

Unnamed: 0,continent,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
0,Africa,0.503765,0.210873,0.151472,0.13389,1405252,0.935446,1.085268,1.053005,1.08573
1,America,0.558957,0.184328,0.138787,0.117928,10818003,1.037932,0.94865,0.964826,0.95629
2,Asia,0.460086,0.211979,0.183223,0.144711,1468263,0.854338,1.090959,1.273738,1.173479
3,Europe,0.514437,0.215036,0.14091,0.129617,3048003,0.955261,1.106693,0.979579,1.051084
4,Oceania,0.557228,0.189218,0.138356,0.115199,881383,1.03472,0.973817,0.961824,0.934161


In [57]:
c42.query("theme==8").tweetId.mean()

12697.5

In [58]:
c42[c42.dsw>pd.to_datetime('2021-01-01')].sort_values(['dsw', 'theme']).tail(50)

Unnamed: 0,theme,dsw,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
482,7,2021-03-21,0.522753,0.209922,0.154086,0.113243,14297,0.970705,1.08037,1.07118,0.918302
552,8,2021-03-21,0.503849,0.212429,0.165111,0.118618,27142,0.935601,1.093277,1.147822,0.961886
64,1,2021-03-28,0.499554,0.222667,0.150683,0.127104,11593,0.927626,1.145964,1.04752,1.030703
134,2,2021-03-28,0.525378,0.190042,0.151672,0.132904,41129,0.975578,0.978057,1.054397,1.077732
203,3,2021-03-28,0.516643,0.198544,0.161496,0.123304,5314,0.959359,1.021817,1.122691,0.999888
273,4,2021-03-28,0.456794,0.259331,0.151278,0.132583,7954,0.848224,1.334659,1.051656,1.075135
343,5,2021-03-28,0.555393,0.179712,0.129792,0.135106,24593,1.031313,0.924896,0.902293,1.09559
413,6,2021-03-28,0.514276,0.211991,0.144134,0.1296,33445,0.954963,1.09102,1.001993,1.050942
483,7,2021-03-28,0.534389,0.203086,0.151239,0.111291,15012,0.99231,1.045192,1.051388,0.902475
553,8,2021-03-28,0.51479,0.187394,0.168551,0.12927,19302,0.955918,0.96443,1.171735,1.048266


#### African event of high incidence of anger
- In 2020 October there was a news published by a big Nigerian TV Channel saying that people were looting COVID-19 resources, and commenters angrily commented that this was not true.

In [59]:
c81[c81.continent=='Africa']

Unnamed: 0,continent,dsm,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
0,Africa,2020-01-01,0.513392,0.232159,0.141663,0.112805,3715,0.953321,1.194818,0.984815,0.914748
1,Africa,2020-02-01,0.532897,0.200581,0.130265,0.136258,37040,0.98954,1.032301,0.905578,1.104935
2,Africa,2020-03-01,0.476034,0.22222,0.160855,0.140889,238526,0.883952,1.143666,1.118238,1.14249
3,Africa,2020-04-01,0.49839,0.210906,0.155624,0.13508,369594,0.925464,1.085438,1.08187,1.095381
4,Africa,2020-05-01,0.504702,0.207826,0.157962,0.129511,199529,0.937186,1.069587,1.098127,1.050217
5,Africa,2020-06-01,0.506657,0.211102,0.146113,0.136128,78938,0.940816,1.086448,1.015751,1.103876
6,Africa,2020-07-01,0.4977,0.224397,0.147353,0.13055,87817,0.924184,1.154869,1.024369,1.058648
7,Africa,2020-08-01,0.51762,0.202318,0.144489,0.135573,49539,0.961172,1.041238,1.004463,1.099377
8,Africa,2020-09-01,0.510034,0.197073,0.147228,0.145663,31331,0.947087,1.014244,1.023504,1.181203
9,Africa,2020-10-01,0.599265,0.176383,0.116469,0.107883,53715,1.11278,0.907763,0.809671,0.87484


In [60]:
comments[(comments.country=='NG') & (comments.dsm=='2020-10-01') & (comments.prevalent_emotion=='anger')].conversationId.value_counts(normalize=True).cumsum().head(25).index

Float64Index([1.3166856469840773e+18, 1.3171886046077379e+18,
               1.319231803396395e+18, 1.3193416341155226e+18,
              1.3193295694684856e+18,  1.319997321745191e+18,
              1.3183019495995105e+18, 1.3206771328220856e+18,
               1.320977142126125e+18,  1.319737814427312e+18,
              1.3195245308141322e+18,  1.319717867835822e+18,
              1.3196266964091003e+18,  1.318750934030295e+18,
              1.3171733915148657e+18, 1.3210016620902728e+18,
              1.3209854470199828e+18, 1.3148242603392778e+18,
              1.3183316844634972e+18,  1.319709058312491e+18,
              1.3206903975517102e+18,  1.320967983154393e+18,
              1.3209706091285463e+18,  1.319993214435197e+18,
              1.3206377425822884e+18],
             dtype='float64')

In [61]:
c31[c31.continent=='Africa']

Unnamed: 0,continent,country,dsm,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
0,Africa,KE,2020-01-01,0.528234,0.240243,0.133353,0.09817,1274,0.980881,1.236418,0.927048,0.796075
1,Africa,KE,2020-02-01,0.58433,0.206586,0.126127,0.082958,11979,1.085047,1.063203,0.87681,0.672712
2,Africa,KE,2020-03-01,0.486895,0.23223,0.164273,0.116602,46117,0.904119,1.195182,1.141999,0.94554
3,Africa,KE,2020-04-01,0.489843,0.224633,0.169144,0.116382,44919,0.909594,1.156083,1.17586,0.943756
4,Africa,KE,2020-05-01,0.477085,0.234043,0.171259,0.117607,24751,0.885902,1.204513,1.190565,0.953692
5,Africa,KE,2020-06-01,0.475983,0.22458,0.160338,0.139101,12731,0.883856,1.15581,1.114639,1.127989
6,Africa,KE,2020-07-01,0.45918,0.248792,0.163743,0.128279,17545,0.852656,1.280416,1.13831,1.040228
7,Africa,KE,2020-08-01,0.4945,0.234219,0.149402,0.121878,10997,0.918241,1.205416,1.03862,0.988319
8,Africa,KE,2020-09-01,0.503485,0.204077,0.154163,0.138285,8642,0.934924,1.05029,1.071714,1.121368
9,Africa,KE,2020-10-01,0.495175,0.247604,0.149722,0.107505,7925,0.919495,1.274302,1.04084,0.871769


Filtering the 25 Nigerian news tweets in Oct 2020 that have more prevalent anger comments (they represent 60% of anger comments)

In [62]:
news_tweets[news_tweets.tweetId.isin(comments[(comments.country=='NG') & (comments.dsm=='2020-10-01') & (comments.prevalent_emotion=='anger')].conversationId.value_counts(normalize=True).cumsum().head(25).index)].content.values

array(['The FCT Security Committee has just announced a ban on all #EndSARS street demonstrations, protests, and processions anywhere in Abuja.\n\nThe Committee accused protesters of violating COVID-19 guidelines regulating public gatherings, as well as endangering their own lives. https://t.co/rhgvTEFpPq',
       '[BREAKING] Resumption: 181 students, staff contract COVID-19 in Lagos private school https://t.co/AxssGKbjwE',
       '181 Students Test Positive For COVID-19 In Lekki, Lagos\nhttps://t.co/2GX2XGWAWQ https://t.co/TT2YYdltbh',
       '#EndSARS protest: Nigeria should prepare for increase in COVID-19 cases ― FG warns https://t.co/gv0zy6sAqT',
       '#ENDSARS protest: Nigeria should prepare for increase in COVID-19 cases — FG https://t.co/4JVbQJI7Nh',
       'Protesters should kindly vacate the roads and allow supplies, especially food, to get to the people. The economy should not be asphyxiated, otherwise the combined effects of the protests and COVID-19 pandemic could collap

#### NewsId and emotions
- News accounts that have more anger comments are likely to be from America and related to politics
- News accounts with optimistic comments are from the philippines
- News accounts with sad comments are from india
- BB Radio 4 is a case with joyful comments

In [63]:
c9[c9.lift_joy>2]

Unnamed: 0,continent,country,news_id,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
7,Africa,KE,632859300.0,0.327115,0.200474,0.195462,0.276934,392,0.607422,1.03175,1.358816,2.245688
11,Africa,NG,15819470.0,0.343632,0.202714,0.182877,0.270777,1451,0.638093,1.043276,1.271331,2.195766
30,Africa,NG,1698798000.0,0.267,0.0668,0.3424,0.3234,5,0.495794,0.343789,2.380305,2.622489
95,America,US,15635600.0,0.438421,0.197737,0.071842,0.292316,19,0.814107,1.017661,0.499434,2.370424
125,Asia,IN,31632900.0,0.325366,0.177023,0.160806,0.336801,1379,0.604175,0.911057,1.117898,2.731156
133,Asia,IN,52535970.0,0.167969,0.192123,0.124723,0.515138,65,0.311903,0.98877,0.867053,4.17732
134,Asia,IN,92506190.0,0.319943,0.229429,0.137457,0.313343,35,0.594104,1.180764,0.955578,2.540935
156,Asia,IN,2348042000.0,0.268096,0.270543,0.210055,0.251247,470,0.497829,1.392359,1.460268,2.037391
162,Asia,IN,9.633674e+17,0.402309,0.193582,0.121673,0.282491,55,0.747051,0.996277,0.845848,2.290753
174,Asia,MY,477490500.0,0.2095,0.11475,0.271,0.4045,4,0.389022,0.590566,1.883945,3.280139


In [64]:
c9[c9.lift_optimism>2]

Unnamed: 0,continent,country,news_id,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy
30,Africa,NG,1698798000.0,0.267,0.0668,0.3424,0.3234,5,0.495794,0.343789,2.380305,2.622489
116,Asia,IN,9294762.0,0.25114,0.199962,0.307665,0.241182,650,0.466344,1.02911,2.138831,1.95577
141,Asia,IN,261113900.0,0.136,0.031,0.802,0.031,1,0.252539,0.159543,5.575365,0.251383
180,Asia,PH,17644830.0,0.264892,0.220842,0.395969,0.118279,2300,0.491879,1.136574,2.752709,0.95914
188,Asia,PH,63411620.0,0.296483,0.187066,0.344509,0.171927,1420,0.550542,0.962744,2.394968,1.394175
197,Asia,PH,8.233496e+17,0.287289,0.217422,0.329457,0.165859,1782,0.533469,1.118971,2.29033,1.344967


In [65]:
nt = news_tweets.merge(theme_desc).merge(subtheme_desc)

In [66]:
nt.groupby('country').theme_desc.value_counts(normalize=True)

country  theme_desc              
AU       Authorities & Politics      0.353421
         Economic impact             0.190050
         Preventive measures         0.167764
         Virus spreading             0.085002
         Cases and deaths            0.079383
                                       ...   
ZA       Cases and deaths            0.110621
         Virus spreading             0.080259
         People stories              0.075673
         Vaccines and vaccination    0.050205
         Educational impact          0.031455
Name: theme_desc, Length: 96, dtype: float64

In [67]:
nt.groupby('country').subtheme_desc.value_counts(normalize=True).rename('aaa').reset_index().pivot('subtheme_desc', 'country', 'aaa')

country,AU,CA,IE,IN,KE,MY,NG,NZ,PH,UK,US,ZA
subtheme_desc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Authorities & Politics (default),0.292282,0.110746,0.100837,0.159896,0.08622,0.079733,0.084995,0.177241,0.075295,0.1232,0.110545,0.143534
Cases and deaths (default),0.048614,0.086105,0.090389,0.082861,0.113555,0.114836,0.093139,0.086349,0.079839,0.032346,0.044597,0.067094
Cases and deaths decreasing,0.00493,0.006775,0.006934,0.005239,0.003356,0.006983,0.002951,0.003381,0.003588,0.005129,0.004455,0.003263
Cases and deaths increasing,0.02584,0.036072,0.031631,0.049211,0.063772,0.041355,0.033603,0.020366,0.040103,0.035005,0.030064,0.040264
Easing restrictions,0.001026,0.000896,0.00114,0.000573,0.000671,0.001543,0.0007,0.000423,0.001381,0.000474,0.000876,0.000864
Economic impact (default),0.100018,0.149587,0.132279,0.115191,0.13466,0.181699,0.236712,0.133446,0.12631,0.144731,0.150363,0.140656
Economic impact over national economies and big companies,0.041969,0.046806,0.052407,0.036786,0.043553,0.041923,0.039296,0.042527,0.054727,0.040443,0.06362,0.048459
Economic plans to support jobs and food programs,0.048063,0.045863,0.052244,0.051222,0.059368,0.062885,0.047983,0.049157,0.069423,0.035153,0.062157,0.06362
Educational impact (default),0.030157,0.050245,0.037371,0.02837,0.043687,0.024967,0.024475,0.031011,0.033983,0.033916,0.055279,0.031455
Elections,0.001915,0.002119,0.001167,0.002029,0.000886,0.001935,0.000809,0.002747,0.000649,0.001504,0.003499,0.001267


In [68]:
nt.theme_desc.value_counts(normalize=True)

Economic impact             0.241412
Preventive measures         0.200064
Authorities & Politics      0.174950
Cases and deaths            0.115796
People stories              0.086477
Virus spreading             0.084560
Vaccines and vaccination    0.060233
Educational impact          0.036508
Name: theme_desc, dtype: float64

In [69]:
nt.subtheme_desc.value_counts(normalize=True)

Economic impact (default)                                    0.141520
Authorities & Politics (default)                             0.128091
Preventive measures (default)                                0.108863
People stories (default)                                     0.079276
Mobility restrictions                                        0.076942
Cases and deaths (default)                                   0.072385
Vaccines and vaccination (default)                           0.060233
Economic plans to support jobs and food programs             0.053525
Virus spreading (default)                                    0.047078
Economic impact over national economies and big companies    0.046366
Cases and deaths increasing                                  0.038521
Political authorities                                        0.037490
Educational impact (default)                                 0.036508
Outbreak and pandemic origins                                0.019001
Research and treatme

In [70]:
c9 = comments.groupby(['continent', 'country', 'news_id']) \
       .agg({'anger':'mean', 'sadness':'mean', 'optimism':'mean', 'joy':'mean', 'tweetId':'count'}) \
       .reset_index()

for col in ['anger', 'sadness', 'optimism', 'joy']:
    c9['lift_' + col] = c9[col] / g[col]

c9[['lift_'+col for col in ['anger', 'sadness', 'optimism', 'joy']]].max()

lift_anger       1.148386
lift_sadness     2.312084
lift_optimism    5.575365
lift_joy         5.817188
dtype: float64

In [71]:
c91.sort_values('anger').tail(5)

Unnamed: 0,continent,country,news_id,anger,sadness,optimism,joy,tweetId,lift_anger,lift_sadness,lift_optimism,lift_joy,username,displayname,userId,description,rawDescription,verified,created,followersCount,friendsCount,statusesCount,favouritesCount,listedCount,mediaCount,location,protected,linkUrl,linkTcourl,profileImageUrl,profileBannerUrl,media_outlet,media_category
218,Oceania,AU,35466620.0,0.603498,0.1675,0.116814,0.112194,50488,1.12064,0.862044,0.812068,0.909794,theheraldsun,Herald Sun,35466620.0,Join the conversation on all the news from Mel...,Join the conversation on all the news from Mel...,True,2009-04-26 13:44:23+00:00,331964,32214,305722,1420,2381,41338,"Melbourne, Australia",False,http://www.heraldsun.com.au,http://t.co/LhVhBJqLWx,https://pbs.twimg.com/profile_images/118640924...,https://pbs.twimg.com/profile_banners/35466620...,The Herald Sun,Newspaper
76,America,US,2836421.0,0.60488,0.165092,0.127563,0.102465,635159,1.123207,0.849654,0.886793,0.8309,MSNBC,MSNBC,2836421.0,"The place for in-depth analysis, political com...","The place for in-depth analysis, political com...",True,2007-03-29 13:15:41+00:00,4219331,767,248262,818,28389,61955,,False,http://msnbc.com/live,https://t.co/YBwtJIhwY3,https://pbs.twimg.com/profile_images/132108981...,https://pbs.twimg.com/profile_banners/2836421/...,MSNBC,Television
107,America,US,457984599.0,0.610819,0.155204,0.11955,0.114424,182224,1.134234,0.798766,0.831094,0.927881,BreitbartNews,Breitbart News,457984599.0,"News, commentary, and destruction of the polit...","News, commentary, and destruction of the polit...",True,2012-01-08 01:50:52+00:00,1471106,110,145954,900,9202,3886,,False,http://breitbart.com,http://t.co/2sVbt3n6lO,https://pbs.twimg.com/profile_images/949270171...,https://pbs.twimg.com/profile_banners/45798459...,Breitbart News,Website
74,America,US,1367531.0,0.618269,0.148134,0.125591,0.108006,199893,1.148069,0.762379,0.873083,0.875836,FoxNews,Fox News,1367531.0,"Follow America's #1 cable news network, delive...","Follow America's #1 cable news network, delive...",True,2007-03-17 19:01:26+00:00,20185319,260,425666,2,68612,138108,U.S.A.,False,http://www.foxnews.com,http://t.co/ZYG58XZtAC,https://pbs.twimg.com/profile_images/918480715...,https://pbs.twimg.com/profile_banners/1367531/...,Fox,Television
85,America,US,13850422.0,0.61844,0.152102,0.126491,0.102965,437591,1.148386,0.782802,0.879346,0.834957,CNNPolitics,CNN Politics,13850422.0,"Political news, campaign stories and Washingto...","Political news, campaign stories and Washingto...",True,2008-02-23 03:12:49+00:00,4136271,344,200996,4,20943,152532,"Washington, DC",False,http://cnn.com/politics,https://t.co/KWFMkrEjdY,https://pbs.twimg.com/profile_images/918899077...,https://pbs.twimg.com/profile_banners/13850422...,CNN,Television


In [72]:
c91.sort_values('anger').tail(5).country.value_counts()

US    4
AU    1
Name: country, dtype: int64

In [73]:
nt[nt.userId.isin(c91[c91.username.isin(c91.sort_values('anger').tail(5).username.values)].userId.values)].theme_desc.value_counts(normalize=True)

Authorities & Politics      0.290414
Economic impact             0.253717
Preventive measures         0.138414
Virus spreading             0.085355
People stories              0.072796
Cases and deaths            0.068310
Vaccines and vaccination    0.049684
Educational impact          0.041311
Name: theme_desc, dtype: float64

In [74]:
nt[nt.userId.isin(c91[c91.username.isin(c91.sort_values('anger').tail(5).username.values)].userId.values)].subtheme_desc.value_counts(normalize=True)

Authorities & Politics (default)                             0.135424
Political authorities                                        0.129058
Economic impact (default)                                    0.128503
Preventive measures (default)                                0.086423
Economic impact over national economies and big companies    0.069036
People stories (default)                                     0.065448
Economic plans to support jobs and food programs             0.056177
Virus spreading (default)                                    0.050538
Vaccines and vaccination (default)                           0.049684
Educational impact (default)                                 0.041311
Cases and deaths (default)                                   0.038918
Mobility restrictions                                        0.037466
Cases and deaths increasing                                  0.024265
Health authorities                                           0.021104
Outbreak and pandemi

In [75]:
nt[nt.userId==c91[c91.username=='InsidersABC'].userId.values[0]].theme_desc.value_counts(normalize=True)

IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
nt[nt.userId==c91[c91.username=='BreitbartNews'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='PnPCBC'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='FoxNews'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='CNNPolitics'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
c91.sort_values('sadness').tail(5).username

In [None]:
c91.sort_values('sadness').tail(5).country.value_counts()

In [None]:
nt[nt.userId.isin(c91[c91.username.isin(c91.sort_values('sadness').tail(5).username.values)].userId.values)].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId.isin(c91[c91.username.isin(c91.sort_values('sadness').tail(5).username.values)].userId.values)].subtheme_desc.value_counts(normalize=True)

In [None]:
c91.sort_values('sadness').tail(5)

In [None]:
c91.sort_values('sadness').tail(5).description.values

In [None]:
nt[nt.userId==c91[c91.username=='Oneindia'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='sunrisedailynow'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='fpjindia'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='NewIndianXpress'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='firstpost'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
c91.sort_values('optimism').tail(5).country.value_counts()

In [None]:
c91.sort_values('optimism').tail(5).username

In [None]:
nt[nt.userId.isin(c91[c91.username.isin(c91.sort_values('optimism').tail(5).username.values)].userId.values)].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId.isin(c91[c91.username.isin(c91.sort_values('optimism').tail(5).username.values)].userId.values)].subtheme_desc.value_counts(normalize=True)

In [None]:
c91.sort_values('optimism').tail(5).description.values

In [None]:
nt[nt.userId==c91[c91.username=='ANCALERTS'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='manilabulletin'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='pnagovph'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='TheManilaTimes'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='BusinessMirror'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
c91.sort_values('joy').tail(5).country.value_counts()

In [None]:
c91.sort_values('joy').tail(5).username

In [None]:
nt[nt.userId.isin(c91[c91.username.isin(c91.sort_values('joy').tail(5).username.values)].userId.values)].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId.isin(c91[c91.username.isin(c91.sort_values('joy').tail(5).username.values)].userId.values)].subtheme_desc.value_counts(normalize=True)

In [None]:
c91.sort_values('joy').tail(5).description.values

In [None]:
nt[nt.userId==c91[c91.username=='DDNewslive'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='bellanaija'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='mid_day'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
nt[nt.userId==c91[c91.username=='BBCRadio4'].userId.values[0]].theme_desc.value_counts(normalize=True)

# Trunk0

In [95]:
nt[nt.userId==c91[c91.username=='RadioCitizenFM'].userId.values[0]].theme_desc.value_counts(normalize=True)

IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
nt[nt.userId==c91[c91.username=='BBCRadio4'].userId.values[0]].theme_desc.value_counts(normalize=True)

In [None]:
c91['politic'] = c91.description.apply(lambda x: 'politic' in x.lower())
c91['live_or_break'] = c91.description.apply(lambda x: 'live' in x.lower() or 'break' in x.lower())

In [None]:
c91[c91.politic].anger.mean() / c91[~c91.politic].anger.mean()

In [None]:
c91[c91.continent=='America'].anger.mean() / c91[~(c91.continent=='America')].anger.mean()

In [None]:
c91[c91.live_or_break].sadness.mean() / c91[~c91.live_or_break].sadness.mean()

In [None]:
c91[c91.live_or_break]

In [None]:
c91.sort_values('anger').tail(5).description.values

In [None]:
c91.sort_values('optimism')

In [None]:
c91.sort_values('sadness')

In [None]:
c91.sort_values('joy')

In [None]:
nt[nt.userId==23937508.0].sort_values('likeCount').tail(25).content.values

In [None]:
nt[nt.userId==23937508.0].sort_values('likeCount').tail(25)

In [None]:
nt[nt.userId==23937508.0].prevalent_emotion.value_counts(normalize=True)

In [None]:
c9.iloc[201,:]

In [None]:
news_accounts[news_accounts.userId==17644834.0]

In [None]:
comments.query("news_id==17644834.0").news_prevalent_emotion.value_counts()

In [None]:
comments.query("news_id==17644834.0").prevalent_emotion.value_counts()

In [None]:
news_tweets.query("userId==17644834.0 and prevalent_emotion=='optimism'").content.values

In [None]:
comments.query("news_id==17644834.0")

In [None]:
c9.iloc[234,:]

In [None]:
comments.query("news_id==23937508.0").news_prevalent_emotion.value_counts()

In [None]:
comments.query("news_id==23937508.0").prevalent_emotion.value_counts()

In [None]:
comments.query("news_id==23937508.0")

In [None]:
news_accounts[news_accounts.userId==23937508.0]

# Likes, replies and shares related with emotions

In [None]:
comments.columns

In [80]:
comments.groupby('prevalent_emotion')[['replyCount', 'retweetCount', 'likeCount', 'quoteCount']].mean()

Unnamed: 0_level_0,replyCount,retweetCount,likeCount,quoteCount
prevalent_emotion,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
anger,0.401236,0.181391,2.172032,0.042514
joy,0.286419,0.186197,1.954152,0.064334
optimism,0.466677,0.388864,2.852092,0.210058
sadness,0.500163,0.524447,2.862427,0.14367
undefined,0.453811,0.404617,2.665493,0.15336


In [81]:
comments.groupby(['news_prevalent_emotion','prevalent_emotion']).tweetId.count()

news_prevalent_emotion  prevalent_emotion
anger                   anger                2030624
                        joy                   265385
                        optimism              223519
                        sadness               255259
                        undefined             339943
joy                     anger                 346331
                        joy                   135494
                        optimism               67078
                        sadness                91242
                        undefined              98946
optimism                anger                1402380
                        joy                   281880
                        optimism              317402
                        sadness               293520
                        undefined             384711
sadness                 anger                3592959
                        joy                   658633
                        optimism              621955
    

# Trunk

In [None]:
theme_desc[theme_desc.theme==4].theme_desc.values[0]

In [None]:
k = np.random.randint(1,news_tweets.shape[0],1)[0]
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k = 1145364
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k = np.random.randint(1,news_tweets.shape[0],1)[0]
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k = 377531
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k = np.random.randint(1,news_tweets.shape[0],1)[0]
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k = 497696 
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k = np.random.randint(1,news_tweets.shape[0],1)[0]
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k = np.random.randint(1,news_tweets.shape[0],1)[0]
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k = np.random.randint(1,news_tweets.shape[0],1)[0]
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k = np.random.randint(1,news_tweets.shape[0],1)[0]
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k = np.random.randint(1,news_tweets.shape[0],1)[0]
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k = np.random.randint(1,news_tweets.shape[0],1)[0]
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k = np.random.randint(1,news_tweets.shape[0],1)[0]
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k = np.random.randint(1,news_tweets.shape[0],1)[0]
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
for _ in range(50):
    k = np.random.randint(1,news_tweets.shape[0],1)[0]
    print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
k=0
print('k:', k, '| Tweet:', news_tweets.iloc[k,:].content, '| Theme:', theme_desc[theme_desc.theme==news_tweets.iloc[k,:].theme].theme_desc.values[0])

In [None]:
news_tweets.iloc[k,:]

In [None]:
topics[topics.tweetId==1331811812828803073]

In [None]:
theme_desc

In [None]:
topics = pd.read_parquet('./news_tweets_topics2.parquet')

In [None]:
news_tweets[news_tweets.tweetId==1331811812828803073].content.values

In [None]:
news_tweets.iloc[k,:]

In [None]:
k

In [None]:
news_tweets = pd.read_parquet('./../data/raw/news_tweets_with_em_scores.parquet')
news_tweets['emotion_score'] = news_tweets[['anger','joy','optimism','sadness']].max(axis=1)
news_tweets['prevalent_emotion'] = news_tweets[['anger','joy','optimism','sadness']].idxmax(axis=1)
news_tweets['prevalent_emotion'] = np.where(news_tweets.emotion_score>0.5, news_tweets.prevalent_emotion, 'undefined')

comments = comments[comments.tweetId.isin(plain_comments.tweetId)]
topics['tweetId'] = topics.tweetId.astype('float64')
news_tweets = news_tweets[~news_tweets.conversationId.isnull()].merge(topics[['theme'] + subthemes + ['tweetId']], how='left', on='tweetId')
news_accounts = news_accounts.merge(media_list, left_on='username', right_on='account')
news_accounts['userId'] = news_accounts.userId.astype('float64')
news_tweets = news_tweets.merge(news_accounts[['userId','country']], how='left')

# Define subtheme
news_tweets['subtheme'] = news_tweets[subthemes].idxmax(axis=1)
news_tweets['subtheme'] = news_tweets.subtheme.apply(lambda x: int(x[-2:]))
news_tweets['aux'] = news_tweets[subthemes].sum(axis=1)
news_tweets.loc[news_tweets.aux==0,'subtheme'] = news_tweets.loc[news_tweets.aux==0,'theme']*10
news_tweets = news_tweets.drop(subthemes + ['aux'], axis=1)
news_tweets['ds'] = news_tweets.date.dt.date

df = comments.merge(news_tweets.rename(columns={'date':'newsDate', 'userId':'newsId'})[[
    'conversationId', 'newsDate', 'newsId', 'prevalent_emotion', 'emotion_score', 'theme', 'subtheme', 'country'
]].drop_duplicates('conversationId'))
df['ds'] = (df['newsDate'] + pd.offsets.Week(weekday=6)).dt.date

In [None]:
df.groupby(['country', 'theme']).theme.count().rename('count').reset_index()

In [None]:
# import libraries
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# load dataset
tips = sns.load_dataset("tips")

# set the figure size
plt.figure(figsize=(14, 14))

# from raw value to percentage
total = tips.groupby('day')['total_bill'].sum().reset_index()
smoker = tips[tips.smoker=='Yes'].groupby('day')['total_bill'].sum().reset_index()
smoker['total_bill'] = [i / j * 100 for i,j in zip(smoker['total_bill'], total['total_bill'])]
total['total_bill'] = [i / j * 100 for i,j in zip(total['total_bill'], total['total_bill'])]

# bar chart 1 -> top bars (group of 'smoker=No')
bar1 = sns.barplot(x="day",  y="total_bill", data=total, color='darkblue')

# bar chart 2 -> bottom bars (group of 'smoker=Yes')
bar2 = sns.barplot(x="day", y="total_bill", data=smoker, color='lightblue')

# add legend
top_bar = mpatches.Patch(color='darkblue', label='smoker = No')
bottom_bar = mpatches.Patch(color='lightblue', label='smoker = Yes')
plt.legend(handles=[top_bar, bottom_bar])

# show the graph
plt.show()

# Rosie UK data

In [None]:
comments = comments.merge(news_tweets_with_em.rename(columns={'anger':'news_anger', 'sadness':'news_sadness', 'optimism':'news_optimism', 'joy':'news_joy'}).sort_values('date').drop_duplicates('conversationId', keep='first')[['conversationId', 'news_anger', 'news_sadness', 'news_optimism', 'news_joy']], how='left')
for i, g in df3[df3.newsName.isin(['BBC News (UK)', 'Daily Mail Online', 'The Guardian'])].groupby('newsName'):
    g.to_csv('./uk_users/{}.csv'.format(i), header=True, index_label=False, index=False)

# Google Data Studio

In [None]:
c2 = comments.merge(news_tweets_with_em.rename(columns={'anger':'news_anger', 'sadness':'news_sadness', 'optimism':'news_optimism', 'joy':'news_joy'}).sort_values('date').drop_duplicates('conversationId', keep='first')[['conversationId', 'news_anger', 'news_sadness', 'news_optimism', 'news_joy']], how='left')

In [None]:
c2[c2.country=='UK'].merge(theme_desc).merge(subtheme_desc).merge(news_accounts[['userId', 'displayname']].rename(columns={'userId':'newsId', 'displayname':'newsName'}), how='left').drop(['theme', 'subtheme', 'newsId'], axis=1)

In [None]:
df3 = df[df.country=='UK'].merge(theme_desc).merge(subtheme_desc).merge(news_accounts[['userId', 'displayname']].rename(columns={'userId':'newsId', 'displayname':'newsName'}), how='left').drop(['theme', 'subtheme', 'newsId'], axis=1)

In [None]:
c2

In [None]:
comments.news_emotion_score.isnull().sum()

In [None]:
comments

In [None]:
nt = pd.read_parquet('./../data/raw/news_tweets.parquet')

In [None]:
news_tweets['emotion_score'] = news_tweets[['anger','joy','optimism','sadness']].max(axis=1)
news_tweets['prevalent_emotion'] = news_tweets[['anger','joy','optimism','sadness']].idxmax(axis=1)
news_tweets['prevalent_emotion'] = np.where(news_tweets.emotion_score>0.5, news_tweets.prevalent_emotion, 'undefined')

comments = comments[comments.tweetId.isin(plain_comments.tweetId)]
topics['tweetId'] = topics.tweetId.astype('float64')
news_tweets = news_tweets[~news_tweets.conversationId.isnull()].merge(topics[['theme'] + subthemes + ['tweetId']], how='left', on='tweetId')
news_accounts = news_accounts.merge(media_list, left_on='username', right_on='account')
news_accounts['userId'] = news_accounts.userId.astype('float64')
news_tweets = news_tweets.merge(news_accounts[['userId','country']], how='left')

# Define subtheme
news_tweets['subtheme'] = news_tweets[subthemes].idxmax(axis=1)
news_tweets['subtheme'] = news_tweets.subtheme.apply(lambda x: int(x[-2:]))
news_tweets['aux'] = news_tweets[subthemes].sum(axis=1)
news_tweets.loc[news_tweets.aux==0,'subtheme'] = news_tweets.loc[news_tweets.aux==0,'theme']*10
news_tweets = news_tweets.drop(subthemes + ['aux'], axis=1)
news_tweets['ds'] = news_tweets.date.dt.date

In [None]:
df = comments.merge(news_tweets.rename(columns={'date':'newsDate', 'userId':'newsId', 'anger':'news_anger', 'sadness':'news_sadness', 'joy':'news_joy', 'optimism':'news_optimism'})[[
    'conversationId', 'newsDate', 'newsId', 'prevalent_emotion', 'emotion_score', 'theme', 'subtheme', 'country', 'news_anger', 'news_joy', 'news_sadness', 'news_optimism'
]].drop_duplicates('conversationId'))
df['ds'] = df.newsDate.dt.date

In [None]:
df3 = df[df.country=='UK'].merge(theme_desc).merge(subtheme_desc).merge(news_accounts[['userId', 'displayname']].rename(columns={'userId':'newsId', 'displayname':'newsName'}), how='left').drop(['theme', 'subtheme', 'newsId'], axis=1)

In [None]:
for i, g in df3[df3.newsName.isin(['BBC News (UK)', 'Daily Mail Online', 'The Guardian'])].groupby('newsName'):
    g.to_csv('./uk_users/{}.csv'.format(i), header=True, index_label=False, index=False)

In [None]:
news_tweets.rename(columns={'userId':'newsId'})

In [None]:
nt = news_tweets.rename(columns={'userId':'newsId'}).groupby(['ds', 'theme', 'subtheme', 'country', 'newsId']) \
       .agg({'content':'size', 'anger':'mean', 'joy':'mean', 'optimism':'mean', 'sadness':'mean', 'replyCount':'mean', 'retweetCount':'mean', 'likeCount':'mean', 'quoteCount':'mean'}) \
       .rename(columns={'content':'count'}) \
       .reset_index()
nt = nt.merge(theme_desc).merge(subtheme_desc)
nt = nt.drop(['theme_desc', 'subtheme_desc'], axis=1)
nt['newsId'] = nt.newsId.astype('int64')
nt.to_csv('agg_news_tweets.csv', index=False)

In [None]:
nt.columns

In [None]:
for col in ['replyCount', 'retweetCount', 'likeCount','quoteCount']:
    nt[col] = nt[col].astype('int32')

In [None]:
nt

In [None]:
nt.to_csv('agg_news_tweets.csv', index=False)

In [None]:
df['emotion_score'] = df[['anger','joy','optimism','sadness']].max(axis=1)
df['comment_emotion'] = df[['anger','joy','optimism','sadness']].idxmax(axis=1)
df['comment_emotion'] = np.where(df.emotion_score>0.5, df.comment_emotion, 'undefined')

In [None]:
df2 = df.groupby(['ds', 'theme', 'subtheme', 'country', 'newsId', 'prevalent_emotion']) \
       .agg({'content':'size', 'anger':'mean', 'joy':'mean', 'optimism':'mean', 'sadness':'mean', 'replyCount':'mean', 'retweetCount':'mean', 'likeCount':'mean', 'quoteCount':'mean'}) \
       .rename(columns={'content':'count'}) \
       .reset_index()
df2 = df2.merge(theme_desc).merge(subtheme_desc)
df2 = df2.drop(['theme_desc', 'subtheme_desc'], axis=1)
df2['newsId'] = df2.newsId.astype('int64')

In [None]:
df2[
    ~(df2.anger.isnull()) & ~(df2.joy.isnull()) & ~(df2.optimism.isnull()) & ~(df2.sadness.isnull())
].to_csv('agg_users_tweets.csv', index=False)

In [None]:
news_accounts['userId'] = news_accounts.userId.astype('int64')
news_accounts[['country', 'userId', 'displayname']].to_csv('lookup_news.csv', index=False)

In [None]:
df2

In [None]:
df2.dtypes

In [None]:
emotion_id = pd.DataFrame({'news_emotion_id':range(5),'prevalent_emotion':list(df2.prevalent_emotion.unique())})

In [None]:
country_id = pd.DataFrame({'country_id':range(12),'country':list(df2.country.unique())})

In [None]:
df2 = df2.merge(theme_desc).merge(subtheme_desc).merge(emotion_id).merge(country_id)
df2 = df2.drop(['country', 'prevalent_emotion', 'theme_desc', 'subtheme_desc'], axis=1)

In [None]:
df2.to_csv('emotion_per_theme.csv', index=False)
country_id.to_csv('lookup_coutry.csv', index=False)

In [None]:
df2[
    ~(df2.anger.isnull()) & ~(df2.joy.isnull()) & ~(df2.optimism.isnull()) & ~(df2.sadness.isnull())
].to_csv('agg_users_tweets.csv', index=False)

In [None]:
df2.isnull().sum()

In [None]:
df2[df2.anger.isnull()]

In [None]:
for t in [1,2,3,4,5,6,7,8]:
#for st in [11,12,21,22,41,42,43,44,51,52,53,61,62,63,64,65,66,71,72,73,74]:
    fig, axs = plt.subplots(figsize=(15, 6))
    #df = comments[comments.conversationId.isin(news_tweets[news_tweets.tweetId.isin(topics[topics[fr"subtheme{st}"]].tweetId)].conversationId)]
    df = comments[comments.conversationId.isin(news_tweets[news_tweets.tweetId.isin(topics[topics.theme==t].tweetId)].conversationId)]
    df = df.merge(news_tweets[~news_tweets.conversationId.isnull()].rename(columns={'date':'news_date'})[['conversationId', 'news_date']].drop_duplicates('conversationId'))
    #df['ds'] = pd.to_datetime(df.date.dt.year.astype(str) + '-' + df.date.dt.month.astype(str) + '-1')
    df['ds'] = df.news_date.dt.date
    df = df[['ds', 'anger', 'joy', 'optimism', 'sadness']]
    df = df.groupby('ds')[['anger', 'joy', 'optimism', 'sadness']].mean()
    df.plot.line(ax=axs)
    axs.set_xlabel("datetime")
    axs.set_ylabel("mean score per emotion")
    fig.savefig(fr"theme_monthly_{t}.png")

In [None]:
df = news_tweets[news_tweets.tweetId.isin(news_tweets_topics[news_tweets_topics.theme==8].tweetId)].copy()
df['ds'] = df.date.dt.date
df = df[['ds', 'anger', 'joy', 'optimism', 'sadness']]
df = df.groupby('ds')[['anger', 'joy', 'optimism', 'sadness']].mean()
df.plot.line(figsize=(15, 6))

In [None]:
df

In [None]:
df = df[['date', 'anger', 'joy', 'optimism', 'sadness']]
df.set_index('date').plot.line()

In [None]:
news_tweets_topics = pd.read_parquet('./news_tweets_topics.parquet')

In [None]:
news_tweets_topics[news_tweets_topics.topic_11 == tweets.iloc[14,:].topic_11]

In [None]:
news_tweets[news_tweets.tweetId==1315975830032592896]

In [None]:
news_tweets_topics[news_tweets_topics.tweetId==1315975830032592896]

In [None]:
comments[comments.conversationId==1315975830032592896]

In [None]:
tweets[tweets.conversationId==1315975830032592896]

In [None]:
tweets[(tweets.topic_11==tweets.iloc[14,:].topic_11)].head()

In [None]:
tweets[~tweets.topic_0.isnull()].head()

In [None]:
tweets.iloc[14,:].topic_11

In [None]:
tweets.shape

In [None]:
news_tweets.userId.nunique()

In [None]:
topics.topic.unique()

In [None]:
tweets = pd.read_parquet('./../data/raw/news_tweets.parquet').sample(frac=1, random_state=3)
tweets = tweets.reset_index(drop=True)

In [None]:
df = pd.read_csv('tweets_topics2.csv')

In [None]:
tweets['topic'] = df.Dominant_Topic