# Social Metrics

In [1]:
import glob
import json
import pprint
from pathlib import Path

import altair as alt
import pandas as pd

In [2]:
# Flatten every saved tweet into one flat record per tweet.
# Each *.json file is named after the account it belongs to and contains a
# list of items whose "data" key holds the tweets.
# sorted() pins the load order (and therefore the row order downstream),
# since glob returns matches in arbitrary order.
files = sorted(glob.glob("tweets/newspapers/recent_tweets/*.json"))
holder = []
for f in files:
    # Path.stem drops the directory and the ".json" suffix whatever path
    # separator glob produced; splitting on "/" broke on backslash paths.
    account = Path(f).stem
    # Tweets contain non-ASCII text, so do not rely on the platform default.
    with open(f, encoding="utf-8") as fh:
        pages = json.load(fh)
    for page in pages:
        # An item without "data" contributes nothing instead of raising KeyError.
        for t in page.get("data", []):
            public_metrics = t["public_metrics"]
            holder.append({
                "id": t["id"],
                "account": account,
                "created_at": t["created_at"],
                "likes": public_metrics["like_count"],
                "quotes": public_metrics["quote_count"],
                "replies": public_metrics["reply_count"],
                "retweets": public_metrics["retweet_count"],
                "reply_settings": t["reply_settings"],
                "source": t["source"],
                "text": t["text"],
            })

In [3]:
# Build the tweet table from the flat records collected in `holder`.
tweets = pd.DataFrame(holder)
tweets['created_at'] = pd.to_datetime(tweets['created_at'])
# Index by tweet id (and drop the column) so single tweets can be looked up
# with tweets.loc[<id>].
tweets = tweets.set_index('id')
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only appended a stray "None" to the output.
tweets.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1995 entries, 1523076460780556288 to 1520623310442143744
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype              
---  ------          --------------  -----              
 0   account         1995 non-null   object             
 1   created_at      1995 non-null   datetime64[ns, UTC]
 2   likes           1995 non-null   int64              
 3   quotes          1995 non-null   int64              
 4   replies         1995 non-null   int64              
 5   retweets        1995 non-null   int64              
 6   reply_settings  1995 non-null   object             
 7   source          1995 non-null   object             
 8   text            1995 non-null   object             
dtypes: datetime64[ns, UTC](1), int64(4), object(4)
memory usage: 155.9+ KB
None


In [4]:
def tweet_finder(data, key, value, account, data_dir='tweets/newspapers/recent_tweets'):
    """Return the raw tweet objects of one account that match a column value.

    Rows of ``data`` where ``data[key] == value`` are selected, and every
    tweet in ``{data_dir}/{account}.json`` whose ``id`` appears in the index
    of that selection is returned as stored in the file.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame indexed by tweet id.
    key : str
        Column of ``data`` to compare.
    value : object
        Value the column must equal.
    account : str
        File stem of the JSON file to read.
    data_dir : str, optional
        Directory holding the per-account JSON files. Defaults to the
        directory the notebook loads its tweets from.

    Returns
    -------
    list of dict
        The matching tweet objects, in file order. The number of matches is
        also printed.
    """
    sieve = data[data[key] == value]
    matching_ids = sieve.index
    # Tweets contain non-ASCII text, so do not rely on the platform default.
    with open(f'{data_dir}/{account}.json', encoding='utf-8') as f:
        pages = json.load(f)
    # An item without "data" contributes nothing instead of raising KeyError.
    holder = [
        t
        for page in pages
        for t in page.get('data', [])
        if t['id'] in matching_ids
    ]
    print(f'The result set has {len(holder)} tweets.')
    return holder

## Retweets, Quote Tweets, Likes and Replies

In [5]:
# One frame per account containing only the four engagement counts, with the
# account name attached as an extra column.
metric_columns = ['likes', 'quotes', 'replies', 'retweets']
grouper = tweets.groupby('account')
holder = [
    frame[metric_columns].assign(account=name)
    for name, frame in grouper
]

In [6]:
# Stack the per-account frames in `holder` and summarise the numeric columns,
# rendering whole numbers with thousands separators.
metrics = pd.concat(holder)
z = metrics.describe()
number_format = {"precision": 0, "na_rep": 'MISSING', "thousands": ","}
z.style.format(**number_format)

Unnamed: 0,likes,quotes,replies,retweets
count,1995,1995,1995,1995
mean,15,1,6,4
std,114,12,36,20
min,0,0,0,0
25%,1,0,0,0
50%,3,0,1,1
75%,9,1,4,3
max,3825,403,1214,786


In [7]:
# Reshape each account's metrics to long form: unstack() turns the four
# columns into one series keyed by (column name, tweet id), and reset_index()
# exposes those keys as the columns "level_0" and "id" next to the value
# column 0.
metric_columns = ['likes', 'quotes', 'replies', 'retweets']
grouper = tweets.groupby('account')
holder = []
for name, frame in grouper:
    long_form = pd.DataFrame(frame[metric_columns].unstack()).reset_index()
    long_form['account'] = name
    holder.append(long_form)

In [8]:
# Combine the long-form frames in `holder`, give the generated columns
# readable names and drop the tweet id, which the chart does not use.
chart_metrics = (
    pd.concat(holder)
    .rename(columns={0: "value", "level_0": "metric"})
    .drop(columns='id')
)
chart_metrics.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7980 entries, 0 to 1099
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   metric   7980 non-null   object
 1   value    7980 non-null   int64 
 2   account  7980 non-null   object
dtypes: int64(1), object(2)
memory usage: 249.4+ KB


In [9]:
# chart_metrics has 7,980 rows (see the info() output above); lift Altair's
# row limit so the chart below can embed the whole frame.
# NOTE(review): Altair's default limit is presumably 5,000 rows - confirm for the installed version.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [10]:
# Box plot of metric values per account, one facet per metric, two per row.
account_axis = alt.X('account:N', axis=alt.Axis(title=""))
value_axis = alt.Y('value:Q', axis=alt.Axis(title="count of metric"))

metrics_boxplot = (
    alt.Chart(chart_metrics)
    .mark_boxplot()
    .encode(
        x=account_axis,
        y=value_axis,
        tooltip=["account", "metric", "value"],
    )
    .properties(width=200, title="Public Metrics")
    .facet('metric:N', columns=2)
)

metrics_boxplot

In [11]:
# Row(s) whose like count equals the overall maximum.
top_likes = tweets['likes'].max()
tweets[tweets['likes'] == top_likes]

Unnamed: 0_level_0,account,created_at,likes,quotes,replies,retweets,reply_settings,source,text
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1522223815044050944,rtenews,2022-05-05 14:36:53+00:00,3825,403,824,786,everyone,Twitter Media Studio,President Michael D Higgins has described the ...


## Most Liked Tweet

RTÉ have the most-liked tweet of the bunch, with 3,825 likes. What was the tweet about?

In [12]:
# Full text and posting time of the most-liked tweet; created_at is stored in
# UTC, so the printed time is UTC as well.
most_liked = tweets.loc["1522223815044050944"]
print(most_liked['text'])
print(most_liked['created_at'].strftime("%B %d, %H:%M"))

President Michael D Higgins has described the purchase of Twitter by tech billionaire Elon Musk as a 'manifestation of an incredible and dangerous narcissism' | Read more: https://t.co/UcW55R2V7G https://t.co/5b9CZCjL3E
May 05, 14:36


The next most-liked tweet, with 2,603 likes, is from the Independent:

In [13]:
# Row(s) holding the second-highest distinct like count. Computing it keeps
# the cell correct when the tweet files are refreshed, where a hard-coded
# account and like count would silently return an empty frame.
runner_up_likes = tweets['likes'].drop_duplicates().nlargest(2).iloc[-1]
tweets[tweets['likes'] == runner_up_likes]

Unnamed: 0_level_0,account,created_at,likes,quotes,replies,retweets,reply_settings,source,text
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1521055649748160512,independent_ie,2022-05-02 09:15:01+00:00,2603,83,341,307,everyone,Buffer,A Catholic couple say they will go to jail rat...


In [14]:
# Full text and posting time of the Independent tweet shown above; created_at
# is stored in UTC, so the printed time is UTC as well.
runner_up = tweets.loc["1521055649748160512"]
print(runner_up['text'])
print(runner_up['created_at'].strftime("%B %d, %H:%M"))

A Catholic couple say they will go to jail rather than pay a €300 fine for travelling 70km to attend Mass during lockdown https://t.co/Msgt6bUEUL
May 02, 09:15


## Most Replies

The tweet with the most replies is also from the Independent:

In [15]:
# Row(s) whose reply count equals the overall maximum, computed the same way
# as the most-liked lookup instead of hard-coding the account and the count.
tweets[tweets['replies'] == tweets['replies'].max()]

Unnamed: 0_level_0,account,created_at,likes,quotes,replies,retweets,reply_settings,source,text
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1522224352930013185,independent_ie,2022-05-05 14:39:01+00:00,1513,297,1214,31,everyone,Buffer,‘We are pregnant!’- Brian Dowling and partner ...


In [16]:
# Full text and posting time of the most-replied tweet; created_at is stored
# in UTC, so the printed time is UTC as well.
most_replied = tweets.loc["1522224352930013185"]
print(most_replied['text'])
print(most_replied['created_at'].strftime("%B %d, %H:%M"))

‘We are pregnant!’- Brian Dowling and partner Arthur Gourounlian announce they’re expecting their first child. https://t.co/RV6q7AOEw6
May 05, 14:39


## Most Retweets

The tweet with the most likes is also the tweet with the most retweets - RTÉ's tweet about the President's remarks on Elon Musk's purchase of Twitter.

In [17]:
# Row(s) whose retweet count equals the overall maximum, computed the same way
# as the most-liked lookup instead of hard-coding the account and the count.
tweets[tweets['retweets'] == tweets['retweets'].max()]

Unnamed: 0_level_0,account,created_at,likes,quotes,replies,retweets,reply_settings,source,text
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1522223815044050944,rtenews,2022-05-05 14:36:53+00:00,3825,403,824,786,everyone,Twitter Media Studio,President Michael D Higgins has described the ...


In [18]:
# Full text and posting time of the most-retweeted tweet; created_at is stored
# in UTC, so the printed time is UTC as well.
most_retweeted = tweets.loc["1522223815044050944"]
print(most_retweeted['text'])
print(most_retweeted['created_at'].strftime("%B %d, %H:%M"))

President Michael D Higgins has described the purchase of Twitter by tech billionaire Elon Musk as a 'manifestation of an incredible and dangerous narcissism' | Read more: https://t.co/UcW55R2V7G https://t.co/5b9CZCjL3E
May 05, 14:36
