# The Five Individual Accounts

In [1]:
import json
import glob
import pandas as pd
import altair as alt
import pprint

In [2]:
# Load every saved account-details JSON file into a list of parsed payloads.
files = glob.glob('tweets/newspapers/newspaper_account_details/*.json')
media_details = []
for path in files:
    # Read explicitly as UTF-8: the payloads contain non-ASCII text (e.g. "RTÉ News"),
    # and open() would otherwise fall back to the platform's default encoding.
    with open(path, encoding='utf-8') as handle:
        media_details.append(json.load(handle))

## Account Metadata

This is the metadata for a typical Twitter account - [@Independent_ie](https://twitter.com/Independent_ie) in this case.

In [3]:
# Pick the example account by username rather than by list position: the order of
# glob.glob() results is not guaranteed, so a fixed index could show a different account.
example_username = 'Independent_ie'
example_account = next(
    detail['data']
    for detail in media_details
    if detail['data']['username'].lower() == example_username.lower()
)
pprint.pprint(example_account)

{'created_at': '2009-11-20T12:55:30.000Z',
 'description': "https://t.co/pT0SGCt8uD is Ireland's most read news website "
                'providing breaking news content to 13 million readers a '
                'month. Get in touch: contact@independent.ie.',
 'id': '91334232',
 'location': 'Dublin',
 'name': 'Independent.ie',
 'public_metrics': {'followers_count': 712402,
                    'following_count': 134,
                    'listed_count': 2650,
                    'tweet_count': 457011},
 'url': 'https://t.co/N0HTtXGDTV',
 'username': 'Independent_ie',
 'verified': True}


In [4]:
# Flatten each account's nested payload into one flat record per account,
# ready to become a DataFrame row.
holder = []
for detail in media_details:
    account = detail['data']
    metrics = account['public_metrics']
    holder.append({
        "created_at": account['created_at'],
        "username": account['username'],
        "name": account['name'],
        "id": account['id'],
        "followers": metrics['followers_count'],
        "following": metrics['following_count'],
        "listed": metrics['listed_count'],
        "tweets": metrics['tweet_count'],
        # `location` is an optional profile field in the Twitter API user object;
        # accounts without one get None instead of raising KeyError.
        "location": account.get('location'),
        "verified": account['verified'],
    })

In [5]:
# Build the per-account table: indexed by account id, with parsed creation
# timestamps and a followers-per-tweet ratio, ordered from most to fewest followers.
media_df = (
    pd.DataFrame(holder)
    .set_index('id')
    .assign(created_at=lambda frame: pd.to_datetime(frame['created_at']))
    .assign(followers_per_tweet=lambda frame: frame['followers'] / frame['tweets'])
    .sort_values('followers', ascending=False)
)
media_df

Unnamed: 0_level_0,created_at,username,name,followers,following,listed,tweets,location,verified,followers_per_tweet
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
8973062,2007-09-19 12:52:21+00:00,rtenews,RTÉ News,1103818,308,4041,206940,Ireland,True,5.334
91334232,2009-11-20 12:55:30+00:00,Independent_ie,Independent.ie,712402,134,2650,457011,Dublin,True,1.558829
150246405,2010-05-31 13:08:52+00:00,thejournal_ie,TheJournal.ie,706618,746,2439,314417,Ireland,True,2.247391
15084853,2008-06-11 13:54:36+00:00,IrishTimes,The Irish Times,670120,156,4014,680596,Ireland,True,0.984608
19903360,2009-02-02 12:01:29+00:00,irishexaminer,Irish Examiner,241613,602,1750,443308,Ireland,True,0.545023


## The Five Accounts

This is a table of the five accounts, showing when they were created, username and name, and other significant details.

In [6]:
def _long_date(timestamp):
    """Format a timestamp as e.g. 'September 19, 2007'."""
    return timestamp.strftime('%B %d, %Y')


# Presentation-only view: long-form dates, two decimal places, comma thousands separators.
media_df.style.format(
    formatter={'created_at': _long_date},
    precision=2,
    thousands=",",
)

Unnamed: 0_level_0,created_at,username,name,followers,following,listed,tweets,location,verified,followers_per_tweet
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
8973062,"September 19, 2007",rtenews,RTÉ News,1103818,308,4041,206940,Ireland,1,5.33
91334232,"November 20, 2009",Independent_ie,Independent.ie,712402,134,2650,457011,Dublin,1,1.56
150246405,"May 31, 2010",thejournal_ie,TheJournal.ie,706618,746,2439,314417,Ireland,1,2.25
15084853,"June 11, 2008",IrishTimes,The Irish Times,670120,156,4014,680596,Ireland,1,0.98
19903360,"February 02, 2009",irishexaminer,Irish Examiner,241613,602,1750,443308,Ireland,1,0.55


## Observations

* RTÉ were the earliest adopters of Twitter, opening their account eighteen months after Twitter was founded.
* RTÉ are also the most popular account to follow, with 1.1 million followers.
* RTÉ are not the biggest tweeters, however - that honour belongs to the Irish Times, with nearly seven hundred thousand tweets to show for their fourteen years of activity.
* Four of the five accounts set their location as Ireland. Only the Independent specifies Dublin.

## Charts: Followers per Account, Followers per Tweet per Account

In [7]:
# Pre-formatted string copies of the numeric columns, used as chart tooltip text.
# The trailing space in each new column name keeps it distinct from the numeric original.
media_df['followers '] = media_df['followers'].map("{:,}".format)
media_df['followers_per_tweet '] = media_df['followers_per_tweet'].map("{:.2f}".format)

In [8]:
def _account_bar_chart(data, value_column, tooltip_column, title, color=None):
    """Build a bar chart of one metric per account, bars ordered from highest to lowest.

    Parameters:
        data: DataFrame with a 'name' column plus the two columns named below.
        value_column: numeric column plotted on the y axis.
        tooltip_column: column shown next to the account name in the tooltip.
        title: chart title.
        color: optional bar colour; Altair's default is used when None.
    """
    mark_options = {} if color is None else {'color': color}
    return (
        alt.Chart(data)
        .mark_bar(**mark_options)
        .encode(
            x=alt.X('name', sort='-y'),
            y=value_column,
            tooltip=['name', tooltip_column],
        )
        .properties(title=title, width=300)
    )


# The tooltip columns with a trailing space hold the pre-formatted display strings.
bar = _account_bar_chart(media_df, 'followers', 'followers ',
                         title='Followers per Account')
bar2 = _account_bar_chart(media_df, 'followers_per_tweet', 'followers_per_tweet ',
                          title='Followers per Tweet per Account',
                          color='crimson')
bar | bar2