# Facebook posts 2020 year-end summary

This notebook retrieves the number of shares and reactions (likes, loves, etc.) for each post that Oranje Express published on Facebook in 2020.

In [1]:
import os
import time
from datetime import datetime
from urllib.parse import parse_qs, urlparse

import facebook
import pandas as pd

In [2]:
# Graph API access token, read from the FB_ACCESS_TOKEN environment variable so
# that the secret is never stored in the notebook. Export the variable before
# starting the kernel; when it is unset TOKEN is an empty string.
TOKEN = os.environ.get('FB_ACCESS_TOKEN', '')
# name of the Facebook page whose posts are collected
PAGE_NAME = 'OranjeExpress.org'

In [3]:
# reaction types whose totals are requested for every post
REACTIONS = [
    'LIKE',
    'LOVE',
    'WOW',
    'HAHA',
    'SAD',
    'ANGRY',
    'THANKFUL',
    'PRIDE',
    'CARE',
]

# one field expression per reaction type: it asks only for the summary total
# (limit 0 -> no individual reactions) and aliases it as `reactions_<type>`
qReactions = [
    'reactions.type(%s).limit(0).summary(total_count).as(reactions_%s)'
    % (reaction_type.upper(), reaction_type.lower())
    for reaction_type in REACTIONS
]

In [4]:
# `since`: datetime posts published since (format: YYYY/MM/DD)
DATETIME_SINCE = "2020/01/01"

# `limit`: number of posts requested per call
DEFAULT_NR_LIMIT = 100 # max value=100

# field variables of interest: plain post attributes first ...
FIELDS = ['post_id', 'created_time', 'is_popular', 'shares']
# ... followed by a single entry holding all reaction-count expressions
FIELDS.append(', '.join(qReactions))

In [5]:
def _nextAfterCursor(paging):
    '''
    return the `after` cursor of the following page, or None when the
    `paging` dict of a response announces no further page
    '''
    next_url = paging.get('next')
    if not next_url:
        return None

    # parse the query string instead of splitting on 'after=': the cursor is
    # not guaranteed to be the last parameter and its value is url-encoded
    after_values = parse_qs(urlparse(next_url).query).get('after')
    if after_values:
        return after_values[0]

    # fall back to the cursor listed next to the link, if any
    cursor = (paging.get('cursors') or {}).get('after')
    if cursor:
        return cursor

    # do not echo the link itself: it may embed the access token
    raise ValueError("the 'next' page link carries no 'after' cursor; cannot continue paging")


def getPostData (TOKEN, PAGE_NAME, FIELDS=FIELDS, 
                 NR_LIMIT=DEFAULT_NR_LIMIT, SINCE=DATETIME_SINCE):
    '''
    get fb page posts data with given parameters using facebook api

    TOKEN:     Graph API access token
    PAGE_NAME: name (or id) of the page to look up
    FIELDS:    list of field expressions, joined with ',' into the `fields` query
    NR_LIMIT:  number of posts requested per call (`limit`)
    SINCE:     "YYYY/MM/DD" date passed as `since`; it is converted to a unix
               timestamp with `time.mktime`, i.e. interpreted in local time

    returns the list of post dicts collected over all pages of the
    `published_posts` connection; raises ValueError when a response links to a
    next page without providing an `after` cursor
    '''
    graph = facebook.GraphAPI(access_token = TOKEN)
    # the Graph API query parameter is `fields` (plural)
    pageId = graph.get_object(PAGE_NAME, fields='id')['id']
    print("The Id of page \'%s\' is %s." % (PAGE_NAME, pageId))

    # timestamp used for `since`
    timestamp_since = time.mktime(datetime.strptime(SINCE, "%Y/%m/%d").timetuple())

    # arguments shared by every request; `after` is added from the second page on
    query = dict(
        id=pageId, connection_name='published_posts', limit=NR_LIMIT,
        fields=','.join(FIELDS), # create a query string of combined fields
        since=timestamp_since
    )

    data = [] # initial empty list to store data
    while True:
        posts = graph.get_connections(**query)
        data += posts.get('data', [])

        # a response without `paging` (or without `next`) is the last page
        after = _nextAfterCursor(posts.get('paging') or {})
        if after is None:
            break
        query['after'] = after

    print('Completed! Total %i posts scrapped ^^!' % len(data))

    return data

In [6]:
# fetch the posts of the configured page (remaining parameters use their defaults)
d = getPostData(TOKEN=TOKEN, PAGE_NAME=PAGE_NAME)

The Id of page 'OranjeExpress.org' is 225292010874776.
Completed! Total 214 posts scrapped ^^!


In [7]:
# turn the list of post dicts into a table
posts = pd.DataFrame(data=d)

# show one randomly picked row to get a glance of the dataset
posts.sample(n=1)

Unnamed: 0,created_time,is_popular,shares,reactions_like,reactions_love,reactions_wow,reactions_haha,reactions_sad,reactions_angry,reactions_thankful,reactions_pride,reactions_care,id
196,2020-02-05T08:33:10+0000,False,{'count': 12},"{'data': [], 'summary': {'total_count': 46}}","{'data': [], 'summary': {'total_count': 0}}","{'data': [], 'summary': {'total_count': 0}}","{'data': [], 'summary': {'total_count': 1}}","{'data': [], 'summary': {'total_count': 0}}","{'data': [], 'summary': {'total_count': 0}}","{'data': [], 'summary': {'total_count': 0}}","{'data': [], 'summary': {'total_count': 0}}","{'data': [], 'summary': {'total_count': 0}}",225292010874776_3389817977755481


In [8]:
def getReactionTotalCount( reaction ):
    '''
    parse dict of certain reaction type to its total count

    reaction: value of a `reactions_<type>` cell; expected to be a dict shaped
              like {'summary': {'total_count': <int>}}

    returns the total count, or 0 when the value is not a dict (e.g. a
    missing value)
    '''
    # isinstance also accepts dict subclasses, which `type(x) == dict` rejected
    if not isinstance(reaction, dict):
        return 0
    return reaction['summary']['total_count']

In [9]:
# dataframe subset without reactions & share
# (raw string: `\w` inside a plain string literal is an invalid escape sequence)
p0 = posts.filter(regex=r"^(?!reaction)\w+$").drop('shares', axis=1)

# share count per post; a value that is not a dict counts as 0 shares
p_shares = posts.shares.apply(lambda x: x['count'] if isinstance(x, dict) else 0)

# total count per reaction type: select the `reactions_<type>` columns by their
# prefix and convert them column by column
p_reactions = (posts.filter(regex=r"^reactions_")
                    .apply(lambda col: col.map(getReactionTotalCount)))

# combine data subsets
posts_cls = (p0.merge(p_shares, left_index=True, right_index=True)
               .merge(p_reactions, left_index=True, right_index=True))

In [10]:
# show one randomly picked row of the cleaned table
posts_cls.sample(n=1)

Unnamed: 0,created_time,is_popular,id,shares,reactions_like,reactions_love,reactions_wow,reactions_haha,reactions_sad,reactions_angry,reactions_thankful,reactions_pride,reactions_care
94,2020-07-13T07:00:05+0000,False,225292010874776_3970271873043419,2,42,0,4,0,0,0,0,0,0


In [11]:
# save to .csv
# create the target folder first so that the export does not fail when it is missing
os.makedirs('./data', exist_ok=True)
posts_cls.to_csv('./data/posts_2020ye.csv', index=False)