# Scraping Facebook page posts data

The purpose of this notebook is to collect the post data of a Facebook fan page through the Graph API.

*Reference: https://developers.facebook.com/docs/graph-api/reference/post/*

In [1]:
# Uncomment to install the dependency into the kernel's environment:
# %pip install facebook-sdk

In [2]:
import os
from urllib.parse import parse_qs, urlparse

import facebook
import pandas as pd

In [3]:
# Read the access token from the FB_ACCESS_TOKEN environment variable so the
# secret does not have to be saved inside the notebook; the placeholder is
# kept as a fallback for readers who prefer to paste a token here.
TOKEN = os.environ.get('FB_ACCESS_TOKEN', 'YOUR ACCESS TOKEN') # or replace the string with your token
PAGE_NAME = 'OranjeExpress.org'

DEFAULT_NR_LIMIT = 100 # max value=100; see reference

# define the field variables of interest
# NOTE(review): the recorded output below shows an `id` column rather than
# `post_id` -- confirm this field name against the Post reference.
DEFAULT_FIELDS = [
    'post_id',
    'created_time',
    'message',
    'is_popular'
]

In [4]:
def getPostData (TOKEN, PAGE_NAME, FIELDS=DEFAULT_FIELDS, NR_LIMIT=DEFAULT_NR_LIMIT):
    """Download all posts of a Facebook page through the Graph API.

    Parameters
    ----------
    TOKEN : str
        Access token handed to ``facebook.GraphAPI``.
    PAGE_NAME : str
        Name (or id) of the page; it is resolved to the page id first.
    FIELDS : list of str, optional
        Post fields to request; they are joined into one comma-separated
        ``fields`` query value.
    NR_LIMIT : int, optional
        Number of posts requested per call (the ``limit`` parameter).

    Returns
    -------
    list
        The concatenated ``data`` entries of every fetched result page.

    Raises
    ------
    ValueError
        If a result page advertises a ``next`` link that carries no
        ``after`` cursor, because paging cannot continue reliably.
    """
    graph = facebook.GraphAPI(access_token = TOKEN)
    # The Graph API query parameter is `fields` (plural); the former
    # `field=` keyword did not name it.
    pageId = graph.get_object(PAGE_NAME, fields='id')['id']
    print("The Id of page\'%s\' is %s" % (PAGE_NAME, pageId))

    data = [] # initial empty list to store data

    # query arguments shared by every request; `after` is added once known
    query = {
        'limit': NR_LIMIT,
        'fields': ','.join(FIELDS), # create a query string of combined fields
    }

    while True:
        posts = graph.get_connections(
                    id=pageId, connection_name='posts', **query
                    )
        data += posts.get('data', [])

        # a response without 'paging' or without 'next' ends the loop
        # (presumably what a page with no further posts returns -- not shown here)
        nextUrl = posts.get('paging', {}).get('next')
        if not nextUrl:
            break

        # Parse the query string instead of splitting on 'after=' so that the
        # cursor is found wherever it sits in the URL and is percent-decoded.
        cursor = parse_qs(urlparse(nextUrl).query).get('after')
        if not cursor:
            raise ValueError(
                "Cannot continue paging: no 'after' cursor in next link %r" % nextUrl
            )
        query['after'] = cursor[0]

    print('Completed! Total %i posts scrapped ^^' % len(data))

    return data

In [5]:
# run the scraper for the configured page with the default fields and page size
d = getPostData(TOKEN=TOKEN, PAGE_NAME=PAGE_NAME)

The Id of page'OranjeExpress.org' is 225292010874776
Completed! Total 1544 posts scrapped ^^


In [6]:
# put data into pandas df, indexed by the post id
df = pd.DataFrame(d).set_index('id')

# get a glance of data; the sample size is capped because sample() raises
# when asked for more rows than the frame holds
df.sample(min(5, len(df)))

Unnamed: 0_level_0,created_time,message,is_popular
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
225292010874776_397486953664099,2012-12-27T11:00:57+0000,"[荷式城市藝術/ Dutch City Art] (For English, please ...",False
225292010874776_903542503049720,2015-10-16T07:21:18+0000,[荷事分享] 荷蘭設計周\n今天繼續連發分享，這週末事很多。荷蘭設計界每年度盛事Dutch ...,False
225292010874776_1195711093832858,2016-11-23T11:30:00+0000,【#荷事分享】\n台灣高齡化已是各方關注的重點，除了最近吵得沸沸揚的長照法外，政府也早從20...,False
225292010874776_819431658127472,2015-05-28T00:00:00+0000,[荷事分享] 台北村落之聲\n荷蘭紅燈區再生--阿姆斯特丹紅燈區改造研究規劃／鄭采和\n「性...,False
225292010874776_3840400426030565,2020-06-12T04:10:07+0000,【#荷事分享】荷蘭外送平台Just Eat Takeaway砸73億美元收購美國的競爭同業G...,False


In [7]:
# save file to csv; create the target folder first so the export does not
# fail when ./data does not exist yet
os.makedirs('./data', exist_ok=True)
df.to_csv('./data/fb-posts.csv')