# Scraping Facebook page posts data

The purpose of this notebook is to fetch the posts of a Facebook fan page through the Graph API and save them to a CSV file.

*Reference: https://developers.facebook.com/docs/graph-api/reference/post/*

In [1]:
#!pip install facebook-sdk

In [2]:
import os
from urllib.parse import parse_qs, urlparse

import facebook
import pandas as pd

In [3]:
# Prefer an access token supplied through the FB_ACCESS_TOKEN environment
# variable so the secret does not have to be saved inside the notebook.
# The placeholder string is kept as a fallback: replace it with your token
# if you do not want to use the environment variable.
TOKEN = os.environ.get('FB_ACCESS_TOKEN', 'YOUR ACCESS TOKEN')
PAGE_NAME = 'OranjeExpress.org'

DEFAULT_NR_LIMIT = 100 # max value=100; see reference

# define the field variables of interest
# NOTE: the post identifier is requested as 'id' -- this is the column the
# returned records actually carry and the one the DataFrame is indexed on
# further down; the previous 'post_id' entry never showed up in the results.
DEFAULT_FIELDS = [
    'id',
    'created_time',
    'message',
    'is_popular'
]

In [4]:
def getPostData (TOKEN, PAGE_NAME, FIELDS=DEFAULT_FIELDS, NR_LIMIT=DEFAULT_NR_LIMIT):
    '''
    Get fb page posts data with given parameters using facebook api.

    Parameters
    ----------
    TOKEN : str
        Access token handed to ``facebook.GraphAPI``.
    PAGE_NAME : str
        Name (or id) of the page; it is resolved to the page id first.
    FIELDS : list of str
        Post fields to request; they are joined into one comma-separated
        query string.
    NR_LIMIT : int
        Number of posts requested per API call.

    Returns
    -------
    list
        The concatenated ``'data'`` entries of every result page.
    '''
    graph = facebook.GraphAPI(access_token = TOKEN)
    # the query parameter is called `fields` (plural)
    pageId = graph.get_object(PAGE_NAME, fields='id')['id']
    print("The Id of page \'%s\' is %s." % (PAGE_NAME, pageId))

    fieldQuery = ','.join(FIELDS) # create a query string of combined fields
    data = [] # initial empty list to store data

    # get initial posts data
    posts = graph.get_connections(
                id=pageId, connection_name='posts', limit=NR_LIMIT,
                fields=fieldQuery,
                )
    data += posts['data']

    # keep requesting pages for as long as the response advertises a next one;
    # a response without a 'paging' entry simply ends the loop
    while True:
        nextUrl = posts.get('paging', {}).get('next')
        if not nextUrl:
            break
        # Parse the query string instead of splitting on 'after=': this keeps
        # working when other parameters follow `after` in the URL and hands
        # over the cursor URL-decoded. Raises KeyError if the next-page URL
        # carries no `after` cursor.
        nextId = parse_qs(urlparse(nextUrl).query)['after'][0]
        posts = graph.get_connections(
                    id=pageId, connection_name='posts', limit=NR_LIMIT,
                    fields=fieldQuery,
                    after=nextId
                    )
        data += posts['data']

    print('Completed! Total %i posts scrapped ^^!' % len(data))

    return data

In [5]:
# Fetch every post of the configured page (default fields and page size)
d = getPostData(TOKEN=TOKEN, PAGE_NAME=PAGE_NAME)

The Id of page 'OranjeExpress.org' is 225292010874776.
Completed! Total 1544 posts scrapped ^^!


In [6]:
# Build the DataFrame from the scraped records, keyed by post id
df = pd.DataFrame(d).set_index('id')

# Peek at a few random rows
df.sample(5)

Unnamed: 0_level_0,created_time,message,is_popular
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
225292010874776_947143772022926,2016-01-13T01:11:14+0000,【#荷式社會文化】菁英會議與殖民遺緒--荷蘭聯合國教科文組織國際專家會議會外篇\n作者：王舒...,False
225292010874776_924122320991738,2015-12-01T08:04:10+0000,【#荷事分享】\n\n前幾個月台灣行腳節目三立“愛玩客”，以名廚詹姆士掛帥來荷蘭拍攝，荷事生...,False
225292010874776_696375167099789,2014-09-09T01:05:00+0000,[荷式人文＆藝術]\n藝術家紀柏豪在荷蘭V2_的獨白\n作者：紀柏豪\n\n 今年獲得國立台...,False
225292010874776_1826896394047655,2018-04-07T07:00:00+0000,【#荷事分享】\n連星期日都還不到就開始犯收假厭世症了嗎？表示該換工作啦～～～有想過到歐洲工...,False
225292010874776_266613616742615,2012-02-06T16:36:24+0000,[荷式在角落 Dutchness]\n氣候變遷劇烈，荷蘭今年的嚴冬直到二月才開始。你已經開始...,False


In [7]:
# save file to csv
# to_csv does not create missing folders, so make sure ./data exists first
os.makedirs('./data', exist_ok=True)
# 'utf_8_sig' prepends a BOM to the UTF-8 output
df.to_csv('./data/fb-posts.csv', encoding='utf_8_sig')