In [2]:
# -*- coding: utf8 -*-

# version: 3.5


import sys
import urllib.request
import json
import datetime
import csv
import time

In [3]:
#[CODE 1]

def get_request_url(url):

    req = urllib.request.Request(url)

    try:
        response = urllib.request.urlopen(req)
        if response.getcode() == 200:
            print ("[%s] Url Request Success" % datetime.datetime.now())
            return response.read().decode('utf-8')
    except Exception as e:
        print(e)
        print("[%s] Error for URL : %s" % (datetime.datetime.now(), url))
        return None

In [4]:
#[CODE 2]

def getFacebookNumericID(page_id, access_token):

    base = "https://graph.facebook.com/v2.8"
    node = "/" + page_id
    parameters = "/?access_token=%s" % access_token
    url = base + node + parameters

    retData = get_request_url(url)

    if (retData == None):
        return None
    else:
        jsonData = json.loads(retData)
        return jsonData['id']

In [5]:
#[CODE 3]

def getFacebookPost(page_id, access_token, from_date, to_date, num_statuses):

    base = "https://graph.facebook.com/v2.8"
    node = "/%s/posts" % page_id
    fields = "/?fields=id,message,link,name,type,shares,reactions," + \
             "created_time,comments.limit(0).summary(true)" + \
             ".limit(0).summary(true)"
    duration = "&since=%s&until=%s" % (from_date, to_date)
    parameters = "&limit=%s&access_token=%s" % (num_statuses, access_token)
    url = base + node + fields + duration + parameters

    retData = get_request_url(url)

    if (retData == None):
        return None
    else:
        return json.loads(retData)


def getPostItem(post, key):
    try:
        if key in post.keys():
            return post[key]
        else:
            return ''
    except:
        return ''

def getPostTotalCount(post, key):
    try:
        if key in post.keys():
            return post[key]['summary']['total_count']
        else:
            return 0
    except:
        return 0

In [6]:
#[CODE 4]

def getPostData(post, access_token, jsonResult):

    #[CODE 4-1]

    post_id = getPostItem(post, 'id')
    post_message = getPostItem(post, 'message')
    post_name = getPostItem(post, 'name')
    post_link = getPostItem(post, 'link')
    post_type = getPostItem(post, 'type')

    post_num_reactions = getPostTotalCount(post, 'reactions')
    post_num_comment = getPostTotalCount(post, 'comments')
    post_num_shares = 0 if 'shares' not in post.keys() else post['shares']['count']

    #[CODE 4-2]

    post_created_time = getPostItem(post, 'created_time')
    post_created_time = datetime.datetime.strptime(post_created_time, '%Y-%m-%dT%H:%M:%S+0000')
    post_created_time = post_created_time + datetime.timedelta(hours=+9)
    post_created_time = post_created_time.strftime('%Y-%m-%d %H:%M:%S')

    #[CODE 4-3]

    reaction = getFacebookReaction(post_id, access_token) if post_created_time > '2016-02-24 00:00:00' else {}
    post_num_likes = getPostTotalCount(reaction, 'like')
    post_num_likes = post_num_reactions if post_created_time < '2016-02-24 00:00:00' else post_num_likes

    #[CODE 4-4]

    post_num_loves = getPostTotalCount(reaction, 'love')
    post_num_wows = getPostTotalCount(reaction, 'wow')
    post_num_hahas = getPostTotalCount(reaction, 'haha')
    post_num_sads = getPostTotalCount(reaction, 'sad')
    post_num_angrys = getPostTotalCount(reaction, 'angry')

    jsonResult.append({'post_id':post_id, 'message':post_message,
                    'name':post_name, 'link':post_link,
                    'created_time':post_created_time, 'num_reactions':post_num_reactions,
                    'num_comments':post_num_comment, 'num_shares':post_num_shares,
                    'num_likes':post_num_likes, 'num_loves':post_num_loves,
                    'num_wows':post_num_wows, 'num_hahas':post_num_hahas,
                    'num_sads':post_num_sads, 'num_angrys':post_num_angrys})

In [7]:
#[CODE 5]

def getFacebookReaction(post_id, access_token):

    base = "https://graph.facebook.com/v2.8"
    node = "/%s" % post_id
    reactions = "/?fields=" \
                "reactions.type(LIKE).limit(0).summary(total_count).as(like)" \
                ",reactions.type(LOVE).limit(0).summary(total_count).as(love)" \
                ",reactions.type(WOW).limit(0).summary(total_count).as(wow)" \
                ",reactions.type(HAHA).limit(0).summary(total_count).as(haha)" \
                ",reactions.type(SAD).limit(0).summary(total_count).as(sad)" \
                ",reactions.type(ANGRY).limit(0).summary(total_count).as(angry)"
    parameters = "&access_token=%s" % access_token
    url = base + node + reactions + parameters

    retData = get_request_url(url)

    if (retData == None):
        return None
    else:
        return json.loads(retData)

In [8]:
#[CODE 6]

def main():
    page_name = "jtbcnews"
    app_id = "200920440387013"
    app_secret = "daccef14d5cd41c0e95060d65e66c41d"
    access_token = app_id + "|" + app_secret

    from_date = '2016-10-01'
    to_date = '2017-03-12'

    num_statuses = 10
    go_next = True
    jsonResult = []

    page_id = getFacebookNumericID(page_name, access_token)

    if (page_id == None):
        print ("[%s] %s is Invalid Page Name" % (datetime.datetime.now(), page_name))
        exit()

    print ("[%s] %s page id is %s" % (datetime.datetime.now(), page_name, page_id))

    #[CODE 6-1]

    jsonPost = getFacebookPost(page_id, access_token, from_date, to_date, num_statuses)

    if (jsonPost == None):
        print ("No DATA")
        exit()

    #[CODE 6-2]

    while (go_next):
        for post in jsonPost['data']:
            getPostData(post, access_token, jsonResult)

        if 'paging' in jsonPost.keys():
            jsonPost =  json.loads(get_request_url(jsonPost['paging']['next']))
        else:
            go_next = False

    #[CODE 6-3]

    with open('%s_facebook_%s_%s.json' % (page_name, from_date, to_date), 'w', encoding='utf8') as outfile:
        str_ = json.dumps(jsonResult,
                      indent=4, sort_keys=True,
                      ensure_ascii=False)
        outfile.write(str_)

    print ('%s_facebook_%s_%s.json SAVED' % (page_name, from_date, to_date))

if __name__ == '__main__':
    main()

[2019-03-21 23:25:48.508736] Url Request Success
[2019-03-21 23:25:48.512512] jtbcnews page id is 240263402699918
[2019-03-21 23:25:50.512165] Url Request Success
[2019-03-21 23:25:52.053045] Url Request Success
[2019-03-21 23:25:52.713281] Url Request Success
[2019-03-21 23:25:53.379539] Url Request Success
[2019-03-21 23:25:55.224601] Url Request Success
[2019-03-21 23:25:55.880376] Url Request Success
[2019-03-21 23:25:56.555014] Url Request Success
[2019-03-21 23:25:57.232193] Url Request Success
[2019-03-21 23:25:58.277400] Url Request Success
[2019-03-21 23:25:59.319758] Url Request Success
[2019-03-21 23:26:01.059962] Url Request Success
[2019-03-21 23:26:02.828232] Url Request Success
[2019-03-21 23:26:03.736796] Url Request Success
[2019-03-21 23:26:04.440047] Url Request Success
[2019-03-21 23:26:06.285170] Url Request Success
[2019-03-21 23:26:08.434234] Url Request Success
[2019-03-21 23:26:09.253098] Url Request Success
[2019-03-21 23:26:09.937220] Url Request Success
[201

[2019-03-21 23:28:43.508530] Url Request Success
[2019-03-21 23:28:44.140000] Url Request Success
[2019-03-21 23:28:44.902803] Url Request Success
[2019-03-21 23:28:46.201411] Url Request Success
[2019-03-21 23:28:48.249925] Url Request Success
[2019-03-21 23:28:48.889263] Url Request Success
[2019-03-21 23:28:49.672045] Url Request Success
[2019-03-21 23:28:51.038516] Url Request Success
[2019-03-21 23:28:51.725563] Url Request Success
[2019-03-21 23:28:52.349886] Url Request Success
[2019-03-21 23:28:54.628787] Url Request Success
[2019-03-21 23:28:55.856505] Url Request Success
[2019-03-21 23:28:56.847895] Url Request Success
[2019-03-21 23:28:58.109481] Url Request Success
[2019-03-21 23:28:58.754754] Url Request Success
[2019-03-21 23:28:59.380083] Url Request Success
[2019-03-21 23:29:00.010397] Url Request Success
[2019-03-21 23:29:00.725485] Url Request Success
[2019-03-21 23:29:01.382770] Url Request Success
[2019-03-21 23:29:02.410977] Url Request Success
[2019-03-21 23:29:04

In [55]:
import json
from collections import OrderedDict
from pprint import pprint

with open('jtbcnews_facebook_2017-02-01_2017-02-03.json', encoding="utf-8") as data_file:
    data = json.load(data_file, object_pairs_hook=OrderedDict)
 
pprint(data)

[OrderedDict([('created_time', '2017-02-02 22:20:00'),
              ('link',
               'https://www.facebook.com/jtbcnews/videos/1308067519252829/'),
              ('message',
               "'반기문 하차'로 더 속도 내는 대선열차\n"
               '\n'
               '1위 문재인 (33.1%)\n'
               '2위 안희정 (12.3%)\n'
               '3위 황교안 (11.8%)\n'
               '\n'
               '▶http://bit.ly/2jA6Wqt 기사원문'),
              ('name', "대선열차 더 속도…3자 대결 시 문재인 지지율 '과반'"),
              ('num_angrys', 27),
              ('num_comments', 286),
              ('num_hahas', 71),
              ('num_likes', 1546),
              ('num_loves', 42),
              ('num_reactions', 0),
              ('num_sads', 2),
              ('num_shares', 62),
              ('num_wows', 6),
              ('post_id', '240263402699918_1308067519252829')]),
 OrderedDict([('created_time', '2017-02-01 21:10:08'),
              ('link',
               'https://www.facebook.com/jtbcnews/videos/1306803269379254/'),
    