In [3]:
import requests,re
import sys
sys.path.append('/usr/local/lib/python2.7/dist-packages/')
import textblob
import langid
import logging
import collections
import pymongo,time
from secrets import *

### Summary
Script to scrape content from Facebook API.
Collects
1. Pages matching a keyword query
2. Recent posts from these pages (limit set by API)
3. Comments on these posts (it seems that likes on comments are not available)
4. Likes on these posts  

### Errors

Needs robust error handling due to errors with API requests. More detail [here](https://developers.facebook.com/docs/graph-api/using-graph-api/v2.4#errors)

* ``Error 500 {"error":{"message":"An unexpected error has occurred. Please retry your request later.","type":"OAuthException","is_transient":true,"code":2}}``
* ``Error 500 {"error":{"code":1,"message":"An unknown error occurred"}}``
* ``ConnectionError: ('Connection aborted.', gaierror(-2, 'Name or service not known'))``

### TODO

* ~~Get likes on comments~~
* ~~Have a restart page ID, so we can pick up if scraping fails~~
* Write a script to check for new posts on pages that already exist in DB 
* Add a record of when data was pulled  

Mongo tutorial [here](http://api.mongodb.org/python/current/tutorial.html)

In [4]:
from pymongo import MongoClient
client = MongoClient()
db = client.fb
pagesCollection = db.pages
postsCollection = db.posts
commentsCollection = db.comments
likesCollection=db.likes

### Set up query to grab pages

In [5]:

ands=[]
#ands.append('أوروبا') # Europe
#ands.append('ألمانيا')
#ands.append('المانيا') # Germany (two alternate spellings)

ors=[]
ors.append('مهاجرون') # Refugees
ors.append('مهاجرين') # Refugees
ors.append('المهاجرين') # The refugees
ors.append('المهاجرون') # The refugees
ors.append('المهاجرون') # The refugees
ors.append('مهاجرون') # refugees
ors.append('هجرة') # Migration
ors.append('الهجرة') # The migration
ors.append('أوروبا') # Europe
ors.append('سوريا') # Syria

ors=[]
ors.append('مهرب') # Trafficker
ors.append('المتاجرين') # Traffickers

#QUERY='+'.join(ands)+'+'
#QUERY='|'.join(ors)
#QUERY='syria'
#QUERY='سوريا'
QUERY='أوروبا' # Europe
#QUERY=u'اوروبا'
#QUERY=u'اللجوء' # asylum
#QUERY=u'ملجأ' # asylum
# 'Europe' + 'ANY ['migration']...'
LIMIT=1000
# Page limit
postsLimit=250
# Hard limit from API=250

nSkip=5
# Hit API for query nSkip times
# before skipping

nWait=60
# Wait between API errors

postSleepTime=0.5
pageSleepTime=10
# Pause so API not thrashed

### Some boilerplate DB functions

In [6]:
def countCollections():
    print '%d pages' % pagesCollection.count()
    print '%d posts' % postsCollection.count()
    print '%d comments' % commentsCollection.count()
    print '%d likes' % likesCollection.count()

In [7]:
def clean(s):
    if s:
        s=re.sub(',|;|:|"|\'|\?|\(|\)|\n|\t|\-|\=|\+',' ',s.lower())
        return s.strip()
    else:
        return None

In [34]:
def addCommentsToDb(commentsData):
    nAdded=nAlready=0
    for comment in commentsData['data']:
        if not isCommentInDb(comment['id']):
            commentsCollection.insert_one(comment)
            nAdded+=1
        else:
            nAlready+=1
    logging.warning('%d comments added (%d already in DB)' % (nAdded,nAlready))

In [9]:
def addPageToDb(page):
    pagesCollection.insert_one(page)

In [10]:
def clearCollection(collection=None):
    
    all=False
    answer=True
    
    if not collection or collection.lower().strip()=='all':
        answer=raw_input('Clear all?')
        if answer.lower().strip() in ['y','yes']:
            all=True
            
    if all or collection=='likes':
        if not all:answer=raw_input('Clear likes?')
        if all or answer.lower().strip() in ['y','yes']:
            res=likesCollection.remove()
            print 'Cleared %d likes' % res['n']
            
    if all or collection=='pages':
        if not all:answer=raw_input('Clear pages?')
        if all or answer.lower().strip() in ['y','yes']:
            res=pagesCollection.remove()
            print 'Cleared %d pages' % res['n']

            
    if all or collection=='comments':
        if not all:answer=raw_input('Clear comments?')
        if all or answer.lower().strip() in ['y','yes']:
            res=commentsCollection.remove()
            print 'Cleared %d comments' % res['n']

            
    if all or collection=='posts':
        if not all:answer=raw_input('Clear posts?')
        if all or answer.lower().strip() in ['y','yes']:
            res=postsCollection.remove()
            print 'Cleared %d posts' % res['n']


In [11]:
countCollections()

1145 pages
101760 posts
208335 comments
77795 likes


In [None]:
clearCollection('all')

In [33]:
def addLikesToDb(likes):
    nAdded=nAlready=0
    for like in likes['data']:
        if not isLikeInDb(like['id']):
            likesCollection.insert_one(like)
            nAdded+=1
        else:
            nAlready+=1
    logging.warning('%d likes added (%d already in DB)' % (nAdded,nAlready))

In [13]:
def addPostToDb(post):
    print 'Adding posts'
    postsCollection.insert_one(post)

In [14]:
def isLikeInDb(id):
    '''
    Tests if a like is in comments collection
    Returns Bool
    '''
    nMatches=likesCollection.find({'id':id}).count()
    if nMatches==0:
        return False
    elif nMatches==1:
        return True
    else:
        logging.warning('Duplicate like %s' % id)
        return True

In [15]:
def isCommentInDb(id):
    '''
    Tests if a comment is in comments collection
    Returns Bool
    '''
    nMatches=commentsCollection.find({'id':id}).count()
    if nMatches==0:
        return False
    elif nMatches==1:
        return True
    else:
#        logging.warning('Duplicate comment %s' % id)
        return True

In [16]:
def isPostInDb(id):
    '''
    Tests if a post is in post collection
    Returns Bool
    '''
    nMatches=postsCollection.find({'id':id}).count()
    if nMatches==0:
        return False
    elif nMatches==1:
        return True
    else:
#        logging.warning('Duplicate post %s' % id)
        return True

In [17]:
def isPageInDb(id):
    '''
    Tests if a page is in pages collection
    Returns Bool
    '''
    nMatches=pagesCollection.find({'id':id}).count()
    if nMatches==0:
        return False
    elif nMatches==1:
        return True
    else:
#        logging.warning('Duplicate page %s' % id)
        return True

### How to deal with API errors

In [18]:
def handleResult(statusCode,returnText):
    '''
    Parses API call result to determine if successful
    or to wait or abandon
    Returns success,skip (both Bool)
    '''
    if statusCode==200:
        # OK
        return True,False
    
    if statusCode in [102,10,463,467]:
        # Access token expired
        logging.warning('API error: %d %s' % (statusCode,returnText))
        return False,True
    elif statusCode in [2,4,17,341,500]:
        # Wait and retry
        logging.warning('API error - waiting: %d %s' % (statusCode,returnText))
        return False,False
    elif statusCode in [506,1609005]:
        # Skip
        logging.warning('API error - skipping: %d %s' % (statusCode,returnText))
        return False,True
    else:
        logging.warning('API error - unknown code %d %s' % (statusCode,returnText))
        return False, True

### Start by looping through pages

In [26]:
######################################################
success=None
nAttempts=0

temp='https://graph.facebook.com/search?q=%s&limit=%d&type=page&access_token=%s' % (QUERY, LIMIT, ACCESSTOKEN)

while not success:
    # Keep looping if unsuccessful
    r=requests.get(temp)
    success,skip=handleResult(r.status_code,r.text)
    # Try, find out if successful or should skip
    
    if skip or nAttempts==nSkip:
        # If tried nSkip times or if should skip
        r={'data':[],'paging':None}
        if nAttempts==nSkip:
            logging.warning('Skipping after %d attempts' % nAttempts)
            break
    time.sleep(nWait)
    nAttempts+=1
######################################################

In [19]:
r.json()['data'][0]

{u'category': u'Non-Profit Organization',
 u'category_list': [{u'id': u'2603', u'name': u'Non-Profit Organization'}],
 u'id': u'1474221879460894',
 u'name': u'\u0645\u0647\u0631\u0628\u0627\u0646\u0647'}

In [66]:
getPostsFromPage(307091372652912)



Post 0 None 307091372652912_1001167463245296
Likes keys



 [u'paging', u'data']
{u'id': u'1503171200008103', u'name': u'\u0645\u0647\u0646\u062f \u0627\u0644\u0628\u062d\u0627\u0631 \u0627\u0644\u0628\u062d\u0627\u0631'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Returning like results 200




Got next page of likes (current) (50 so far)
Returning like results 200




Got next page of likes (current) (75 so far)
Returning like results 200




Got next page of likes (current) (100 so far)
Returning like results 200




Got next page of likes (current) (125 so far)
Returning like results 200




Got next page of likes (current) (150 so far)
Returning like results 200




Got next page of likes (current) (175 so far)
Returning like results 200




Got next page of likes (current) (200 so far)
Got 208 likes after paging
Post 1 اخبار المهاجرين في اوروبا 307091372652912_822407197787991




Likes keys



 [u'paging', u'data']
{u'id': u'1667201136857957', u'name': u'Baidari Sahel'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Returning like results 200




Got next page of likes (current) (50 so far)
Returning like results 200




Got next page of likes (current) (75 so far)
Returning like results 200




Got next page of likes (current) (100 so far)
Returning like results 200




Got next page of likes (current) (125 so far)
Returning like results 200




Got next page of likes (current) (150 so far)
Returning like results 200




Got next page of likes (current) (175 so far)
Returning like results 200




Got next page of likes (current) (200 so far)
Returning like results 200




Got next page of likes (current) (225 so far)
Returning like results 200




Got next page of likes (current) (250 so far)
Got 254 likes after paging
Post 2 None 307091372652912_822401507788560




Likes keys



 [u'paging', u'data']
{u'id': u'1506779722954327', u'name': u'\u0645\u0636\u0644\u0648\u0645 \u0627\u0644\u0645\u0636\u0644\u0648\u0645'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Returning like results 200




Got next page of likes (current) (50 so far)
Returning like results 200




Got next page of likes (current) (75 so far)
Returning like results 200




Got next page of likes (current) (100 so far)
Returning like results 200




Got next page of likes (current) (125 so far)
Returning like results 200




Got next page of likes (current) (150 so far)
Got 151 likes after paging
Post 3 experiencesvideoludiques.com 307091372652912_822400144455363




Likes keys



 [u'paging', u'data']
{u'id': u'1506779722954327', u'name': u'\u0645\u0636\u0644\u0648\u0645 \u0627\u0644\u0645\u0636\u0644\u0648\u0645'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Returning like results 200




Got next page of likes (current) (50 so far)
Returning like results 200




Got next page of likes (current) (75 so far)
Returning like results 200




Got next page of likes (current) (100 so far)
Got 117 likes after paging
Post 4 None 307091372652912_822395634455814




Likes keys



 [u'paging', u'data']
{u'id': u'1506779722954327', u'name': u'\u0645\u0636\u0644\u0648\u0645 \u0627\u0644\u0645\u0636\u0644\u0648\u0645'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Returning like results 200




Got next page of likes (current) (50 so far)
Got 62 likes after paging
Post 5 www.mohager.com 307091372652912_819297228098988




Likes keys



 [u'paging', u'data']
{u'id': u'1506779722954327', u'name': u'\u0645\u0636\u0644\u0648\u0645 \u0627\u0644\u0645\u0636\u0644\u0648\u0645'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Returning like results 200




Got next page of likes (current) (50 so far)
Got 58 likes after paging
Post 6 None 307091372652912_819296131432431




Likes keys



 [u'paging', u'data']
{u'id': u'176789819327192', u'name': u'\u0645\u0631\u064a\u0645 \u062a\u0645\u0648\u0632'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Returning like results 200




Got next page of likes (current) (50 so far)
Got 62 likes after paging
Post 7 None 307091372652912_818180994877278




Likes keys



 [u'paging', u'data']
{u'id': u'176789819327192', u'name': u'\u0645\u0631\u064a\u0645 \u062a\u0645\u0648\u0632'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Returning like results 200




Got next page of likes (current) (50 so far)
Got 64 likes after paging
Post 8 None 307091372652912_818180434877334




Likes keys



 [u'paging', u'data']
{u'id': u'176789819327192', u'name': u'\u0645\u0631\u064a\u0645 \u062a\u0645\u0648\u0632'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Got 48 likes after paging
Post 9 None 307091372652912_818180248210686




Likes keys



 [u'paging', u'data']
{u'id': u'176789819327192', u'name': u'\u0645\u0631\u064a\u0645 \u062a\u0645\u0648\u0632'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Got 28 likes after paging
Post 10 http://www.ostio.de/wp-content/uploads/2013/02/Schengen-Visa.jpg 307091372652912_818179801544064




Likes keys



 [u'paging', u'data']
{u'id': u'1641863749394665', u'name': u'Rami Ahmet'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Got 42 likes after paging
Post 11 None 307091372652912_818168661545178




Likes keys



 [u'paging', u'data']
{u'id': u'143781249309411', u'name': u'Ali Hassan'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Returning like results 200




Got next page of likes (current) (50 so far)
Returning like results 200




Got next page of likes (current) (75 so far)
Returning like results 200




Got next page of likes (current) (100 so far)
Got 102 likes after paging
Post 12 http://www.hibapress.com/upload/2742014-1c06e.jpg 307091372652912_818165441545500




Likes keys



 [u'paging', u'data']
{u'id': u'896969257005362', u'name': u'Aymen Shindi'}
Got likes paging
Likes paging keys [u'cursors']
Got 25 likes after paging
Post 13 http://upload-alkompis.s3-eu-west-1.amazonaws.com/iblock/6dc/6dcbfc3753cb7e16484d8f2b4e5715d4/b9f531e3d305b8ad0033aa1e439ef2be.jpg 307091372652912_818163758212335




Likes keys



 [u'paging', u'data']
{u'id': u'487334434772182', u'name': u'Ahsen Toujour Bien'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Got 37 likes after paging
Post 14 http://www.oxfordnador.com/wp-content/uploads/2014/02/francais.jpg 307091372652912_818162498212461




Likes keys



 [u'paging', u'data']
{u'id': u'1657998264413131', u'name': u'\u0645\u0648\u0633\u0649 \u0627\u0644\u0631\u0628\u064a\u0639\u064a'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Got 34 likes after paging
Post 15 http://www.aawsat.com/2010/02/26/images/hassad1.558761.jpg 307091372652912_818161094879268




Likes keys



 [u'paging', u'data']
{u'id': u'141819386166437', u'name': u'\u0635\u0627\u0644\u062d \u0627\u0644\u0635\u064a\u0627\u062c'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Got 39 likes after paging
Post 16 Freestyle royal Drifting Maroc 307091372652912_671049906257055




Likes keys



 [u'paging', u'data']
{u'id': u'451080188398894', u'name': u'\u0635\u0641\u0627\u0621 \u0635\u0627\u062f\u0642 \u0627\u0644\u0632\u064a\u062f\u064a'}
Got likes paging
Likes paging keys [u'cursors']
Got 21 likes after paging
Post 17 السلطات الفرنسية تغرم يهوديا رفض تشغيل المغاربة في محله 307091372652912_642736432421736




Likes keys



 [u'paging', u'data']
{u'id': u'438360176364340', u'name': u'\u064a\u0648\u0633\u0641 \u062d\u0631\u0628'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Got 34 likes after paging
Post 18 None 307091372652912_642604492434930




Likes keys



 [u'paging', u'data']
{u'id': u'431091697090504', u'name': u'Mosta Lion'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Returning like results 200




Got next page of likes (current) (50 so far)
Returning like results 200




Got next page of likes (current) (75 so far)
Returning like results 200




Got next page of likes (current) (100 so far)
Got 109 likes after paging
Post 19 None 307091372652912_642601905768522




Likes keys



 [u'paging', u'data']
{u'id': u'162960527381628', u'name': u'\u0645\u062d\u0645\u062f \u0627\u0644\u0645\u0635\u0637\u0641\u0649'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Got 42 likes after paging
Post 20 None 307091372652912_642600225768690




Likes keys



 [u'paging', u'data']
{u'id': u'416698548527630', u'name': u'Rahaf Kasem'}
Got likes paging
Likes paging keys [u'cursors']
Got 22 likes after paging
Post 21 None 307091372652912_642588135769899




Likes keys



 [u'paging', u'data']
{u'id': u'416698548527630', u'name': u'Rahaf Kasem'}
Got likes paging
Likes paging keys [u'cursors']
Got 22 likes after paging
Post 22 None 307091372652912_642128479149198




Likes keys



 [u'paging', u'data']
{u'id': u'427850697404576', u'name': u'\u0645\u062d\u0645\u0648\u062f \u063a\u0648\u0627\u0646\u0645\u0647'}
Got likes paging
Likes paging keys [u'cursors']
Got 18 likes after paging
Post 23 None 307091372652912_642127705815942




Likes keys



 [u'paging', u'data']
{u'id': u'558669827620767', u'name': u'\u0627\u062f\u0631\u064a\u0633 \u0635\u0627\u062f\u0642'}
Got likes paging
Likes paging keys [u'cursors']
Got 17 likes after paging
Post 24 None 307091372652912_642125172482862




Likes keys



 [u'paging', u'data']
{u'id': u'10153320097876137', u'name': u'Ahmed M. Abd-Elhamid'}
Got likes paging
Likes paging keys [u'cursors']
Got 15 likes after paging
Post 25 None 307091372652912_642116755817037




Likes keys



 [u'paging', u'data']
{u'id': u'1503364629980301', u'name': u'Adel Amine'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Returning like results 200




Got next page of likes (current) (50 so far)
Returning like results 200




Got next page of likes (current) (75 so far)
Got 98 likes after paging
Post 26 None 307091372652912_497370936958287




Likes keys



 [u'paging', u'data']
{u'id': u'941764019213094', u'name': u'Hakim Acounah'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 27 None 307091372652912_497362880292426




Likes keys



 [u'paging', u'data']
{u'id': u'558669827620767', u'name': u'\u0627\u062f\u0631\u064a\u0633 \u0635\u0627\u062f\u0642'}
Got likes paging
Likes paging keys [u'cursors']
Got 10 likes after paging
Post 28 None 307091372652912_495232237172157




Likes keys



 [u'paging', u'data']
{u'id': u'558669827620767', u'name': u'\u0627\u062f\u0631\u064a\u0633 \u0635\u0627\u062f\u0642'}
Got likes paging
Likes paging keys [u'cursors']
Got 9 likes after paging
Post 29 None 307091372652912_495231273838920




Likes keys



 [u'paging', u'data']
{u'id': u'558669827620767', u'name': u'\u0627\u062f\u0631\u064a\u0633 \u0635\u0627\u062f\u0642'}
Got likes paging
Likes paging keys [u'cursors']
Got 7 likes after paging
Post 30 None 307091372652912_495230630505651




Likes keys



 [u'paging', u'data']
{u'id': u'1635149450094135', u'name': u'Agarib Sadi'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 31 None 307091372652912_495230050505709




Post 32 None 307091372652912_495229080505806




Likes keys



 [u'paging', u'data']
{u'id': u'465619646953265', u'name': u'\u062c\u064a\u0645\u064a \u063a\u0627\u0644\u0628'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 33 None 307091372652912_495228327172548




Likes keys



 [u'paging', u'data']
{u'id': u'1674603119442458', u'name': u'\u0627\u0644\u0648\u0644 \u0627\u0644\u0648\u0644'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 34 None 307091372652912_495227800505934




Likes keys



 [u'paging', u'data']
{u'id': u'465619646953265', u'name': u'\u062c\u064a\u0645\u064a \u063a\u0627\u0644\u0628'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 35 None 307091372652912_495227287172652




Likes keys



 [u'paging', u'data']
{u'id': u'1609655082621040', u'name': u'Abdo Shbat'}
Got likes paging
Likes paging keys [u'cursors']
Got 4 likes after paging
Post 36 None 307091372652912_495225437172837




Likes keys



 [u'paging', u'data']
{u'id': u'307091372652912', u'name': u'\u0627\u062e\u0628\u0627\u0631 \u0627\u0644\u0645\u0647\u0627\u062c\u0631\u064a\u0646 \u0641\u064a \u0627\u0648\u0631\u0648\u0628\u0627'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 37 Al Italiya 307091372652912_515323645148580




Likes keys



 [u'paging', u'data']
{u'id': u'465619646953265', u'name': u'\u062c\u064a\u0645\u064a \u063a\u0627\u0644\u0628'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 38 None 307091372652912_493654217329959




Post 39 None 307091372652912_493652903996757




Likes keys



 [u'paging', u'data']
{u'id': u'1150581581637208', u'name': u'Gamal Dametry'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 40 None 307091372652912_492949837400397




Likes keys



 [u'paging', u'data']
{u'id': u'696863087080539', u'name': u'\u062d\u0642\u0642\u0629 \u0646\u0635\u0641 \u062d\u0644\u0645\u064a \u062d\u0644\u0645\u064a'}
Got likes paging
Likes paging keys [u'cursors']
Got 16 likes after paging
Post 41 None 307091372652912_492948637400517




Post 42 Timeline Photos 307091372652912_492945297400851




Likes keys



 [u'paging', u'data']
{u'id': u'147810158899253', u'name': u'\u0631\u0648\u0627\u0646 \u0627\u0644\u0628\u0635\u0631\u0627\u0648\u064a\u0647 \u0627\u0644\u0628\u0635\u0631\u0627\u0648\u064a\u0647'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Got 28 likes after paging
Post 43 None 307091372652912_492944230734291




Likes keys



 [u'paging', u'data']
{u'id': u'307091372652912', u'name': u'\u0627\u062e\u0628\u0627\u0631 \u0627\u0644\u0645\u0647\u0627\u062c\u0631\u064a\u0646 \u0641\u064a \u0627\u0648\u0631\u0648\u0628\u0627'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 44 None 307091372652912_492943814067666




Likes keys



 [u'paging', u'data']
{u'id': u'558669827620767', u'name': u'\u0627\u062f\u0631\u064a\u0633 \u0635\u0627\u062f\u0642'}
Got likes paging
Likes paging keys [u'cursors']
Got 6 likes after paging
Post 45 None 307091372652912_492943544067693




Likes keys



 [u'paging', u'data']
{u'id': u'558669827620767', u'name': u'\u0627\u062f\u0631\u064a\u0633 \u0635\u0627\u062f\u0642'}
Got likes paging
Likes paging keys [u'cursors']
Got 7 likes after paging
Post 46 None 307091372652912_492943230734391




Likes keys



 [u'paging', u'data']
{u'id': u'558669827620767', u'name': u'\u0627\u062f\u0631\u064a\u0633 \u0635\u0627\u062f\u0642'}
Got likes paging
Likes paging keys [u'cursors']
Got 8 likes after paging
Post 47 http://img.youm7.com/images/NewsPics/large/s8201218214536.jpg 307091372652912_276265985811305




Likes keys



 [u'paging', u'data']
{u'id': u'1465032537138588', u'name': u'\u062a\u0642\u064a \u0627\u0644\u0645\u0644\u0643\u064a'}
Got likes paging
Likes paging keys [u'cursors']
Got 4 likes after paging
Post 48 None 307091372652912_491315274230520




Likes keys



 [u'paging', u'data']
{u'id': u'482685718569495', u'name': u'Fouzi Trad'}
Got likes paging
Likes paging keys [u'cursors']
Got 4 likes after paging
Post 49 http://a5.sphotos.ak.fbcdn.net/hphotos-ak-ash3/539285_463421240355781_1706079713_n.jpg 307091372652912_396028703797944




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 50 http://www.elakhbaronline.com/dz/media/k2/items/cache/1b9499731f0cfbb7e7f5f5b826330424_XL.jpg?t=-621 307091372652912_426165610768174




Likes keys



 [u'paging', u'data']
{u'id': u'832502190190922', u'name': u'\u0645\u0639\u062a\u0632 \u0627\u0644\u0641\u064a\u062a\u0648\u0631\u064a'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 51 None 307091372652912_490485894313458




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 52 http://www.rnw.nl/data/files/imagecache/must_carry/images/lead/article/2012/08/anti-muslim-650.jpg 307091372652912_480552175297767




Likes keys



 [u'paging', u'data']
{u'id': u'441499756034846', u'name': u'Mohamed Alhadi'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 53 None 307091372652912_490069167688464




Likes keys



 [u'paging', u'data']
{u'id': u'805930666186547', u'name': u'Tamer Ammar'}
Got likes paging
Likes paging keys [u'cursors']
Got 6 likes after paging
Post 54 None 307091372652912_490067311021983




Likes keys



 [u'paging', u'data']
{u'id': u'483949461785825', u'name': u'Abou Omar Mohamed'}
Got likes paging
Likes paging keys [u'cursors']
Got 8 likes after paging
Post 55 None 307091372652912_490065707688810




Likes keys



 [u'paging', u'data']
{u'id': u'1648769508702317', u'name': u'Ahmed Elkh'}
Got likes paging
Likes paging keys [u'cursors']
Got 5 likes after paging
Post 56 None 307091372652912_490057364356311




Likes keys



 [u'paging', u'data']
{u'id': u'483949461785825', u'name': u'Abou Omar Mohamed'}
Got likes paging
Likes paging keys [u'cursors']
Got 5 likes after paging
Post 57 None 307091372652912_489730067722374




Likes keys



 [u'paging', u'data']
{u'id': u'733689006762031', u'name': u'Rafda Djeridi'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 58 None 307091372652912_489729277722453




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 59 http://a6.sphotos.ak.fbcdn.net/hphotos-ak-ash4/314775_10150267705766875_206386401874_7898754_1930340 307091372652912_409966005707320




Likes keys



 [u'paging', u'data']
{u'id': u'1500654990258854', u'name': u'Abidou Saeed'}
Got likes paging
Likes paging keys [u'cursors']
Got 8 likes after paging
Post 60 None 307091372652912_488278541200860




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 61 إيطاليا: ثورة في "بريشا" اعتراضًا على المركز الإسلامي - المسلمون في إيطاليا - موقع المسلمون في العال 307091372652912_258970777553307
Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 62 None 307091372652912_487894304572617




Likes keys



 [u'paging', u'data']
{u'id': u'482685718569495', u'name': u'Fouzi Trad'}
Got likes paging
Likes paging keys [u'cursors']
Got 4 likes after paging
Post 63 None 307091372652912_487893004572747




Likes keys



 [u'paging', u'data']
{u'id': u'482685718569495', u'name': u'Fouzi Trad'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 64 None 307091372652912_487891384572909




Likes keys



 [u'paging', u'data']
{u'id': u'482685718569495', u'name': u'Fouzi Trad'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 65 None 307091372652912_487065141322200




Post 66 Harraga : Espagne la fin des soins gratuits pour les clandestins أسبانيا 307091372652912_271812176257115
Post 67 None 307091372652912_486957291332985




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 68 None 307091372652912_486956944666353




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 69 None 307091372652912_486956641333050




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 70 None 307091372652912_486956124666435




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 71 None 307091372652912_486580878037293




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 72 None 307091372652912_486580391370675




Post 73 None 307091372652912_486580118037369




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 74 None 307091372652912_486578584704189




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 75 None 307091372652912_486577358037645




Post 76 None 307091372652912_486577028037678




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 77 None 307091372652912_486576474704400




Likes keys



 [u'paging', u'data']
{u'id': u'482685718569495', u'name': u'Fouzi Trad'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 78 None 307091372652912_486576001371114




Post 79 None 307091372652912_486178768077504




Likes keys



 [u'paging', u'data']
{u'id': u'913566968762707', u'name': u'Ahmed Elweekil'}
Got likes paging
Likes paging keys [u'cursors']
Got 4 likes after paging
Post 80 None 307091372652912_486178484744199




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 81 None 307091372652912_486177824744265




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 82 None 307091372652912_486177621410952




Likes keys



 [u'paging', u'data']
{u'id': u'482685718569495', u'name': u'Fouzi Trad'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 83 None 307091372652912_486177328077648




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 84 None 307091372652912_486176888077692




Likes keys



 [u'paging', u'data']
{u'id': u'482685718569495', u'name': u'Fouzi Trad'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 85 None 307091372652912_486171648078216




Likes keys



 [u'paging', u'data']
{u'id': u'855740991205791', u'name': u'\u0623\u0628\u0648\u0645\u062d\u0645\u062f \u062c\u0645\u0639\u0629'}
Got likes paging
Likes paging keys [u'cursors']
Got 5 likes after paging
Post 86 None 307091372652912_480023085359739




Post 87 None 307091372652912_480022975359750




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 88 None 307091372652912_480022708693110




Post 89 None 307091372652912_480022368693144




Likes keys



 [u'paging', u'data']
{u'id': u'962079760499859', u'name': u'MFawaz Amin'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 90 http://i.imgur.com/OVwzk.png 307091372652912_426431040728966




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 91 None 307091372652912_478802475481800




Post 92 None 307091372652912_478251742203540




Likes keys



 [u'paging', u'data']
{u'id': u'482685718569495', u'name': u'Fouzi Trad'}
Got likes paging
Likes paging keys [u'cursors']
Got 5 likes after paging
Post 93 None 307091372652912_478247382203976




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 94 None 307091372652912_478242268871154




Post 95 None 307091372652912_478242125537835




Post 96 None 307091372652912_478242008871180




Post 97 None 307091372652912_478241628871218




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 98 None 307091372652912_477848605577187




Likes keys



 [u'paging', u'data']
{u'id': u'925538380853140', u'name': u'Ahmedmody Shokr'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 99 None 307091372652912_477847558910625




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 100 None 307091372652912_477847222243992




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 101 None 307091372652912_477846775577370




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 102 None 307091372652912_477846548910726




Post 103 None 307091372652912_477845808910800




Likes keys



 [u'paging', u'data']
{u'id': u'898455913574478', u'name': u'Damour Amir'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 104 Regolarizzazione. Le domande si presentano a settembre 307091372652912_285191288254363




Likes keys



 [u'paging', u'data']
{u'id': u'1652076701703380', u'name': u'Souad Ahmad'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 105 None 307091372652912_477462428949138




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 106 None 307091372652912_477462098949171




Post 107 None 307091372652912_477461468949234




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 108 None 307091372652912_477461015615946




Post 109 None 307091372652912_477460525615995




Post 110 None 307091372652912_477460105616037




Post 111 None 307091372652912_477225102306204




Likes keys



 [u'paging', u'data']
{u'id': u'1491890917802348', u'name': u'\u0628\u0647\u0627\u0621 \u0627\u0644\u0645\u0648\u0633\u0648\u064a'}
Got likes paging
Likes paging keys [u'cursors', u'next']
Returning like results 200




Got next page of likes (current) (25 so far)
Got 34 likes after paging
Post 112 None 307091372652912_477042605657787




Likes keys



 [u'paging', u'data']
{u'id': u'1057849304227613', u'name': u'Alaa Weshahy'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 113 None 307091372652912_477042205657827




Likes keys



 [u'paging', u'data']
{u'id': u'897944276949084', u'name': u'Thaer Abd'}
Got likes paging
Likes paging keys [u'cursors']
Got 5 likes after paging
Post 114 None 307091372652912_476627709032610




Likes keys



 [u'paging', u'data']
{u'id': u'10206462410293262', u'name': u'Hassan Almane'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 115 None 307091372652912_476627405699307




Likes keys



 [u'paging', u'data']
{u'id': u'165790680427564', u'name': u'Nor Alnwar'}
Got likes paging
Likes paging keys [u'cursors']
Got 4 likes after paging
Post 116 None 307091372652912_476627179032663




Post 117 None 307091372652912_476626959032685




Post 118 None 307091372652912_476626645699383




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 119 http://www.zapress.com/local/cache-vignettes/L80xH80/artoff14080-0bdd2.jpg 307091372652912_435756053113558




Post 120 None 307091372652912_476473699048011




Likes keys



 [u'paging', u'data']
{u'id': u'482685718569495', u'name': u'Fouzi Trad'}
Got likes paging
Likes paging keys [u'cursors']
Got 4 likes after paging
Post 121 http://www.xn--fachkrfte-offensive-lwb.de/ 307091372652912_253685261409825




Likes keys



 [u'paging', u'data']
{u'id': u'897944276949084', u'name': u'Thaer Abd'}
Got likes paging
Likes paging keys [u'cursors']
Got 4 likes after paging
Post 122 ألمانيا تسمح للمسلمين واليهود بإجراء عمليات الختان 307091372652912_321432697951433
Likes keys



 [u'paging', u'data']
{u'id': u'761858183960808', u'name': u'Jawad Bouraha'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 123 None 307091372652912_475853375776710




Post 124 None 307091372652912_475849852443729




Post 125 None 307091372652912_475846405777407




Likes keys



 [u'paging', u'data']
{u'id': u'897944276949084', u'name': u'Thaer Abd'}
Got likes paging
Likes paging keys [u'cursors']
Got 8 likes after paging
Post 126 None 307091372652912_474518609243520




Post 127 None 307091372652912_474517929243588




Post 128 None 307091372652912_474517375910310




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 129 None 307091372652912_474516909243690




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 130 None 307091372652912_474516629243718




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 131 None 307091372652912_474516222577092




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 132 None 307091372652912_473892342639480




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 133 None 307091372652912_473891652639549




Post 134 None 307091372652912_473891352639579




Likes keys



 [u'paging', u'data']
{u'id': u'925538380853140', u'name': u'Ahmedmody Shokr'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 135 None 307091372652912_473890909306290




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 136 None 307091372652912_473265916035456




Likes keys



 [u'paging', u'data']
{u'id': u'912401935507631', u'name': u'Oubaid Simo'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 137 None 307091372652912_473265522702162




Likes keys



 [u'paging', u'data']
{u'id': u'1202882726394898', u'name': u'Samir Samirmeka'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 138 None 307091372652912_473265316035516




Post 139 None 307091372652912_473264936035554




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 140 None 307091372652912_473262066035841




Post 141 None 307091372652912_473261832702531




Post 142 None 307091372652912_473261499369231




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 143 None 307091372652912_473260916035956




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 144 None 307091372652912_472608752767839




Post 145 None 307091372652912_472608282767886




Post 146 None 307091372652912_472608169434564




Likes keys



 [u'paging', u'data']
{u'id': u'531071097055324', u'name': u'Mouhajer Mes'}
Got likes paging
Likes paging keys [u'cursors']
Got 5 likes after paging
Post 147 Deutscher Akademischer Austauschdienst - DAAD - Deutscher Akademischer Austausch Dienst 307091372652912_259977340778425




Likes keys



 [u'paging', u'data']
{u'id': u'431484847044176', u'name': u'Muslim Utchiwa'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 148 None 307091372652912_472601582768556




Post 149 None 307091372652912_472600449435336




Likes keys



 [u'paging', u'data']
{u'id': u'1202882726394898', u'name': u'Samir Samirmeka'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 150 None 307091372652912_472320389463342




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 151 None 307091372652912_472299362798778




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 152 None 307091372652912_472296202799094




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 153 None 307091372652912_472295816132466




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 154 None 307091372652912_472271159468265




Likes keys



 [u'paging', u'data']
{u'id': u'925538380853140', u'name': u'Ahmedmody Shokr'}
Got likes paging
Likes paging keys [u'cursors']
Got 5 likes after paging
Post 155 None 307091372652912_470532672975447




Likes keys



 [u'paging', u'data']
{u'id': u'1202882726394898', u'name': u'Samir Samirmeka'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 156 None 307091372652912_467943826567665




Post 157 None 307091372652912_467943586567689




Post 158 None 307091372652912_467943239901057




Post 159 None 307091372652912_467942816567766




Post 160 http://www.dw.de/image/0,,15967947_401,00.jpg 307091372652912_326312150790773




Post 161 None 307091372652912_467942066567841




Likes keys



 [u'paging', u'data']
{u'id': u'307091372652912', u'name': u'\u0627\u062e\u0628\u0627\u0631 \u0627\u0644\u0645\u0647\u0627\u062c\u0631\u064a\u0646 \u0641\u064a \u0627\u0648\u0631\u0648\u0628\u0627'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 162 None 307091372652912_467361066625941




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 163 None 307091372652912_467359776626070




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 164 http://www.alitaliya.net/images/stories/a/consolato.jpg 307091372652912_432722086767616




Post 165 None 307091372652912_466523646709683




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 166 None 307091372652912_466522880043093




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 167 None 307091372652912_466521706709877




Likes keys



 [u'paging', u'data']
{u'id': u'1682952721926494', u'name': u'Ayoub Corleone'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 168 None 307091372652912_466521360043245




Likes keys



 [u'paging', u'data']
{u'id': u'678189828984396', u'name': u'Medo Madrid'}
Got likes paging
Likes paging keys [u'cursors']
Got 5 likes after paging
Post 169 None 307091372652912_465950353433679




Post 170 None 307091372652912_465950096767038




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 171 Automobile Club d'Italia: Home page 307091372652912_421697814541619
Post 172 None 307091372652912_465949420100439




Post 173 None 307091372652912_465947760100605




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 174 None 307091372652912_465947320100649




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 175 None 307091372652912_465946310100750




Likes keys



 [u'paging', u'data']
{u'id': u'1652076701703380', u'name': u'Souad Ahmad'}
Got likes paging
Likes paging keys [u'cursors']
Got 4 likes after paging
Post 176 None 307091372652912_465945640100817




Post 177 None 307091372652912_464643343564380




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 178 None 307091372652912_464642296897818




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 179 None 307091372652912_464157870279594




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 180 None 307091372652912_464157103613004




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 181 None 307091372652912_464156063613108




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 182 Portaal belgium.be | Portail belgium.be | Portal belgium.be 307091372652912_385627174833901
Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 183 None 307091372652912_464153953613319




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 184 None 307091372652912_464153290280052




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 185 None 307091372652912_464151910280190




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 186 None 307091372652912_462361803792534




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 187 None 307091372652912_462360607125987




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 188 None 307091372652912_462358067126241




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 189 None 307091372652912_462357470459634




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 190 None 307091372652912_458133057548742




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 191 None 307091372652912_458132230882158




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 192 None 307091372652912_458131877548860




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 193 None 307091372652912_458131550882226




Likes keys



 [u'paging', u'data']
{u'id': u'1202882726394898', u'name': u'Samir Samirmeka'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 194 None 307091372652912_458130010882380




Likes keys



 [u'paging', u'data']
{u'id': u'1652076701703380', u'name': u'Souad Ahmad'}
Got likes paging
Likes paging keys [u'cursors']
Got 4 likes after paging
Post 195 None 307091372652912_455731967788851




Likes keys



 [u'paging', u'data']
{u'id': u'1652076701703380', u'name': u'Souad Ahmad'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 196 None 307091372652912_455731347788913




Post 197 None 307091372652912_455730831122298




Likes keys



 [u'paging', u'data']
{u'id': u'307091372652912', u'name': u'\u0627\u062e\u0628\u0627\u0631 \u0627\u0644\u0645\u0647\u0627\u062c\u0631\u064a\u0646 \u0641\u064a \u0627\u0648\u0631\u0648\u0628\u0627'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 198 None 307091372652912_455730244455690




Likes keys



 [u'paging', u'data']
{u'id': u'1074961369181033', u'name': u'Mustapha Khalfoun'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 199 None 307091372652912_455729687789079




Likes keys



 [u'paging', u'data']
{u'id': u'307091372652912', u'name': u'\u0627\u062e\u0628\u0627\u0631 \u0627\u0644\u0645\u0647\u0627\u062c\u0631\u064a\u0646 \u0641\u064a \u0627\u0648\u0631\u0648\u0628\u0627'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 200 None 307091372652912_455729124455802




Post 201 None 307091372652912_455725984456116




Post 202 None 307091372652912_455725311122850




Likes keys



 [u'paging', u'data']
{u'id': u'10154207988809606', u'name': u'Raed Alnajjar'}
Got likes paging
Likes paging keys [u'cursors']
Got 7 likes after paging
Post 203 Brussels Airport Website:    Home 307091372652912_421220411256534




Likes keys



 [u'paging', u'data']
{u'id': u'10154207988809606', u'name': u'Raed Alnajjar'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 204 Startsida      - Migrationsverket 307091372652912_308117462613217




Likes keys



 [u'paging', u'data']
{u'id': u'751186518325293', u'name': u'Bassam Ghossen'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 205 DB Bahn: bahn.de - Ihr Mobilitätsportal für Reisen, Bahn, Urlaub, Hotels, Städtereisen und Mietwagen 307091372652912_390424510994909
Likes keys



 [u'paging', u'data']
{u'id': u'751186518325293', u'name': u'Bassam Ghossen'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 206 None 307091372652912_455181324510582




Post 207 None 307091372652912_455180841177297




Post 208 None 307091372652912_455148767847171




Likes keys



 [u'paging', u'data']
{u'id': u'10153184851989211', u'name': u'Ahmed Hassan'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 209 None 307091372652912_455138407848207




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 210 None 307091372652912_455137434514971




Post 211 None 307091372652912_455123177849730




Post 212 None 307091372652912_455016131193768




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 213 None 307091372652912_455009624527752




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 214 None 307091372652912_454431524585562




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 215 'Smart borders' - Europe's new high-tech frontiers | Europe | DW.DE | 03.06.2012 307091372652912_112390535568914
Post 216 None 307091372652912_453661274662587




Likes keys



 [u'paging', u'data']
{u'id': u'1652076701703380', u'name': u'Souad Ahmad'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 217 None 307091372652912_453651854663529




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 218 None 307091372652912_453650587996989




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 219 None 307091372652912_453648987997149




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 220 None 307091372652912_453648501330531




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 221 None 307091372652912_453647881330593




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 222 None 307091372652912_453646817997366




Post 223 None 307091372652912_453646327997415




Likes keys



 [u'paging', u'data']
{u'id': u'869152986503764', u'name': u'Amr Refaat'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 224 None 307091372652912_453645417997506




Post 225 None 307091372652912_452898478072200




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 226 None 307091372652912_452573518104696




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 227 None 307091372652912_452571474771567




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 228 None 307091372652912_452148154813899




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 229 None 307091372652912_452144778147570




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 230 None 307091372652912_452137724814942




Likes keys



 [u'paging', u'data']
{u'id': u'835581216556328', u'name': u'Sorita Ismael Snaibi'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 231 None 307091372652912_452137198148328




Likes keys



 [u'paging', u'data']
{u'id': u'10153620342047037', u'name': u'Eslam Ahmed Elgendy'}
Got likes paging
Likes paging keys [u'cursors']
Got 5 likes after paging
Post 232 None 307091372652912_451142131581168




Likes keys



 [u'paging', u'data']
{u'id': u'1652076701703380', u'name': u'Souad Ahmad'}
Got likes paging
Likes paging keys [u'cursors']
Got 2 likes after paging
Post 233 None 307091372652912_451135928248455




Likes keys



 [u'paging', u'data']
{u'id': u'307091372652912', u'name': u'\u0627\u062e\u0628\u0627\u0631 \u0627\u0644\u0645\u0647\u0627\u062c\u0631\u064a\u0646 \u0641\u064a \u0627\u0648\u0631\u0648\u0628\u0627'}
Got likes paging
Likes paging keys [u'cursors']
Got 4 likes after paging
Post 234 None 307091372652912_451135211581860




Likes keys



 [u'paging', u'data']
{u'id': u'307091372652912', u'name': u'\u0627\u062e\u0628\u0627\u0631 \u0627\u0644\u0645\u0647\u0627\u062c\u0631\u064a\u0646 \u0641\u064a \u0627\u0648\u0631\u0648\u0628\u0627'}
Got likes paging
Likes paging keys [u'cursors']
Got 4 likes after paging
Post 235 None 307091372652912_451134964915218




Post 236 None 307091372652912_450527724975942




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 237 None 307091372652912_450526398309408




Likes keys



 [u'paging', u'data']
{u'id': u'959159917475601', u'name': u'Adel Eteiwi'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 238 None 307091372652912_450467981648583




Likes keys



 [u'paging', u'data']
{u'id': u'1652076701703380', u'name': u'Souad Ahmad'}
Got likes paging
Likes paging keys [u'cursors']
Got 3 likes after paging
Post 239 None 307091372652912_450467058315342




Post 240 None 307091372652912_450466158315432




Likes keys



 [u'paging', u'data']
{u'id': u'678189828984396', u'name': u'Medo Madrid'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging
Post 241 None 307091372652912_449851405043574




Post 242 None 307091372652912_449850721710309




Post 243 None 307091372652912_449850368377011




Post 244 Syria, Homs, the town of Al Hola, 25-5-2012 Al Hola massacre (part 2) +18 307091372652912_440136499332547
Post 245 None 307091372652912_446048052090576




Post 246 None 307091372652912_443190199043028




Post 247 None 307091372652912_443189759043072




Post 248 None 307091372652912_443188405709874




Post 249 Timeline Photos 307091372652912_442640255764689




Likes keys



 [u'paging', u'data']
{u'id': u'640855626056060', u'name': u'\u0623\u0628\u0648 \u0639\u0644\u064a'}
Got likes paging
Likes paging keys [u'cursors']
Got 1 likes after paging


([{u'created_time': u'2015-02-24T12:51:02+0000',
   'en_message': u'hello astadr for the long absence from the page for personal reasons that god will tell is activated page new topics of interest to every immigrant or dream immigration  god willing  and claimed is increased aladinm of the page because we asobhny over 19 000 joint may allah reward you for your patience to feel that he might secretariat it is estimated answer a new topic every day for immigrants and in all honesty and sincerity vadhanh deserves to be a partner in the activation page needy 4 bloggers and atdcarua it s honesty and must ntekr they service for the sake of god to our brothers and god willing at the weekend claimed the appointment and everyone who wants to run phil comment published on this topic and to know what one is faithful and honest and capable responsibility fell published in the news long as you amanullah my brothers and sisters',
   u'from': {u'category': u'Community',
    u'id': u'307091372652912',

In [None]:
getPages(r.json()['data'],r.json().get('paging'),restart=None)

In [38]:
countCollections()

1145 pages
101760 posts
208335 comments
77796 likes


In [35]:
def getPages(data,paging,restart=None):
    
    if paging:
        if paging.get('next'):
            logging.warning('Paging needed')
    
    for n,d in enumerate(data):
        
        if (restart and d['id']==str(restart)) or (not restart):
            # If restart id defined then wait until we find it
            # if not add straight in
            restart=None
        
            time.sleep(pageSleepTime)

            print n,d['name']

            if not langid.classify(d['name'])[0]=='en':
                try:
                    enName=textblob.TextBlob(d['name']).translate().string
                    enName=clean(enName)
                    print 'Translates: ',enName
                except:
                    logging.warning('Translation failed')
                    enName=None
            else:
                enName=None

            print 'http://fb.com/'+d['id'],d.get('category')
            res=getPageInfo(d['id'],raw=True)

            res['name']=clean(d['name'])
            res['about']=clean(d.get('about'))
            res['description']=clean(d.get('description'))

            ######################################################################
            posts,comments,likes=getPostsFromPage(d['id'],limit=postsLimit,raw=False)
            # Get posts,comments,likes from that page
            nAdded=nAlready=0
            for post in posts:
                if not isPostInDb(post['id']):
                    addPostToDb(post)
                    nAdded+=1
                else:
                    #logging.warning('Post %s already in DB' % post['id'])
                    nAlready+=1
            logging.warning('%d posts added (%d already in DB)' % (nAdded,nAlready))
    #        addCommentsToDb(comments)

    #        addLikesToDb(likes)
            ######################################################################

            if enName:
                res['name_en']=enName

            category=d.get('category')
            if category:
                res['category']=category

            print res.keys()
            if not isPageInDb(d['id']):
                addPageToDb(res)
            else:
                logging.info('Page %s already in DB' % d['id'])
        else:
            logging.warning('Skipping page %s. Waiting for %s to restart' % (d['id'],restart))

In [42]:
def getPostsFromPage(pageId,raw=False,limit=100):
    '''
    Requests list of posts, list of comments and 
    list of likes from a page
    Returns a list of JSON objects
    or if raw=True, a string description of posts
    '''
    
    logging.info('Getting posts,comments,likes for page %s' % pageId)
    
    tempUrl='https://graph.facebook.com/%s/posts?&limit=%d&access_token=%s' % (pageId,postsLimit,ACCESSTOKEN)
    
    out=[]
    outFull=[]
    
    comments=None
    likes=None
    
    r=requests.get(tempUrl)
    ######################################################
    success=None
    nAttempts=0

    while not success:
        # Keep looping if unsuccessful
        r=requests.get(tempUrl)
        success,skip=handleResult(r.status_code,r.text)
        # Try, find out if successful or should skip

        if skip or nAttempts==nSkip:
            # If tried nSkip times or if should skip
            r={'data':[],'paging':None}
            if nAttempts==nSkip:
                logging.warning('Skipping posts after %d attempts' % nAttempts)
                return ([],[],[])
        time.sleep(nWait)
        nAttempts+=1
    ######################################################
    
    for n,d in enumerate(r.json()['data']):
        
        time.sleep(postSleepTime)

        
        name=d.get('name')
        id=d.get('id')
        print 'Post %d %s %s' % (n,name,id)
        
        message=d.get('message')
        if message:
            message=clean(message)
        else:
            logging.warning('No message for post %s' % d['id'])
        
        description=d.get('description')
        if description:
            description=clean(description)
        else:
            logging.warning('No description for post %s' % d['id'])
        
        caption=d.get('caption')
        if caption:
            caption=clean(caption)
        else:
            logging.warning('No caption for post %s' % d['id'])
        
        d['page_id']=pageId
        d['retrieved']=time.time()
        
        if d.get('icon'):del d['icon']
        if d.get('picture'):del d['picture']
        if d.get('privacy'):del d['privacy']
        # Don't need these
        
        try:
            shareCount=d['shares']['count']
            d['shares']=shareCount
        except:
            pass
        # Simplify this
        
        if message:
#            print message
            out.append(message)
            if not langid.classify(message)[0]=='en':
                try:
                    enMessage=textblob.TextBlob(message).translate().string
                    enMessage=clean(enMessage)
                    out.append('==>'+enMessage+'---------')
                    d['en_message']=enMessage
                except:
                    logging.warning('Translation failed')
                    enMessage=None
        if description:
            out.append(description)
            if not langid.classify(description)[0]=='en':
                try:
                    enDescription=textblob.TextBlob(description).translate().string
                    enDescription=clean(enDescription)
                    out.append('==>'+enDescription+'---------')
                    d['en_description']=enDescription
                except:
                    logging.warning('Translation failed')
                    enDescription=None
        if caption:
            out.append(caption)
            if not langid.classify(caption)[0]=='en':
                try:
                    enCaption=textblob.TextBlob(caption).translate().string
                    enCaption=clean(enCaption)
                    out.append('==>'+enCaption+'---------')
                    d['en_caption']=enCaption
                except:
                    enCaption=None
                    
        try:
            comments=d['comments']
            del d['comments']
        except:
            comments=None
        
        if comments:
            logging.info('Getting comments...')
            commentData=getComments(comments,pageId)
            # This does all the paging
#            for c in commentData['data']:
#                print 'Comments data:',c.keys()
#                print 'Likes:',c[u'user_likes'],c.get('likes')
            addCommentsToDb(commentData)
            
            # TODO get comment likes
            
        try:
            likes=d['likes']
            del d['likes']
        except:
            likes=None
        
        if likes:
            logging.info('Getting likes...')
            likeData=getLikes(likes,pageId,id)
            # This does all the paging
            addLikesToDb(likeData)

        
        outFull.append(d)       
    
    if raw:
        '\n'.join(out)
        pass # return string
    else:
        return outFull,comments,likes

In [65]:
def getLikes(likes,pageId,id):
    '''
    Takes a dictionary of like data from API with keys
    [paging,data], pageId and post id. If paging information is present, keep 
    requesting pages
    '''
    
    print 'Likes keys',likes.keys()
    
    print likes['data'][0]
    
    if likes.get('paging'):
        print 'Got likes paging'
        print 'Likes paging keys',likes['paging'].keys()
        current=likes
        
        while current['paging'].get('next'):
            logging.info('Paging likes... %s' % current['paging']['next'])
            current=getNextLikes(current['paging']['next'])
            if current:
                print 'Got next page of likes (current) (%d so far)' % len(likes['data'])
                likes['data'].extend(current['data'])
            else:
                print 'Breaking from likes'
                break
            # TODO better error handling
                
        print 'Got %d likes after paging' % len(likes['data'])
#    print 'Likes type %s' % type(likes)
#    print likes.keys()
    
    for like in likes['data']:
        like['id']='%s_%s' % (id,like['id'])
        # Make a unique like ID made up of post id_likeid
        like['parent_id']=id
        # Keep parent id of comment/post for getting most liked content
    
#    print 'Likes',likes.keys()
#    print likes.get('paging')
#    print likes['data'][0]
    return likes

In [31]:
def getComments(comments,pageId):
    '''
    Takes a dictionary of comment data from API with keys
    [paging,data]. If paging information is present, keep
    requesting pages
    '''
    
    if comments.get('paging'):
        
        current=comments
        
        while current['paging'].get('next'):
            logging.info('Paging comments...')
            current=getNextComments(current['paging']['next'])
            comments['data'].extend(current['data'])
            
            if not current.get('paging'):
                break
    return comments

In [23]:
def getNextComments(nextToken):
    res=requests.get(nextToken)
    
    if not res.status_code==200:
        logging.warning('Error with next comments data %d' % res.status_code)
        return None
    else:
        return res.json()

In [50]:
def getNextLikes(nextToken):
    logging.warning('Getting next likes: %s' % nextToken)
    res=requests.get(nextToken)
    
    if not res.status_code==200:
        logging.warning('Error with next likes data %d %s' % (res.status_code,res.text))
        return None
    else:
        print 'Returning like results %d' % res.status_code
        return res.json()

In [None]:
trash=getPostsFromPage('657095547710700',raw=True,limit=10)



KeyboardInterrupt: 

In [25]:
def getPageInfo(pageId,raw=False):
    '''
    Requests info for a page by ID
    Returns the info either as a JSON object
    or if raw=True as a string to be printed
    '''
    tempUrl='https://graph.facebook.com/v2.4/'+pageId+'?fields=about,description,location,phone,talking_about_count,\
    engagement,start_info,likes,website&access_token='+ACCESSTOKEN

    res=requests.get(tempUrl)
    
    
    if not res.status_code==200:
        logging.warning('Request failed: %d %s' % (res.status_code,res.text))
    
    res=res.json()
    res['retrieved']=time.time()
    
    description=res.get('description')
    if description:
        description=clean(description)
    else:
        logging.warning('No description for page %s' % pageId)
        logging.warning('Keys %s' % res.keys())
    
    engagement=res.get('engagement')
    if engagement:
        if engagement.get(u'count'):
            res['engagement']=str(res['engagement']['count'])
    
    start_info=res.get('start_info')
    if start_info:
        date=start_info.get(u'date')
        if date:
            res['start_info_clean']=str(res['start_info']['date']['year'])
            if res['start_info']['date'].get('month'):
                res['start_info_clean']+='/'+str(res['start_info']['date']['month'])
                if res['start_info']['date'].get('day'):
                    res['start_info_clean']+='/'+str(res['start_info']['date']['day'])

        else:
            del res['start_info']
    
    for k,v in res.items():
        if type(v) in [unicode,str]:

            if not langid.classify(v)[0]=='en':

                try:
                    res[k+'_en']=textblob.TextBlob(v).translate().string
                except:
                    logging.warning('Translation failed')
                # Create a new dictionary entry with the translation
    if raw:    
        return res
    else:
#        print res.json().items()
        return '\n'.join([k+'\t\t'+unicode(v) for k,v in res.items()])+'\n================'