In [None]:
import pandas
from pandas.io.json import json_normalize
import json

In [None]:
'''
These constants represent the different file paths you can check out.
They are available in the facebook-data-anonymous folder
'''

# Every value below is a path relative to the data folder; the loader
# (_json_with_ignored_keys) prepends '../facebook-data-anonymous/' to it.
# Pass one of these names to json_as_dataframe().

# Files under about_you/
ABOUT__FACE_RECOGNITION = "about_you/face_recognition.json"
ABOUT__FRIEND_PEER_GROUP = "about_you/friend_peer_group.json"
ABOUT__ADDRESS_BOOK = "about_you/your_address_books.json"

# Files under ads/
ADS__INTERESTS = "ads/ads_interests.json"
ADS__UPLOADED_CONTACT_LIST = "ads/advertisers_who_uploaded_a_contact_list_with_your_information.json"
ADS__INTERACTED_WITH = "ads/advertisers_you've_interacted_with.json"

# Files under apps_and_websites/
APPS_AND_WEBSITES = 'apps_and_websites/apps_and_websites.json'

# Files under comments/
COMMENTS = 'comments/comments.json'

# Files under events/
EVENTS__EVENT_INVITATIONS = 'events/event_invitations.json'
EVENTS__YOUR_EVENT_RESPONSES = 'events/your_event_responses.json'
EVENTS__YOUR_EVENTS = 'events/your_events.json'

# Files under following_and_followers/
FOLLOWS__FOLLOWED_PAGES = 'following_and_followers/followed_pages.json'
FOLLOWS__FOLLOWING = 'following_and_followers/following.json'
FOLLOWS__UNFOLLOWED_PAGES = 'following_and_followers/unfollowed_pages.json'

# Files under friends/
FRIENDS__FRIENDS = 'friends/friends.json'
FRIENDS__REJECTED_REQUESTS = 'friends/rejected_friend_requests.json'
FRIENDS__REMOVED = 'friends/removed_friends.json'
FRIENDS__SENT_REQUESTS = 'friends/sent_friend_requests.json'

# Files under groups/
GROUPS__MEMBERSHIP_ACTIVITY = 'groups/your_group_membership_activity.json'
GROUPS__ADMIN = 'groups/your_groups.json'
GROUPS__POSTS_AND_COMMENTS = 'groups/your_posts_and_comments_in_groups.json'

# Files under likes_and_reactions/
LIKES_AND_REACTIONS__PAGES = 'likes_and_reactions/pages.json'
LIKES_AND_REACTIONS__POSTS_AND_COMMENTS = 'likes_and_reactions/posts_and_comments.json'

# Files under marketplace/
MARKETPLACE__ITEMS_BOUGHT = 'marketplace/items_bought.json'
MARKETPLACE__ITEMS_SOLD = 'marketplace/items_sold.json'

# A single message file from the inbox.
# NOTE(review): the folder name looks specific to this dataset -- presumably
# it has to be changed to explore a different conversation; confirm.
MESSAGES__SAMPLE = 'messages/inbox/33eee_ozf3zkkl9a/message_1.json'

# Files under payment_history/
PAYMENT_HISTORY = 'payment_history/payment_history.json'

# Files under posts/
POSTS__OTHERS_PEOPLES_POSTS_TO_YOUR_TIMELINE = "posts/other_people's_posts_to_your_timeline.json"
POSTS__YOUR_POSTS = 'posts/your_posts.json'

# Files under profile_information/
PROFILE__PROFILE_INFO = 'profile_information/profile_information.json'
PROFILE__UPDATE_HISTORY = 'profile_information/profile_update_history.json'

# Files under saved_items_and_collections/
SAVED_ITEMS_AND_COLLECTIONS = 'saved_items_and_collections/saved_items_and_collections.json'

# Files under search_history/
SEARCH_HISTORY = 'search_history/your_search_history.json'

In [None]:
'''
These functions make it easier to convert the data into a pandas data frame.
The data is in the form of a non-repetitive JSON, so it is not the most
conducive to a data frame. But in case you want to check the frequency
with which certain values appear, this flattens the properties in the
order that they appear.
'''

def json_as_dataframe(filename):
    """Load one export file and return its records as a flattened DataFrame.

    Args:
        filename: Path of the JSON file relative to the data folder,
            normally one of the path constants defined in this file.

    Returns:
        The result of ``json_normalize`` applied to the records of the
        file, i.e. a pandas DataFrame in which nested properties become
        separate columns.
    """
    records = _json_with_ignored_keys(filename)
    _, flatten_keys = _keys_for_file(filename)

    # Many records wrap their payload in a one-element list (e.g. under
    # `data`). _collapse_keys unwraps those in place, so its return value
    # does not need to be stored back.
    for record in records:
        _collapse_keys(record, flatten_keys)

    # pandas 1.0 exposed json_normalize at the top level and deprecated the
    # pandas.io.json location; fall back to the old location only when the
    # installed pandas predates that.
    try:
        normalize = pandas.json_normalize
    except AttributeError:
        from pandas.io.json import json_normalize as normalize
    return normalize(records)


def _json_with_ignored_keys(filename):
    """Read a JSON file from the data folder and descend past its wrapper keys.

    Args:
        filename: Path of the JSON file relative to
            '../facebook-data-anonymous/'.

    Returns:
        The parsed JSON value reached after indexing, in order, by each of
        the ignored keys that _keys_for_file reports for ``filename``. When
        no keys are registered the whole parsed document is returned.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If one of the expected wrapper keys is missing.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open('../facebook-data-anonymous/' + filename, encoding='utf-8') as f:
        data = json.load(f)
    ignored_keys, _ = _keys_for_file(filename)
    for key in ignored_keys:
        data = data[key]
    return data

def _collapse_keys(row, collapse_keys):
    for ck in collapse_keys:
        if ck in row and isinstance(row[ck], list) and len(row[ck]) == 1:
            row[ck] = _collapse_keys(row[ck][0], collapse_keys)
    return row

def _keys_for_file(filename):
    """Return the wrapper keys and the collapsible keys registered for a file.

    Paths are matched by equality, so any string equal to one of the path
    constants is recognised, not only the constant object itself.

    Args:
        filename: Path of the JSON file relative to the data folder.

    Returns:
        A tuple ``(ignored_keys, flatten_keys)`` of two lists:
        ``ignored_keys`` are the keys to index through, in order, to reach
        the records of the file; ``flatten_keys`` are the keys whose
        one-element lists should be unwrapped in each record. Both lists
        are empty for files without an entry in the table.
    """
    # The table is built on every call so that each caller receives fresh
    # lists and cannot affect later calls by mutating them.
    keys_by_file = {
        ABOUT__ADDRESS_BOOK: (['address_book', 'address_book'], []),

        ADS__INTERACTED_WITH: (['history'], []),

        APPS_AND_WEBSITES: (['installed_apps'], []),

        COMMENTS: (['comments'], ['data']),

        EVENTS__EVENT_INVITATIONS: (['events_invited'], []),
        # NOTE: `events_joined` can be changed to `events_declined` or
        # `events_interested`
        EVENTS__YOUR_EVENT_RESPONSES: (['event_responses', 'events_joined'], []),
        EVENTS__YOUR_EVENTS: (['your_events'], []),

        FOLLOWS__FOLLOWED_PAGES: (['pages_followed'], ['data']),
        FOLLOWS__FOLLOWING: (['following'], []),
        FOLLOWS__UNFOLLOWED_PAGES: (['pages_unfollowed'], ['data']),

        FRIENDS__FRIENDS: (['friends'], []),
        FRIENDS__REJECTED_REQUESTS: (['rejected_requests'], []),
        FRIENDS__REMOVED: (['deleted_friends'], []),
        FRIENDS__SENT_REQUESTS: (['sent_requests'], []),

        GROUPS__MEMBERSHIP_ACTIVITY: (['groups_joined'], ['attachments', 'data']),
        GROUPS__ADMIN: (['groups_admined'], []),
        GROUPS__POSTS_AND_COMMENTS: (['group_posts', 'activity_log_data'], ['data']),

        LIKES_AND_REACTIONS__PAGES: (['page_likes'], []),
        LIKES_AND_REACTIONS__POSTS_AND_COMMENTS: (['reactions'], ['data']),

        MARKETPLACE__ITEMS_BOUGHT: (['items_buying'], []),
        MARKETPLACE__ITEMS_SOLD: (['items_selling'], []),

        MESSAGES__SAMPLE: (['messages'], []),

        PAYMENT_HISTORY: (['payments', 'payments'], []),

        POSTS__OTHERS_PEOPLES_POSTS_TO_YOUR_TIMELINE: (['wall_posts_sent_to_you'], ['data']),
        POSTS__YOUR_POSTS: (['status_updates'], ['attachments', 'data']),

        SAVED_ITEMS_AND_COLLECTIONS: (['saves_and_collections'], ['attachments', 'data']),

        SEARCH_HISTORY: (['searches'], ['data']),
    }
    return keys_by_file.get(filename, ([], []))

In [None]:
'''
This is where you can play around with the data frame.
Have fun!
'''

# Load the event invitations file as a flattened data frame. Swap in any of
# the other path constants to explore a different file.
a = json_as_dataframe(EVENTS__EVENT_INVITATIONS)
# Draw one row at random; as the last expression of the cell it is
# presumably what the notebook displays.
a.sample(n=1)