In [9]:
import requests

## Check Rate Limit

In [20]:
import time
import json

def check_rate_limit(response, threshold=90, wait_time=60):
    """
    Checks if the rate limit is close to being reached and waits if necessary.
    
    Parameters:
        response (requests.Response): The response object from an API call.
        threshold (int): The percentage threshold (0-100) at which to trigger waiting.
        wait_time (int): Number of seconds to wait if the rate limit is near the threshold.
        
    Returns:
        bool: True if rate limit is within acceptable limits, False if waiting was triggered.
    """
    # Check for the 'X-App-Usage' header to get current app-level usage statistics
    app_usage = response.headers.get("X-App-Usage")
    if app_usage:
        usage_data = json.loads(app_usage)
        call_count = usage_data.get("call_count", 0)
        total_time = usage_data.get("total_time", 0)
        total_cputime = usage_data.get("total_cputime", 0)
        
        print('app_usage: ', usage_data)
        # Check if any of these metrics are near the threshold
        if call_count > threshold or total_time > threshold or total_cputime > threshold:
            print(f"Rate limit nearing threshold. Waiting for {wait_time} seconds.")
            time.sleep(wait_time)
            return False

    # Check for the 'X-Business-Use-Case-Usage' header for business-level usage statistics
    # x-ad-account-usage: {
    #     "acc_id_util_pct": 9.67,   //Percentage of calls made for this ad account.
    #     "reset_time_duration": 100,   //Time duration (in seconds) it takes to reset the current rate limit score.
    #     "ads_api_access_tier": 'standard_access'   //Tiers allows your app to access the Marketing API. standard_access enables lower rate limiting.
    # }
    business_usage = response.headers.get("X-Business-Use-Case-Usage")
    if business_usage:
        usage_data = json.loads(business_usage)
        acc_util_pct = usage_data.get("acc_id_util_pct", 0)
        
        print('business_usage: ', usage_data)
        if acc_util_pct > threshold:
            print(f"Business rate limit nearing threshold ({acc_util_pct}%). Waiting for {wait_time} seconds.")
            time.sleep(wait_time)
            return False
        

    # If usage is within safe limits
    return True


## TOKENS

In [None]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv(override=True)

# Get environment variables
PAGE_ACCESS_TOKEN = os.getenv('PAGE_ACCESS_TOKEN')
PAGE_ID = os.getenv('PAGE_ID') 
IG_USER_ID = os.getenv('IG_USER_ID')



## IG fields and metrics

In [12]:
ig_reel_metrics = ['clips_replays_count','ig_reels_aggregated_all_plays_count','ig_reels_avg_watch_time',
                    'ig_reels_video_view_total_time','total_interactions','saved','shares','reach','plays','likes','comments']

ig_img_metrics = ['profile_activity','profile_visits','follows','impressions','total_interactions','saved','shares','reach','likes','comments']


ig_all_metrics = ['impressions', 'reach', 'replies', 'saved', 'video_views', 'likes', 'comments', 'shares', 'plays', 'total_interactions', 'follows', 'profile_visits', 'profile_activity', 'navigation', 'ig_reels_video_view_total_time', 'ig_reels_avg_watch_time', 'clips_replays_count', 'ig_reels_aggregated_all_plays_count']



In [13]:
ig_media_fields = ['id','caption','media_type','media_url','permalink','thumbnail_url','timestamp','comments_count','media_product_type','boost_ads_list']


In [22]:
def save_to_json(json_data, filename):
    """
    Save posts data to a JSON file with proper encoding handling
    
    Args:
        json_data: List of post dictionaries to save
        filename: Name of the output JSON file
    """
    import json
    
    # Handle Facebook Graph API objects that aren't JSON serializable
    def json_serializer(obj):
        if hasattr(obj, '__dict__'):
            return obj.__dict__
        return str(obj)
    
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, ensure_ascii=False, indent=2, default=json_serializer)
        print(f"Successfully saved data to {filename}")
    except Exception as e:
        print(f"Error saving to JSON: {str(e)}")
        
        
def read_json_file(filename):
    """
    Read any data from a JSON file
    
    Args:
        filename: Name of the JSON file to read
    Returns:
        Data loaded from the JSON file
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            data = json.load(f)
        print(f"Successfully loaded data from {filename}")
        return data
    except Exception as e:
        print(f"Error reading JSON file: {str(e)}")
        return None

## IG media insights export

In [17]:

def fetch_instagram_media_with_insights(ig_account_id, fields, metrics):
    """
    Fetches all Instagram media with specified insights.
    
    Parameters:
        ig_account_id (str): Instagram Business Account ID.
        fields (list): List of fields to retrieve for each media.
        metrics (list): List of metrics to retrieve in insights.
    
    Returns:
        list: A list of dictionaries containing media data with insights.
    """
    all_media_data = []
    url = f"https://graph.facebook.com/v21.0/{ig_account_id}/media"
    params = {
        "fields": ",".join(fields) + f",insights.metric({','.join(metrics)})",
        "access_token": PAGE_ACCESS_TOKEN,
        # "limit": 100  # Maximum allowed per request for efficiency
    }

    while url:
        response = requests.get(url, params=params)
        
        # Check rate limit before processing
        if not check_rate_limit(response):  # Assuming check_rate_limit function is defined
            continue  # Skip iteration if we're waiting due to rate limit
        
        data = response.json()
        
        if "data" in data:
            for media in data["data"]:
                media_info = {field: media.get(field) for field in fields}
                media_info["insights"] = {}
                media_info["raw_data"] = media
                
                # Process insights
                if "insights" in media:
                    for insight in media["insights"]["data"]:
                        metric_name = insight["name"]
                        metric_value = insight["values"][0]["value"]
                        media_info["insights"][metric_name] = metric_value
                
                all_media_data.append(media_info)
            
            # Get next page URL for pagination
            url = data.get("paging", {}).get("next")
        else:
            print("Error fetching data:", data)
            break

    return all_media_data

media_data_with_insights = fetch_instagram_media_with_insights(IG_USER_ID, ig_media_fields, ig_all_metrics)

save_to_json(media_data_with_insights, "media_data_with_insights.json")

# Print results
for media in media_data_with_insights:
    print("Media ID:", media.get("id"))
    print("Caption:", media.get("caption"))
    print("Media Type:", media.get("media_type"))
    print("Media URL:", media.get("media_url"))
    print("Timestamp:", media.get("timestamp"))
    print("Insights:", media.get("insights"))
    print("-----")


acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
acc_util_pct: 0, reset_time_duration: 0
call_count: 0, total_time: 0, total_cputime: 0
Error fetching data: {'error': {'message': 'Invalid parameter', 'type': '

In [6]:
import pandas as pd

def insights_json_to_df(json_data, fields, metrics):
    """
    Transforms JSON data with media insights into a Pandas DataFrame.

    Parameters:
        json_data (list): List of dictionaries containing media data with insights.
        fields (list): List of fields to include in the DataFrame (e.g., ['id', 'caption', 'media_type']).
        metrics (list): List of metrics included in insights to include as columns in the DataFrame.

    Returns:
        pd.DataFrame: A DataFrame where each row is a media item with specified fields and metrics.
    """
    # Initialize a list to store each row of media data with insights
    data_rows = []

    # Iterate over each media item in the JSON data
    for media in json_data:
        row = {field: media.get(field, None) for field in fields}  # Extract basic fields
        
        # Extract insights (metrics)
        insights = media.get("insights", {})
        for metric in metrics:
            row[metric] = insights.get(metric, None)  # Add each metric value to the row
        
        # Append the row to the data list
        data_rows.append(row)

    # Create DataFrame from the list of rows
    df = pd.DataFrame(data_rows, columns=fields + metrics)
    return df


In [19]:


df = insights_json_to_df(media_data_with_insights, ig_media_fields, ig_all_metrics)

# Display the DataFrame
print(df.head())

                  id                                            caption  \
0  17962634378698732  Election season may be tense, but art is here ...   
1  17922066083988006  Café des Fleurs 🌸 — one of our most adored pie...   
2  18153455083331708  🎉 November Art Sale Alert! 🎉\n\nSnag your favo...   
3  18013270619394114  Art lovers, the moment you’ve been waiting for...   
4  17920054823996004    We take the rules very seriously around here! 😄   

  media_type                                          media_url  \
0      VIDEO  https://instagram.fhfa1-1.fna.fbcdn.net/o1/v/t...   
1      VIDEO  https://instagram.fhfa1-1.fna.fbcdn.net/o1/v/t...   
2      IMAGE  https://scontent.cdninstagram.com/v/t51.75761-...   
3      VIDEO                                               None   
4      VIDEO  https://instagram.fhfa1-1.fna.fbcdn.net/o1/v/t...   

                                     permalink  \
0  https://www.instagram.com/reel/DB_ltAvx45z/   
1  https://www.instagram.com/reel/DB9hRoYv_Jc/

In [20]:
df.to_csv("media_data_with_insights.csv", index=False)


In [21]:
df.head()

Unnamed: 0,id,caption,media_type,media_url,permalink,thumbnail_url,timestamp,comments_count,media_product_type,boost_ads_list,...,plays,total_interactions,follows,profile_visits,profile_activity,navigation,ig_reels_video_view_total_time,ig_reels_avg_watch_time,clips_replays_count,ig_reels_aggregated_all_plays_count
0,17962634378698732,"Election season may be tense, but art is here ...",VIDEO,https://instagram.fhfa1-1.fna.fbcdn.net/o1/v/t...,https://www.instagram.com/reel/DB_ltAvx45z/,https://scontent.cdninstagram.com/v/t51.75761-...,2024-11-05T14:48:18+0000,29,REELS,,...,11189.0,1029.0,,,,,52778075.0,4727.0,4734.0,15923.0
1,17922066083988006,Café des Fleurs 🌸 — one of our most adored pie...,VIDEO,https://instagram.fhfa1-1.fna.fbcdn.net/o1/v/t...,https://www.instagram.com/reel/DB9hRoYv_Jc/,https://scontent.cdninstagram.com/v/t51.75761-...,2024-11-04T19:36:40+0000,94,REELS,,...,85828.0,4322.0,,,,,489290224.0,5722.0,17294.0,103122.0
2,18153455083331708,🎉 November Art Sale Alert! 🎉\n\nSnag your favo...,IMAGE,https://scontent.cdninstagram.com/v/t51.75761-...,https://www.instagram.com/p/DB7IHUTRd0s/,,2024-11-03T21:12:11+0000,3,FEED,,...,,63.0,1.0,99.0,62.0,,,,,
3,18013270619394114,"Art lovers, the moment you’ve been waiting for...",VIDEO,,https://www.instagram.com/reel/DBwKmw8xCBM/,https://scontent.cdninstagram.com/v/t51.75761-...,2024-10-30T15:03:04+0000,7,REELS,,...,11669.0,231.0,,,,,59578377.0,5100.0,4526.0,16199.0
4,17920054823996004,We take the rules very seriously around here! 😄,VIDEO,https://instagram.fhfa1-1.fna.fbcdn.net/o1/v/t...,https://www.instagram.com/reel/DBrUzM0RBzR/,https://scontent.cdninstagram.com/v/t51.75761-...,2024-10-28T17:58:04+0000,28,REELS,,...,11968.0,917.0,,,,,65999253.0,5510.0,3993.0,16083.0


## IG Page Insights


In [19]:
import requests
from datetime import datetime, timedelta
import pandas as pd

import json
import time
from typing import Optional
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def check_rate_limit_2(response, pause_threshold: int = 80) -> None:
    """
    Check API rate limits from response headers and pause if necessary.
    
    Parameters:
    -----------
    response : requests.Response
        The response object from an API request
    pause_threshold : int, optional
        Percentage threshold at which to pause requests (default: 80)
        
    Returns:
    --------
    None
    """
    def parse_header(header_value: Optional[str]) -> dict:
        """Parse JSON header value into dictionary."""
        if not header_value:
            return {}
        try:
            return json.loads(header_value)
        except json.JSONDecodeError:
            logger.warning(f"Failed to parse header value: {header_value}")
            return {}

    # Get usage headers
    app_usage = parse_header(response.headers.get('x-app-usage'))
    account_usage = parse_header(response.headers.get('x-ad-account-usage'))
    
    # Check app usage limits
    if app_usage:
        metrics = {
            'call_count': app_usage.get('call_count', 0),
            'total_time': app_usage.get('total_time', 0),
            'total_cputime': app_usage.get('total_cputime', 0)
        }
        
        # Find highest usage percentage
        max_usage = max(metrics.values())
        max_metric = max(metrics.items(), key=lambda x: x[1])[0]
        
        if max_usage >= pause_threshold:
            pause_time = 60  # Default pause of 1 minute
            logger.warning(
                f"Rate limit threshold reached: {max_metric} at {max_usage}%. "
                f"Pausing for {pause_time} seconds."
            )
            time.sleep(pause_time)
    
    # Check ad account usage limits
    if account_usage:
        usage_pct = account_usage.get('acc_id_util_pct', 0)
        reset_time = account_usage.get('reset_time_duration', 60)
        
        if usage_pct >= pause_threshold:
            logger.warning(
                f"Ad account usage threshold reached: {usage_pct}%. "
                f"Pausing for {reset_time} seconds."
            )
            time.sleep(reset_time)

def get_daily_metrics(
    ig_user_id,
    access_token,
    metric_param,
    start_date,
    end_date,
    breakdown=None
):
    """
    Calculate daily values for Instagram metrics that only support total_value
    by making multiple API calls with single-day ranges.
    
    Parameters:
    -----------
    ig_user_id : str
        Instagram User ID
    access_token : str
        Valid Instagram access token
    metric_param : str
        List of metrics to retrieve (e.g., 'shares,saved')
    start_date : datetime
        Start date for the range
    end_date : datetime
        End date for the range
    breakdown : str, optional
        Breakdown parameter (e.g., 'media_product_type')
        
    Returns:
    --------
    pandas.DataFrame
        Daily metrics with dates and values
    """
    base_url = f"https://graph.facebook.com/v21.0/{ig_user_id}/insights"
    daily_data = []
    
    current_date = start_date
    while current_date <= end_date:
        # Calculate the timestamps for a single day
        since_timestamp = int(current_date.timestamp())
        until_timestamp = int((current_date + timedelta(days=1)).timestamp())
        
        # Prepare API parameters
        params = {
            'metric': metric_param,
            'period': 'day',
            'metric_type': 'total_value',
            'since': since_timestamp,
            'until': until_timestamp,
            'access_token': access_token
        }
        
        if breakdown:
            params['breakdown'] = breakdown
            
        # Make API request
        response = requests.get(base_url, params=params)
        check_rate_limit_2(response)
        
        print(response.json())
        
        if response.status_code == 200:
            data = response.json()
            day_data = {}
            
            if 'data' in data and len(data['data']) > 0:
                day_data['date'] = current_date.date()
                for metric in data['data']:
                    # print(metric)
                    if 'total_value' in metric:
                        day_data[metric['name']] = metric['total_value']['value']
                    
                    if breakdown and 'breakdowns' in metric['total_value']:
                        for breakdown_data in metric['total_value']['breakdowns'][0]['results']:
                            day_data[f'{metric["name"]}_{breakdown_data["name"]}'] = breakdown_data['value']
                
                daily_data.append(day_data)
            
        current_date += timedelta(days=1)
    
    # Convert to DataFrame
    df = pd.DataFrame(daily_data)
    
    
    return df


In [62]:
allowed_metrics = 'impressions, reach, follower_count, email_contacts, phone_call_clicks, text_message_clicks, get_directions_clicks, website_clicks, profile_views, online_followers, accounts_engaged, total_interactions, likes, comments, shares, saves, replies, engaged_audience_demographics, reached_audience_demographics, follower_demographics, follows_and_unfollows, profile_links_taps, views, threads_likes, threads_replies, reposts, quotes, threads_followers, threads_follower_demographics'.split(', ')
allowed_metrics = set(allowed_metrics)

interaction_metrics = set([
        "impressions",
        "reach",
        "total_interactions",
        "accounts_engaged",
        "likes",
        "comments",
        "saved",
        "shares",
        "replies",
        "follows_and_unfollows",
        "profile_links_taps",
        "website_clicks",
        "profile_views"
    ])


# Example usage:
if __name__ == "__main__":
    # Replace these with your actual credentials
    
    start_date = datetime.now() - timedelta(days=365*2)  # 2 years ago from today
    end_date = datetime.now()
    
    interaction_metrics = list(interaction_metrics.intersection(allowed_metrics))
    print(interaction_metrics)
    
    # Get daily shares
    start_time = time.time()
    ig_page_df = get_daily_metrics(
        ig_user_id=IG_USER_ID,
        access_token=PAGE_ACCESS_TOKEN,
        metric_param=','.join(interaction_metrics),
        start_date=start_date,
        end_date=end_date
    )
    print(f"\nTime taken: {time.time() - start_time:.2f} seconds")
    
    print("\nDaily Shares:")
    print(ig_page_df)
    
    ig_page_df.to_csv("ig_page_df.csv", index=False)
    
    # # Get daily shares with media_product_type breakdown
    # shares_by_type_df = get_daily_metrics(
    #     ig_user_id=IG_USER_ID,
    #     access_token=PAGE_ACCESS_TOKEN,
    #     metric=','.join(interaction_metrics),
    #     start_date=start_date,
    #     end_date=end_date,
    #     breakdown='follow_type'
    # )
    # print("\nDaily Shares by Media Type:")
    # print(shares_by_type_df)

['impressions', 'shares', 'comments', 'website_clicks', 'follows_and_unfollows', 'replies', 'accounts_engaged', 'total_interactions', 'reach', 'profile_views', 'profile_links_taps', 'likes']
{'data': [{'name': 'impressions', 'period': 'day', 'title': 'Impressions', 'description': 'The number of times your posts, stories, reels, videos and live videos were on screen, including in ads.', 'total_value': {'value': 514}, 'id': '17841402356724119/insights/impressions/day'}, {'name': 'shares', 'period': 'day', 'title': 'Shares', 'description': 'The number of shares of your posts, stories, reels, videos and live videos.', 'total_value': {'value': 0}, 'id': '17841402356724119/insights/shares/day'}, {'name': 'comments', 'period': 'day', 'title': 'Comments', 'description': 'The number of comments on your posts, reels, videos, and live videos.', 'total_value': {'value': 0}, 'id': '17841402356724119/insights/comments/day'}, {'name': 'website_clicks', 'period': 'day', 'title': 'Website link taps', '

In [7]:
# Get timestamps for specific dates
nov_8 = int(datetime(2023, 11, 8).timestamp())
nov_12 = int(datetime(2023, 11, 12).timestamp())

print(f"2023-11-08 timestamp: {nov_8}")
print(f"2023-11-12 timestamp: {nov_12}")



2023-11-08 timestamp: 1699394400
2023-11-12 timestamp: 1699740000


In [9]:
now_ts = int(datetime.now().timestamp())
now_90d = datetime.now() - timedelta(days=90)
now_90d_ts = int(now_90d.timestamp())

now_ts, now_90d_ts

(1731013470, 1723233870)

In [54]:
ig_page_df

Unnamed: 0,date,impressions,shares,comments,website_clicks,replies,accounts_engaged,total_interactions,reach,profile_views,profile_links_taps,likes
0,2024-10-31,3555,1,3,26,0,49,52,2792,77,0,45
1,2024-11-01,3349,11,9,7,0,41,63,1860,77,0,39
2,2024-11-02,2042,1,2,12,0,11,10,1203,72,0,5
3,2024-11-03,5917,4,3,51,1,69,68,4039,140,0,49
4,2024-11-04,29591,145,38,441,1,2077,2570,22667,795,0,2124
5,2024-11-05,33500,76,83,211,1,1789,2135,25785,560,0,1751
6,2024-11-06,56400,47,23,165,0,1068,1264,51036,547,0,1073


In [40]:
interaction_metrics


{'accounts_engaged',
 'comments',
 'follows_and_unfollows',
 'impressions',
 'likes',
 'profile_links_taps',
 'profile_views',
 'reach',
 'replies',
 'saved',
 'shares',
 'total_interactions',
 'website_clicks'}

In [41]:
interaction_metrics = set(interaction_metrics)

interaction_metrics.intersection(allowed_metrics)


{'accounts_engaged',
 'comments',
 'follows_and_unfollows',
 'impressions',
 'likes',
 'profile_links_taps',
 'profile_views',
 'reach',
 'replies',
 'shares',
 'total_interactions',
 'website_clicks'}

## FB Page Insights


In [7]:
fb_post_metrics = [
    "post_clicks",
    "post_clicks_by_type",
    "post_impressions",
    "post_impressions_unique",
    "post_impressions_paid",
    "post_impressions_paid_unique",
    "post_impressions_fan",
    "post_impressions_fan_unique",
    "post_impressions_organic",
    "post_impressions_organic_unique",
    "post_impressions_viral",
    "post_impressions_viral_unique",
    "post_impressions_nonviral",
    "post_impressions_nonviral_unique",
    "post_video_avg_time_watched",
    "post_video_complete_views_organic",
    "post_video_complete_views_organic_unique",
    "post_video_complete_views_paid",
    "post_video_complete_views_paid_unique",
    "post_video_retention_graph",
    "post_video_retention_graph_clicked_to_play",
    "post_video_retention_graph_autoplayed",
    "post_video_views_organic",
    "post_video_views_organic_unique",
    "post_video_views_paid",
    "post_video_views",
    "post_video_social_actions_count_unique",
    "post_activity_by_action_type",
    "post_activity_by_action_type_unique",
    "post_reactions_like_total",
    "post_reactions_love_total",
    "post_reactions_wow_total",
    "post_reactions_haha_total",
    "post_reactions_sorry_total",
    "post_reactions_anger_total"
]


In [30]:
import requests
import json

# Replace with your actual Page ID and Access Token

def fetch_facebook_posts_with_insights(page_id, fields, metrics):
    """
    Fetches all posts from a Facebook Page with specified insights.

    Parameters:
        page_id (str): The Facebook Page ID.
        fields (list): List of fields to retrieve for each post (e.g., ['id', 'message']).
        metrics (list): List of insight metrics to retrieve (e.g., ['post_impressions', 'post_reactions_by_type_total']).

    Returns:
        list: A list of dictionaries containing post data with insights.
    """
    all_posts_data = []
    url = f"https://graph.facebook.com/v21.0/{page_id}/posts"
    params = {
        "fields": ",".join(fields) + f",insights.metric({','.join(metrics)})" + ".period(lifetime)",
        "access_token": PAGE_ACCESS_TOKEN,
        # "limit": 100  # Max posts per request for efficiency
    }

    i = 1
    while url:
        response = requests.get(url, params=params)
        
        # Check for rate limits (assuming check_rate_limit function is defined separately)
        if not check_rate_limit(response):
            continue
        
        data = response.json()
        
        if "data" in data:
            for post in data["data"]:
                post_info = {field: post.get(field, None) for field in fields}
                post_info["insights"] = {}

                # Process insights data
                if "insights" in post:
                    for insight in post["insights"]["data"]:
                        metric_name = insight["name"]
                        metric_value = insight["values"][0]["value"]
                        post_info["insights"][metric_name] = metric_value

                all_posts_data.append(post_info)
                print(i, end='\r')
                i += 1
            
            # Get next page URL for pagination
            url = data.get("paging", {}).get("next")
        else:
            print("Error fetching data:", data)
            break

    return all_posts_data

# Example usage
fb_post_fields = ["id", "message", "promotion_status", "status_type", "created_time", "properties"]

# test_metrics = ['post_impressions', 'post_engagement', 'post_reactions_by_type_total']

# posts_data_with_insights = fetch_facebook_posts_with_insights(PAGE_ID, fb_post_fields, test_metrics)
posts_data_with_insights = fetch_facebook_posts_with_insights(PAGE_ID, fb_post_fields, fb_post_metrics)

save_to_json(posts_data_with_insights, "posts_data_with_insights.json")

fb_post_df = insights_json_to_df(posts_data_with_insights, fb_post_fields, fb_post_metrics)

business_usage:  {'124319427612935': [{'type': 'pages', 'call_count': 1, 'total_cputime': 1, 'total_time': 1, 'estimated_time_to_regain_access': 0}]}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
business_usage:  {'124319427612935': [{'type': 'pages', 'call_count': 1, 'total_cputime': 1, 'total_time': 1, 'estimated_time_to_regain_access': 0}]}
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
business_usage:  {'124319427612935': [{'type': 'pages', 'call_count': 1, 'total_cputime': 1, 'total_time': 1, 'estimated_time_to_regain_access': 0}]}
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
business_usage:  {'124319427612935': [{'type': 'pages', 'call_count': 1, 'total_cputime': 1, 'total_time': 1, 'estimated_time_to_regain_access': 0}]}
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
business_usage:  {'124319427612935': [{'type': 'pages', 'call_count': 1, 'total_cputime': 1, 'total_time': 1

Unnamed: 0,id,message,promotion_status,status_type,created_time,properties,post_clicks,post_clicks_by_type,post_impressions,post_impressions_unique,...,post_video_views,post_video_social_actions_count_unique,post_activity_by_action_type,post_activity_by_action_type_unique,post_reactions_like_total,post_reactions_love_total,post_reactions_wow_total,post_reactions_haha_total,post_reactions_sorry_total,post_reactions_anger_total
0,124319427612935_1111928650936172,🎉 November Art Sale Alert! 🎉\n\nSnag your favo...,inactive,added_photos,2024-11-03T21:12:14+0000,,16,"{'other clicks': 1, 'photo view': 10, 'link cl...",358,357,...,0,0,{'like': 5},{'like': 5},3,2,0,0,0,0
1,124319427612935_1089125369883167,You are in our hearts.,inactive,added_photos,2024-10-07T20:35:31+0000,,216,"{'other clicks': 208, 'photo view': 8}",1888,1877,...,0,0,"{'share': 2, 'like': 52, 'comment': 3}","{'share': 2, 'like': 52, 'comment': 3}",42,2,0,0,8,0
2,124319427612935_1083224957139875,✨ We just finished this stunning piece!✨\n\nDo...,inactive,added_photos,2024-09-30T23:24:18+0000,,111,"{'other clicks': 53, 'photo view': 58}",1292,1292,...,0,0,"{'share': 2, 'like': 65, 'comment': 7}","{'share': 2, 'like': 65, 'comment': 7}",41,19,6,0,0,0
3,124319427612935_1025065006289204,✨ This one is for the book lovers 📚✨\nTag a fr...,inactive,added_video,2024-07-15T18:10:57+0000,"[{'name': 'Length', 'text': '00:21'}]",8,{'other clicks': 8},447,413,...,257,22,"{'share': 3, 'like': 20, 'comment': 3}","{'share': 3, 'like': 19, 'comment': 3}",11,9,0,0,0,0
4,124319427612935_997016089094096,Your words mean the world to us! 🌟 Swipe to se...,inactive,added_photos,2024-06-06T14:09:08+0000,,15,"{'other clicks': 3, 'photo view': 12}",481,448,...,0,0,"{'like': 7, 'comment': 2}","{'like': 7, 'comment': 2}",6,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
587,124319427612935_131232596933249,היריד בצפון תל אביב ביום שישי הקרוב,ineligible,mobile_status_update,2010-11-21T18:58:14+0000,,0,{},2,2,...,0,0,{},{},0,0,0,0,0,0
588,124319427612935_105280656208747,קבלו את האלבום החדש שלנו.,ineligible,mobile_status_update,2010-11-06T20:11:09+0000,,0,{},2,2,...,0,0,{},{},0,0,0,0,0,0
589,124319427612935_150000208363797,המשלוח הגדול בדרך.. עוד חודש בערך אני עולה לארץ..,ineligible,mobile_status_update,2010-09-12T16:08:06+0000,,0,{},2,2,...,0,0,{},{},0,0,0,0,0,0
590,124319427612935_143902568974864,אתמול ראיתי את צעצוע של סיפור בתלת מימד וזה הז...,ineligible,mobile_status_update,2010-08-19T08:38:33+0000,,0,{},3,3,...,0,0,{'like': 2},{},2,0,0,0,0,0


In [31]:

fb_post_df = insights_json_to_df(posts_data_with_insights, fb_post_fields, fb_post_metrics)

fb_post_df.to_csv("fb_post_df.csv", index=False)
fb_post_df.head()


Unnamed: 0,id,message,promotion_status,status_type,created_time,properties,post_clicks,post_clicks_by_type,post_impressions,post_impressions_unique,...,post_video_views,post_video_social_actions_count_unique,post_activity_by_action_type,post_activity_by_action_type_unique,post_reactions_like_total,post_reactions_love_total,post_reactions_wow_total,post_reactions_haha_total,post_reactions_sorry_total,post_reactions_anger_total
0,124319427612935_1111928650936172,🎉 November Art Sale Alert! 🎉\n\nSnag your favo...,inactive,added_photos,2024-11-03T21:12:14+0000,,16,"{'other clicks': 1, 'photo view': 10, 'link cl...",358,357,...,0,0,{'like': 5},{'like': 5},3,2,0,0,0,0
1,124319427612935_1089125369883167,You are in our hearts.,inactive,added_photos,2024-10-07T20:35:31+0000,,216,"{'other clicks': 208, 'photo view': 8}",1888,1877,...,0,0,"{'share': 2, 'like': 52, 'comment': 3}","{'share': 2, 'like': 52, 'comment': 3}",42,2,0,0,8,0
2,124319427612935_1083224957139875,✨ We just finished this stunning piece!✨\n\nDo...,inactive,added_photos,2024-09-30T23:24:18+0000,,111,"{'other clicks': 53, 'photo view': 58}",1292,1292,...,0,0,"{'share': 2, 'like': 65, 'comment': 7}","{'share': 2, 'like': 65, 'comment': 7}",41,19,6,0,0,0
3,124319427612935_1025065006289204,✨ This one is for the book lovers 📚✨\nTag a fr...,inactive,added_video,2024-07-15T18:10:57+0000,"[{'name': 'Length', 'text': '00:21'}]",8,{'other clicks': 8},447,413,...,257,22,"{'share': 3, 'like': 20, 'comment': 3}","{'share': 3, 'like': 19, 'comment': 3}",11,9,0,0,0,0
4,124319427612935_997016089094096,Your words mean the world to us! 🌟 Swipe to se...,inactive,added_photos,2024-06-06T14:09:08+0000,,15,"{'other clicks': 3, 'photo view': 12}",481,448,...,0,0,"{'like': 7, 'comment': 2}","{'like': 7, 'comment': 2}",6,1,0,0,0,0


In [26]:
for metric in fb_post_metrics:
    url = f"https://graph.facebook.com/v21.0/124319427612935_1111928650936172/insights/{metric}/lifetime"
    params = {
        "access_token": PAGE_ACCESS_TOKEN
    }
    response = requests.get(url, params=params)
    if 'error' in response.json():
        print('ERROR: ', end='')
    print(metric, response.json())

post_clicks {'data': [{'name': 'post_clicks', 'period': 'lifetime', 'values': [{'value': 16}], 'title': 'Lifetime Matched Audience Targeting Consumptions on Post', 'description': 'Lifetime: The number of clicks anywhere in your post on News Feed from the user that matched the audience targeting on it. (Total Count)', 'id': '124319427612935_1111928650936172/insights/post_clicks/lifetime'}], 'paging': {'previous': 'https://graph.facebook.com/v21.0/124319427612935_1111928650936172/insights?access_token=EAAFwrJWP4CIBOzFp2qlM0f2YADANbOCBPbEWxDlfxs9ZA85E1L8faNh5LcZCzxPExrkkukbq41ZBx67qXSrolIAbphbwCqgJfmuXDADit7UIhZCqxDdQhciw9dflZBuFBscZA8Rb85uBfULaZAsMibXFSzDFqyTLDsEoJ7zUM8yzGVat0QtiEMbYjfi4dXfb8gTo9ipL3NZC0glGS6gZD&metric=post_clicks&period=lifetime&since=1730444400&until=1730617200', 'next': 'https://graph.facebook.com/v21.0/124319427612935_1111928650936172/insights?access_token=EAAFwrJWP4CIBOzFp2qlM0f2YADANbOCBPbEWxDlfxs9ZA85E1L8faNh5LcZCzxPExrkkukbq41ZBx67qXSrolIAbphbwCqgJfmuXDADit7UIhZC

In [None]:
# 124319427612935/videos?fields=video_insights,id,permalink_url,views,content_category,published,privacy,post_views,live_status,length,is_crosspost_video,description,custom_labels

In [42]:
def insights_json_to_df_generic(json_data, fields=None, metrics=None):
    """
    Transforms JSON data with media insights into a Pandas DataFrame.

    Parameters:
        json_data (list): List of dictionaries containing media data with insights.
        fields (list): List of fields to include in the DataFrame. If None, uses all fields found in data.
        metrics (list): List of metrics to include as columns. If None, uses all metrics found in insights.

    Returns:
        pd.DataFrame: A DataFrame where each row is a media item with specified fields and metrics.
    """
    # Initialize a list to store each row of media data with insights
    data_rows = []

    # If fields not provided, get them from first item's keys (excluding insights)
    if fields is None and json_data:
        fields = [k for k in json_data[0].keys() if k != "insights"]

    # If metrics not provided, get them from first item's insights
    # if metrics is None and json_data:
    #     first_insights = json_data[0].get("insights", {})
    #     metrics = list(first_insights.keys())

    # Iterate over each media item in the JSON data
    for media in json_data:
        # Extract basic fields
        row = {field: media.get(field, None) for field in fields}
        
        # Extract insights (metrics)
        insights = media.get("insights", {})
        for metric in insights.keys():
            row[metric] = insights.get(metric, None)
        
        # Append the row to the data list
        data_rows.append(row)

    # Create DataFrame from the list of rows
    df = pd.DataFrame(data_rows)
    return df

## FB Videos Insights


In [3]:
def fb_video_insights_url_construct(page_id, fields, metrics):
    url = f"https://graph.facebook.com/v21.0/{page_id}/videos"
    
    insights_str = f"video_insights.metric({','.join(metrics)})" if metrics is not None else "video_insights"
    
    params = {
        "fields": ",".join(fields) + f",{insights_str}",
        "access_token": PAGE_ACCESS_TOKEN,
    }

    return url, params


In [37]:
def fetch_facebook_videos_with_insights(page_id, fields, metrics=None):
    """
    Fetches all videos from a Facebook Page with specified insights.

    Parameters:
        page_id (str): The Facebook Page ID.
        fields (list): List of fields to retrieve for each post (e.g., ['id', 'message']).
        metrics (list): List of insight metrics to retrieve (e.g., ['post_impressions', 'post_reactions_by_type_total']).

    Returns:
        list: A list of dictionaries containing post data with insights.
    """
    all_posts_data = []
    url, params = fb_video_insights_url_construct(page_id, fields, metrics)
    print(url, params)
    i = 1
    while url:
        response = requests.get(url, params=params)
        
        # Check for rate limits (assuming check_rate_limit function is defined separately)
        if not check_rate_limit(response):
            continue
        
        data = response.json()
        
        if "data" in data:
            for post in data["data"]:
                post_info = {field: post.get(field, None) for field in fields}
                post_info["insights"] = {}

                # Process insights data
                if "video_insights" in post:
                    for insight in post["video_insights"]["data"]:
                        metric_name = insight["name"]
                        metric_value = insight["values"][0]["value"]
                        post_info["insights"][metric_name] = metric_value

                all_posts_data.append(post_info)
                print(i, end='\r')
                i += 1
            
            # Get next page URL for pagination
            url = data.get("paging", {}).get("next")
        else:
            print("Error fetching data:", data)
            break

    return all_posts_data


In [38]:

# Example usage
fb_video_fields = ["id","created_time", "post_id", "permalink_url", "content_category", "published", "privacy", "length", "is_crosspost_video", "description"]

# test_metrics = ['post_impressions', 'post_engagement', 'post_reactions_by_type_total']


In [None]:
fb_video_insights_url_construct(PAGE_ID, fb_video_fields, None)


In [None]:

# posts_data_with_insights = fetch_facebook_posts_with_insights(PAGE_ID, fb_post_fields, test_metrics)
posts_data_with_insights = fetch_facebook_videos_with_insights(PAGE_ID, fb_video_fields)

save_to_json(posts_data_with_insights, "posts_data_with_insights.json")


In [43]:

fb_video_df = insights_json_to_df_generic(posts_data_with_insights, fb_video_fields)

fb_video_df.to_csv("fb_video_df.csv", index=False)

fb_video_df.head()



Unnamed: 0,id,created_time,post_id,permalink_url,content_category,published,privacy,length,is_crosspost_video,description,...,post_video_likes_by_reaction_type,post_video_avg_time_watched,post_video_social_actions,post_video_view_time,post_impressions_unique,blue_reels_play_count,fb_reels_total_plays,fb_reels_replay_count,post_video_retention_graph,post_video_followers
0,588140973557485,2024-11-12T15:53:13+0000,1272295617445947,/1119289093533461/videos/588140973557485,ENTERTAINMENT,True,"{'allow': '', 'deny': '', 'description': 'Only...",9.033,False,,...,,,,,,,,,,
1,8686734791420854,2024-11-05T14:50:59+0000,3002728553221633,/1119289093533461/videos/8686734791420854,ENTERTAINMENT,True,"{'allow': '', 'deny': '', 'description': 'Only...",7.4,False,,...,,,,,,,,,,
2,2231180520614149,2024-11-03T21:17:23+0000,427813016797477,/1119289093533461/videos/2231180520614149,ENTERTAINMENT,True,"{'allow': '', 'deny': '', 'description': 'Only...",11.4,False,,...,,,,,,,,,,
3,3599483210349746,2024-11-01T18:20:29+0000,559036843489104,/1119289093533461/videos/3599483210349746,ENTERTAINMENT,True,"{'allow': '', 'deny': '', 'description': 'Only...",15.0,False,,...,,,,,,,,,,
4,1261345091667291,2024-11-01T18:18:54+0000,3971259183203576,/1119289093533461/videos/1261345091667291,ENTERTAINMENT,True,"{'allow': '', 'deny': '', 'description': 'Only...",15.0,False,,...,,,,,,,,,,
