In [None]:
import json
from datetime import datetime
from urllib.parse import quote

import pandas
import requests

In [None]:
# format request URL
def params_to_query(param_dict):
    """Build a URL query string from a mapping of parameter names to values.

    Entries whose value is ``None`` are skipped. Keys and values are
    converted to ``str`` and percent-encoded, so characters such as ``&``,
    ``=``, ``#`` or spaces inside a value cannot break the query string.
    Commas are left unescaped so comma-separated lists stay readable.

    Values must be passed raw (not already percent-encoded), otherwise they
    would be encoded twice.

    Args:
        param_dict: mapping of parameter name to value (or ``None``).

    Returns:
        The ``key=value`` pairs joined with ``&``; ``""`` for an empty mapping.
    """
    return "&".join(
        f"{quote(str(key), safe='')}={quote(str(value), safe=',')}"
        for key, value in param_dict.items()
        if value is not None
    )

In [None]:
# crawl facebook ads
def crawl_ads(token="xxxxxxxxxxxxxxxx", page_ids="21289227249",
              delivery_date_min="2023-01-15", delivery_date_max=None):
    """Crawl political/issue ads from the Facebook ads archive into a DataFrame.

    Builds the ``ads_archive`` request URL, follows the pagination via
    ``crawl_iter_ads`` and flattens every returned entry with ``decode_ad``.
    Called without arguments it behaves as before (same token placeholder,
    page id and start date).

    Args:
        token: Graph API access token. The default is a placeholder; get
            your own token from Facebook's Graph API developer platform.
        page_ids: value for ``search_page_ids``. The default is the page id
            of the German party FDP.
        delivery_date_min: value for ``ad_delivery_date_min`` (YYYY-MM-DD).
        delivery_date_max: value for ``ad_delivery_date_max``; ``None``
            omits the parameter from the request (API default: today).

    Returns:
        pandas.DataFrame with one row per ad, columns as produced by
        ``decode_ad``.
    """
    fields = [
        "id",
        "ad_creation_time",
        "ad_creative_bodies",
        "ad_creative_link_captions",
        "ad_creative_link_descriptions",
        "ad_creative_link_titles",
        "ad_delivery_start_time",
        "ad_delivery_stop_time",
        "ad_snapshot_url",
        "currency",
        "delivery_by_region",
        "demographic_distribution",
        "impressions",
        "spend",
        "publisher_platforms",
        "page_id",
    ]

    query = params_to_query({
        "access_token": token,
        "ad_type": "POLITICAL_AND_ISSUE_ADS",
        "ad_active_status": "ALL",
        "search_page_ids": page_ids,
        "ad_reached_countries": "['DE']",
        "ad_delivery_date_min": delivery_date_min,
        # params_to_query drops None values, so an unset upper bound is
        # simply left out of the request.
        "ad_delivery_date_max": delivery_date_max,
        "fields": ",".join(fields),
    })

    url = f"https://graph.facebook.com/v11.0/ads_archive?{query}"
    # Materialise the records first: the frame is built from a list of dicts.
    records = [decode_ad(entry) for entry in crawl_iter_ads(url)]
    return pandas.DataFrame(records)


# request data via request URL
def crawl_iter_ads(url, timeout=30):
    """Yield every entry of a paginated JSON listing, starting at *url*.

    Each response is expected to be a JSON object with a ``data`` list and,
    optionally, a ``paging`` object whose ``next`` member is the URL of the
    following page. Iteration stops when the URL is ``None``, a response is
    not HTTP 200, a page has no entries, or there is no new ``next`` URL.

    Args:
        url: URL of the first page, or ``None`` to yield nothing.
        timeout: seconds to wait for the server before ``requests`` raises a
            timeout error, so a stalled connection cannot hang the crawl
            forever.

    Yields:
        The entries of each page's ``data`` list, in order.
    """
    while url is not None:
        # NOTE: the URL built by crawl_ads embeds the access token, so this
        # progress output should not be shared as-is.
        print(url)
        r = requests.get(url, timeout=timeout)

        if r.status_code != 200:
            # Best effort: report the failure (with the server's error body)
            # and stop, keeping whatever was already yielded.
            print(r.status_code, r.text)
            break

        body = r.json()
        data = body.get('data', [])
        yield from data

        if not data:
            break

        # A missing 'paging'/'next' or a self-referencing link ends the crawl.
        next_url = body.get('paging', {}).get('next')
        url = next_url if next_url != url else None


# store requested data in dictionary
def _join_texts(entry, key):
    """Join the list of strings under *key* with ';' ('' if the key is absent)."""
    return ";".join(entry.get(key, []))


def _dump_json(entry, key, **dumps_kwargs):
    """Serialise ``entry[key]`` as a JSON string, or return None if absent."""
    if key not in entry:
        return None
    return json.dumps(entry[key], **dumps_kwargs)


def decode_ad(entry):
    """Flatten one ads-archive entry into a flat dict of ``content_*`` fields.

    Required keys (a missing one raises ``KeyError``): ``id``,
    ``ad_creation_time``, ``ad_snapshot_url``, ``currency``,
    ``ad_delivery_start_time``, plus ``spend`` and ``impressions`` with
    their ``lower_bound``. All other fields are optional: a missing text
    list becomes ``""``, any other missing value becomes ``None``. This now
    also covers ``spend.upper_bound`` and ``publisher_platforms``, which
    used to raise ``KeyError`` when absent.

    Args:
        entry: dict describing a single ad, as yielded by ``crawl_iter_ads``.

    Returns:
        dict with 20 ``content_*`` keys, in a fixed order, suitable as one
        DataFrame row.

    Raises:
        KeyError: if a required key is missing.
        ValueError: if ``ad_creation_time`` is not in ``YYYY-MM-DD`` format.
    """
    ad_id = entry['id']
    spend = entry['spend']
    impressions = entry['impressions']

    return {
        "content_3rdparty_id": ad_id,
        "content_date_fetched": datetime.now(),
        # NOTE(review): assumes the API returns a plain YYYY-MM-DD date here - confirm.
        "content_date_created": datetime.strptime(entry['ad_creation_time'], "%Y-%m-%d"),
        "content_snapshot_url": entry['ad_snapshot_url'],
        "content_url": f"https://www.facebook.com/ads/library/?active_status=all&ad_type=political_and_issue_ads&country=DE&id={ad_id}",
        "content_type": "ad",
        "content_spend_lower": spend['lower_bound'],
        "content_spend_upper": spend.get('upper_bound'),
        "content_impression_lower": impressions['lower_bound'],
        "content_impression_upper": impressions.get('upper_bound'),
        "content_creative_link_title": _join_texts(entry, 'ad_creative_link_titles'),
        "content_creative_link_caption": _join_texts(entry, 'ad_creative_link_captions'),
        "content_creative_link_body": _join_texts(entry, 'ad_creative_bodies'),
        "content_creative_link_description": _join_texts(entry, 'ad_creative_link_descriptions'),
        "content_currency": entry['currency'],
        "content_delivery_start_time": entry['ad_delivery_start_time'],
        "content_delivery_stop_time": entry.get('ad_delivery_stop_time'),
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        "content_socio_meta": _dump_json(entry, 'demographic_distribution', ensure_ascii=False),
        "content_regio_meta": _dump_json(entry, 'delivery_by_region', ensure_ascii=False),
        "content_platforms": _dump_json(entry, 'publisher_platforms'),
    }

# Run the crawl, then persist the resulting table as a semicolon-separated
# CSV file without the DataFrame index column.
df_ads = crawl_ads()

df_ads.to_csv(path_or_buf="facebook_ads_output.csv", sep=";", index=False)