> API details: 
- https://developers.facebook.com/docs/graph-api/reference/ads_archive/
- https://www.facebook.com/ads/library/?active_status=all&ad_type=political_and_issue_ads&country=US&q=BIDEN FOR PRESIDENT&sort_data[direction]=desc&sort_data[mode]=relevancy_monthly_grouped&search_type=keyword_unordered&media_type=all&content_languages[0]=es

In [2]:
import requests
import os
from dotenv import load_dotenv
import json
import pandas as pd

In [3]:
# Load .env file to access environment variables.
load_dotenv()

# Global Constants
BASE_URL = "https://graph.facebook.com/v18.0/ads_archive/?fields="
ACCESS_TOKEN = os.getenv('META_TOKEN')
TIME_RANGE_START = "2020-04-01"
TIME_RANGE_END = "2020-12-01"
SEARCH_TERMS = [
    "BIDEN FOR PRESIDENT",
    "DONALD J. TRUMP FOR PRESIDENT"
]

searches = {
    'CA 2022 Newsom vs Dahle': {
        'search_terms': ['Newsom for California Governor 2022', 'Brian Dahle for Governor 2022', 'Kevin Faulconer for Governor 2022'],
        'starts': ['2021-12-14', '2022-03-10', '2022-03-23'],
        'ends': ['2022-11-08', '2022-11-08', '2022-11-08']
    },
    'CA 2018 Newsom vs Cox': {
        'search_terms': ['Newsom for California Governor 2018', 'John Cox for Governor 2018'],
        'starts': ['2017-12-06', '2017-12-06'],
        'ends': ['2018-11-06', '2018-11-06']
    },
    'NV 2022 Lombardo vs Sisolak': {
        'search_terms': ['Lombardo for Governor', 'Committee to Elect Steve Sisolak'],
        'starts': ['2022-01-01', '2022-06-01'],
        'ends': ['2022-11-08', '2022-11-08']
    },
    'AZ 2022 Hobbs vs Lake': {
        'search_terms': ['Elect Katie Hobbs', 'Lake For AZ'],
        'starts': ['2022-01-01', '2022-06-01'],
        'ends': ['2022-11-08', '2022-11-08']
    },
    'TX 2022 Abbott vs O’Rourke': {
        'search_terms': ['Texans for Greg Abbott', 'Beto for Texas'],
        'starts': ['2022-05-01', '2022-05-01'],
        'ends': ['2022-11-08', '2022-11-08']
    },
    'TX 2018 Abbott vs Valdez': {
        'search_terms': ['Texans for Greg Abbott', 'Lupe Valdez for Governor'],
        'starts': ['2018-05-01', '2018-05-01'],
        'ends': ['2018-11-06', '2018-11-06']
    },
    'FL 2022 DeSantis vs Crist': {
        'search_terms': ['Ron DeSantis For Governor', 'Charlie Crist, Democrat for Governor'],
        'starts': ['2022-05-01', '2022-05-01'],
        'ends': ['2022-11-08', '2022-11-08']
    },
}


LANGUAGES = [
    "en",
    "es"
]

# SEARCH_TYPES = [
#     'KEYWORD_EXACT_PHRASE',
#     'KEYWORD_UNORDERED'
# ]

fields = [
    'id', 
    'page_id',
    'page_name',
    'ad_snapshot_url',
    'ad_creative_bodies',
    'ad_creative_link_captions',
    'ad_creative_link_descriptions',
    'ad_creative_link_titles',
    'bylines',
    'languages',
    'ad_delivery_start_time',
    'ad_delivery_stop_time',
    'demographic_distribution',
    'delivery_by_region'
]

def fetch_ads(search_term, language, fields=fields, search_type='KEYWORD_EXACT_PHRASE', limit=100):
    """Query the Facebook Ads Library for ads based on the provided search term."""
    params = {
        'access_token': ACCESS_TOKEN,
        'ad_type': 'POLITICAL_AND_ISSUE_ADS',
        'ad_reached_countries': ['US'],
        'ad_active_status': 'ALL',
        'ad_delivery_date_min': TIME_RANGE_START,
        'ad_delivery_date_max': TIME_RANGE_END,
        'search_terms': search_term,
        'languages': [language],
        'search_type': search_type,
        # 'publisher_platforms': ['facebook'],
        # 'estimated_audience_size_min': '10000',
        'limit': limit  # Adjust this as needed.
    }
    
    request_url = BASE_URL + ",".join(fields)
    response = requests.get(request_url, params=params)

    if response.status_code != 200:
        raise ValueError(f"Failed to fetch ads for {search_term}. Status code: {response.status_code}. \nError: {response.content.decode('utf-8')}")
    return response.json().get('data', [])

# response = fetch_ads(search_term=SEARCH_TERMS[0], language=LANGUAGES[0])
# response[0]

In [32]:
all_ads = []

# for term in SEARCH_TERMS:
term = SEARCH_TERMS[1]
for language in LANGUAGES:
    if "biden" in term.lower():
        search_type = 'KEYWORD_EXACT_PHRASE'
        limit = 1000
    else:
        search_type = 'KEYWORD_UNORDERED'
        limit = 800
        
    ads = fetch_ads(term, language, search_type=search_type, limit=limit)
    print(f"Fetched {len(ads)} ads for {term} in {language}.")
    ads_df = pd.DataFrame(ads)
    ads_df.to_csv(f'../data/ads_{language}_{term}.csv', index=False)
    all_ads.extend(ads)


df = pd.DataFrame(all_ads)
df.to_csv('../data/ads_all.csv', index=False)

Fetched 800 ads for DONALD J. TRUMP FOR PRESIDENT in en.
Fetched 703 ads for DONALD J. TRUMP FOR PRESIDENT in es.


In [35]:
df['ad_creative_bodies'].value_counts()

ad_creative_bodies
[President Trump is coming to town! Get your free tickets now >>>]                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   

# more data collection

In [5]:
all_ads = []

search_type = 'KEYWORD_UNORDERED'
limit = 800
for language in LANGUAGES:
    for key, value in searches.items():
        for term, start, end in zip(value['search_terms'], value['starts'], value['ends']):
            # print(term, start, end)
            ads = fetch_ads(term, language, search_type=search_type, limit=limit)
            print(f"Fetched {len(ads)} ads for {term} in {language}.")
            ads_df = pd.DataFrame(ads)
            ads_df.to_csv(f'../data/ads_{language}_{term}_{key}.csv', index=False)
            all_ads.extend(ads)


        
    ads = fetch_ads(term, language, search_type=search_type, limit=limit)
    print(f"Fetched {len(ads)} ads for {term} in {language}.")
    ads_df = pd.DataFrame(ads)
    ads_df.to_csv(f'../data/ads_{language}_{term}.csv', index=False)
    all_ads.extend(ads)


df = pd.DataFrame(all_ads)
df.to_csv('../data/new_ads_all.csv', index=False)

Fetched 112 ads for Newsom for California Governor 2022 in en.
Fetched 0 ads for Brian Dahle for Governor 2022 in en.
Fetched 0 ads for Kevin Faulconer for Governor 2022 in en.
Fetched 14 ads for Newsom for California Governor 2018 in en.
Fetched 4 ads for John Cox for Governor 2018 in en.
Fetched 6 ads for Lombardo for Governor in en.
Fetched 423 ads for Committee to Elect Steve Sisolak in en.
Fetched 18 ads for Elect Katie Hobbs in en.
Fetched 48 ads for Lake For AZ in en.
Fetched 673 ads for Texans for Greg Abbott in en.
Fetched 800 ads for Beto for Texas in en.
Fetched 673 ads for Texans for Greg Abbott in en.
Fetched 0 ads for Lupe Valdez for Governor in en.
Fetched 478 ads for Ron DeSantis For Governor in en.
Fetched 6 ads for Charlie Crist, Democrat for Governor in en.
Fetched 6 ads for Charlie Crist, Democrat for Governor in en.
Fetched 0 ads for Newsom for California Governor 2022 in es.
Fetched 0 ads for Brian Dahle for Governor 2022 in es.
Fetched 0 ads for Kevin Faulconer f