In [2]:
#dependencies
import requests
import pandas as pd
from config import fb_key
import numpy as np

# Ad fields requested from the API; each one is collected into its own list, one entry per ad.
ad_attributes = [
    'id',
    'ad_snapshot_url',
    'ad_creative_body',
    'page_name',
    'demographic_distribution',
    'impressions',
    'currency',
    'spend',
]


In [3]:
# fb_ad_api class object for querying api
class fb_ad_api:
    """Client that pages through the Facebook Graph API ``ads_archive`` endpoint.

    Every requested ad attribute is accumulated in a same-named list on the
    instance (``self.id``, ``self.ad_snapshot_url``, ``self.ad_creative_body``,
    ``self.page_name``, ``self.demographic_distribution``, ``self.impressions``,
    ``self.currency``, ``self.spend``). The lists stay index-aligned: entry ``i``
    of each list belongs to the same ad, with ``np.nan`` standing in for any
    attribute missing from that ad's response entry.
    """

    def __init__(self, search='""'):
        """Prepare the query parameters and the empty result lists.

        Args:
            search: Value sent as the ``search_terms`` query parameter.
                Defaults to the literal two-character string ``""``.
        """
        # Snapshot the module-level attribute list so the requested fields, the
        # result lists and the response parsing can never disagree.
        self.ad_attributes = list(ad_attributes)

        # One result list per attribute: self.id, self.ad_snapshot_url, ...
        for attr in self.ad_attributes:
            setattr(self, attr, [])

        self.params = {
            'fields': ','.join(self.ad_attributes),  # attributes returned for each ad
            'ad_reached_countries': 'US',  # countries where the ad is available
            'access_token': fb_key,  # access token
            # 'ad_active_status': 'ALL',
            'limit': 100,  # results per page
            'search_terms': search,  # always sent, even for the default value
        }
        self.base_url = 'https://graph.facebook.com/v6.0/ads_archive?'  # base API URL

    def call_api(self, page_limit=100, timeout=30):
        """Fetch result pages and append each ad's attributes to the instance lists.

        Results accumulate: calling this method again appends to the lists
        that are already populated.

        Args:
            page_limit: Maximum number of pages to request.
            timeout: Seconds ``requests`` waits on each HTTP call before raising.

        Raises:
            RuntimeError: If a response carries an ``error`` entry instead of data.
        """
        url = self.base_url
        # Query parameters are only sent with the first request; the paginated
        # "next" URL returned by the API is requested as-is.
        params = self.params

        for page_counter in range(1, page_limit + 1):
            print('Pulling page:' + str(page_counter))  # so you can know what's up
            response = requests.get(url, params=params, timeout=timeout).json()

            # Surface API failures (bad token, rate limit, ...) with the API's own
            # message instead of an opaque KeyError on 'data'.
            if 'error' in response:
                error = response['error']
                message = error.get('message', error) if isinstance(error, dict) else error
                raise RuntimeError(
                    f'Ad archive API request failed on page {page_counter}: {message}'
                )

            # Append every attribute of every ad; NaN keeps the lists aligned
            # when an attribute is absent from an ad.
            for ad in response.get('data', []):
                for attr in self.ad_attributes:
                    getattr(self, attr).append(ad.get(attr, np.nan))

            # The last page has no 'next' link (and may have no 'paging' entry
            # at all), so look it up defensively and stop when it is missing.
            next_url = response.get('paging', {}).get('next')
            if not next_url:
                break

            url = next_url
            params = None

    # make pandas df
    def make_df(self):
        """Return the collected ads as a DataFrame with one row per ad.

        ``demographic_distribution`` is collected by ``call_api`` but is not
        included among the returned columns.

        Returns:
            pandas.DataFrame with columns ``Ad ID``, ``Ad URL``, ``Ad Text``,
            ``Hosted Page``, ``Impressions``, ``Currency`` and ``Ad Spending``.
        """
        return pd.DataFrame({
            'Ad ID': self.id,
            'Ad URL': self.ad_snapshot_url,
            'Ad Text': self.ad_creative_body,
            'Hosted Page': self.page_name,
            'Impressions': self.impressions,
            'Currency': self.currency,
            'Ad Spending': self.spend
        })
        

In [None]:
# Build a client with the default search term, then pull the result pages.
# call_api prints one "Pulling page:N" progress line per page it requests.
search_results = fb_ad_api()
search_results.call_api()

Pulling page:1
Pulling page:2
Pulling page:3
Pulling page:4
Pulling page:5
Pulling page:6
Pulling page:7
Pulling page:8
Pulling page:9
Pulling page:10
Pulling page:11
Pulling page:12
Pulling page:13
Pulling page:14
Pulling page:15
Pulling page:16
Pulling page:17
Pulling page:18
Pulling page:19
Pulling page:20
Pulling page:21
Pulling page:22
Pulling page:23
Pulling page:24
Pulling page:25
Pulling page:26
Pulling page:27
Pulling page:28
Pulling page:29
Pulling page:30
Pulling page:31
Pulling page:32
Pulling page:33
Pulling page:34


In [23]:
# Turn the collected attribute lists into a DataFrame (one row per ad).
ads_df = search_results.make_df()

In [25]:
# Persist the results to api_results.csv (relative path); pandas writes the
# DataFrame index as the first column by default.
ads_df.to_csv('api_results.csv')

In [24]:
# Sanity check: (rows, columns) of the collected ads.
ads_df.shape

(10000, 7)