This notebook provides a step-by-step guide to creating the various subsets for federal candidates from our universe of election ads. Please refer to our paper (under review) for what each subset represents.

These steps apply to both the Meta and Google full-variable ("var") tables. Perform these steps after you have created the "var" tables.

In [None]:
import os
import numpy as np
import pandas as pd

In [None]:
# Import the full-variable "var" table.

# Define your local import directory
path = '.'

# Filenames we used for each platform's "var" table
meta_var_filepath = f'{path}/fb_2022_adid_var.csv.gz'
google_var_filepath = f'{path}/g2022_adid_var.csv.gz'

# Table that is actually read below. Google is selected here; point this at
# `meta_var_filepath` to process the Meta table instead.
var_filepath = google_var_filepath

var = pd.read_csv(var_filepath)

In [None]:
# Import the entity file from the "datasets" repository
# (found under the folder "wmp_entity_files").

# Entity file locations for each platform
meta_ent_path = '../../../datasets/wmp_entity_files/Facebook/wmp_fb_2022_entities_v082324.csv'
google_ent_path = '../../../datasets/wmp_entity_files/Google/wmp_google_2022_entities_v112822.csv'

# Entity file that is actually read below. Google is selected here; point this
# at `meta_ent_path` when working with the Meta "var" table.
ent_path = google_ent_path

ent = pd.read_csv(ent_path)

In [None]:
# Create the federal candidate (federal_cd) and federal candidate/party
# (federal_cdpty) indicator variables on the entity table.

# NOTE(review): these are exact string matches on the raw entity values. A
# later cell strips trailing whitespace from `wmp_spontype` only after the
# merge -- confirm the entity file needs no stripping before this step.
is_candidate_sponsor = ent['wmp_spontype'].isin(['campaign', 'leadership PAC'])
is_federal_office = ent['wmp_office'].isin(['us senate', 'us house'])

# 1 when the sponsor is a campaign or leadership PAC tied to a US Senate or
# US House office, 0 otherwise
ent['federal_cd'] = np.where(is_candidate_sponsor & is_federal_office, 1, 0)

# 1 when the sponsor is a federal candidate (above) or a national party,
# 0 otherwise
is_national_party = ent['wmp_spontype'] == 'party national'
ent['federal_cdpty'] = np.where((ent['federal_cd'] == 1) | is_national_party, 1, 0)

In [None]:
# Merge the "var" table with the entity file.

# Entity columns to bring into the "var" table. The first entry of each list
# is the platform's sponsor ID column, which is also the merge key.
meta_cols = ['pd_id', 'page_name', 'disclaimer', 'wmp_office', 'wmp_spontype',
             'wmpid', 'cand_id', 'federal_cd', 'federal_cdpty']
google_cols = ['advertiser_id', 'advertiser_name', 'wmp_office', 'wmp_spontype',
               'wmpid', 'cand_id', 'federal_cd', 'federal_cdpty']

# Column list that is actually used below. Google is selected here; switch to
# `meta_cols` when working with the Meta tables.
selected_cols = google_cols

# Alternatively, you may merge with "ent" table directly, without selecting columns

# Take the merge key from the selected list ('pd_id' for Meta, 'advertiser_id'
# for Google). A hard-coded 'pd_id' raises KeyError with the Google columns,
# which do not include it.
id_col = selected_cols[0]

# Left merge keeps every row of the "var" table; rows whose ID has no match in
# the entity file get missing values in the entity columns.
var = var.merge(ent[selected_cols], how='left', on=id_col)

In [None]:
# Strip trailing whitespace from the sponsor-type values
var['wmp_spontype'] = var['wmp_spontype'].str.rstrip()

In [None]:
# Create "Set 2": flag every row whose sponsor ID has at least one row with
# federal_verified == 'Yes'.

# Sponsor ID column: 'pd_id' in the Meta table, 'advertiser_id' in the Google
# table. Detecting it here removes the need to edit this cell by hand.
id_col = 'pd_id' if 'pd_id' in var.columns else 'advertiser_id'

# 1/0 version of the 'Yes' flag so it can be aggregated with max()
var['federal_verified_int'] = np.where(var['federal_verified'] == 'Yes', 1, 0)

# One row per sponsor ID; the max is 1 when any of that ID's rows is verified
grouped = var.groupby(id_col)['federal_verified_int'].max().reset_index()
verified_ids = grouped.loc[grouped['federal_verified_int'] == 1, id_col]

var['set2'] = np.where(var[id_col].isin(verified_ids), 1, 0)

In [None]:
# Create "Set 3": rows where federal_cdpty is 1 or federal_verified is 'Yes'.
is_federal_cdpty = var['federal_cdpty'].eq(1)
is_federal_verified = var['federal_verified'].eq('Yes')

var['set3'] = np.where(is_federal_cdpty | is_federal_verified, 1, 0)

In [None]:
# Create "Set 3b": Set 3 rows whose race_of_focus is present and is not
# 'Downballot'.
has_race_of_focus = var['race_of_focus'].notna()
not_downballot = var['race_of_focus'].ne('Downballot')
in_set3 = var['set3'].eq(1)

var['set3b'] = np.where(has_race_of_focus & not_downballot & in_set3, 1, 0)

In [None]:
# Create "Set 4": rows where federal_verified is 'Yes'.
is_federal_verified = var['federal_verified'].eq('Yes')

var['set4'] = np.where(is_federal_verified, 1, 0)

In [None]:
# Create "Set 4b": Set 4 rows whose race_of_focus is not 'Downballot'.
#
# NOTE(review): unlike "Set 3b", missing race_of_focus values are not excluded
# here -- a missing value compares as "not equal" to 'Downballot', so such rows
# stay in the set. Confirm this difference is intended.
in_set4 = var['set4'].eq(1)
not_downballot = var['race_of_focus'].ne('Downballot')

var['set4b'] = np.where(in_set4 & not_downballot, 1, 0)

In [None]:
# Write the final full-variable "var" table in compressed csv format.
#
# We used "fb_2022_adid_var.csv.gz" and "g2022_adid_var.csv.gz" as output
# filenames.

outfile_path = ''  # Define your outfile path, including filename

# gzip settings: lowest compression level, and a fixed `mtime` so the archive
# does not embed the current time
gzip_options = {'method': 'gzip', 'compresslevel': 1, 'mtime': 1}

var.to_csv(outfile_path, index=False, compression=gzip_options)