
## Methodology

Data on Courier's Facebook ads were downloaded from [Meta's ad library](https://www.facebook.com/ads/library/?active_status=all&ad_type=political_and_issue_ads&country=US&q=abortion%2C%20dogwood&sort_data[direction]=desc&sort_data[mode]=relevancy_monthly_grouped&search_type=keyword_unordered&media_type=all). Ads sponsored by Courier are labeled as "Paid by Courier Newsroom, Inc." To identify ads featuring reproductive content, the data were filtered based on whether the ad message included the words "abortion," "reproductive," or "Roe v. Wade."




In [None]:
import pandas as pd
# Show up to 100 characters per cell so long ad text stays readable in outputs.
pd.set_option("display.max_colwidth", 100)

In [None]:
# Load the three ad-library exports (one CSV per search keyword).
df_abortion, df_reproctive, df_wade = (
  pd.read_csv(csv_path)
  for csv_path in (
    "fb_ads_abortion.csv",
    "fb_ads_reproductive.csv",
    "fb_ads_wade.csv",
  )
)

In [None]:
# Stack the three exports, then drop rows that are exact duplicates
# (presumably ads returned by more than one keyword search).
keyword_frames = [df_abortion, df_reproctive, df_wade]
df = pd.concat(keyword_frames)
df = df.drop_duplicates()

In [None]:
# Clean file
# Parse the creation timestamps in one vectorized call instead of once per row.
df['ad_creation_time'] = pd.to_datetime(df['ad_creation_time'])

# `spend` looks like "lower_bound: 0, upper_bound: 99"; split it once and reuse
# the pieces for both bounds.
spend_parts = df['spend'].str.split(',')
df['spend_lower_bound'] = spend_parts.str[0]
df['spend_upper_bound'] = spend_parts.str[1]

# Rows with a missing spend, or without both bounds, cannot be averaged later.
df = df.dropna(subset=['spend_lower_bound', 'spend_upper_bound'])

### Get estimated spending by averaging the `spend_upper_bound` and `spend_lower_bound` columns

In [None]:
# Pull the first run of digits out of each bound (e.g. "lower_bound: 0" -> 0).
# The raw string keeps `\d` from being treated as an invalid string escape, and
# expand=False makes extract() return a Series rather than a one-column DataFrame.
# A bound with no digits becomes <NA> instead of aborting the whole conversion.
for bound_col in ('spend_lower_bound', 'spend_upper_bound'):
  bound_digits = df[bound_col].str.extract(r'(\d+)', expand=False)
  df[bound_col] = pd.to_numeric(bound_digits).astype('Int64')

# Spend is only available as a range, so use its midpoint as the estimate.
df['spent_average'] = (df['spend_upper_bound'] + df['spend_lower_bound']) / 2

In [None]:
# Peek at the two most recently created ads.
df.sort_values(by='ad_creation_time').iloc[-2:]

Unnamed: 0,ad_archive_id,page_id,page_name,ad_creation_time,ad_delivery_start_time,ad_delivery_stop_time,byline,ad_creative_bodies,ad_creative_link_titles,ad_creative_link_captions,...,spend,currency,demographic_distribution,delivery_by_region,publisher_platforms,estimated_audience_size,languages,spend_lower_bound,spend_upper_bound,spent_average
42,1060416964634532,106487520883333,Cardinal & Pine,2022-10-28,2022-10-28,2022-11-08,"Courier Newsroom, Inc.",WATCH: The most important two races in North Carolina might be the least talked about. \n\nNort...,North Carolina's Supreme Court Races Are the Most Important Races No One Is Talking About,,...,"lower_bound: 0, upper_bound: 99",USD,"{""age"":""45-54"",""gender"":""unknown"",""percentage"":0.001255},{""age"":""45-54"",""gender"":""male"",""percent...","{""region"":""Nevada"",""percentage"":0.00021},{""region"":""Aruba"",""percentage"":0.00021},{""region"":""Flor...",instagram,"lower_bound: 100, upper_bound: 1000",en,0,99,49.5
48,1935684343453129,106487520883333,Cardinal & Pine,2022-10-28,2022-10-28,2022-10-30,"Courier Newsroom, Inc.","In November, voters will have the chance to vote in TWO races that will determine control of the...",,,...,"lower_bound: 0, upper_bound: 99",USD,"{""age"":""25-34"",""gender"":""unknown"",""percentage"":0.0016},{""age"":""35-44"",""gender"":""unknown"",""percen...","{""region"":""Georgia"",""percentage"":0.000799},{""region"":""New Mexico"",""percentage"":0.0004},{""region""...",instagram,"lower_bound: 100, upper_bound: 1000",en,0,99,49.5


### Look at the frequency of ads over time

In [None]:
# Number of ads created in each (year, month) pair.
created = df['ad_creation_time']
created.groupby([created.dt.year, created.dt.month]).count()

ad_creation_time  ad_creation_time
2020              8                     2
                  10                    1
2021              9                     2
2022              5                    13
                  6                    32
                  7                    17
                  8                    22
                  9                    83
                  10                  106
Name: ad_creation_time, dtype: int64

### Same breakdown, but per newsroom

In [None]:
# Number of ads created per newsroom page, broken down by year and month.
created = df['ad_creation_time']
created.groupby([df['page_name'], created.dt.year, created.dt.month]).count()

page_name             ad_creation_time  ad_creation_time
Cardinal & Pine       2020              10                   1
                      2022              10                  24
Courier               2022              6                   28
Dogwood               2021              9                    2
Iowa Starting Line    2022              5                    2
                                        10                   7
The 'Gander Newsroom  2022              5                    2
                                        7                    2
                                        8                   10
                                        9                   48
                                        10                  25
The Americano         2022              8                    2
                                        9                   10
                                        10                   6
The Copper Courier    2020              8                    

### Courier spent approximately 5,447 USD on abortion-related ads prior to 2022 and 823,913 USD during 2022. (This assumes that Facebook hasn't removed any ads from previous years.)

In [None]:
# Ads created on or after 1 Jan 2022. The inclusive lower bound is the exact
# complement of the `< '2022-01-01'` filter used for earlier ads, so a timestamp
# carrying a time of day on 2021-12-31 cannot be counted in both periods.
df_2022 = df.loc[df['ad_creation_time'] >= '2022-01-01']
df_2022['spent_average'].sum()

823913.5

In [None]:
# Everything created before 2022 (despite the name, not only ads from 2021).
created_before_2022 = df['ad_creation_time'] < '2022-01-01'
df_2021 = df.loc[created_before_2022]
df_2021['spent_average'].sum()

5447.5

### How much has each newsroom spent in total on Facebook ads in 2022?

In [None]:
# Total the numeric columns per newsroom and creation year, biggest spender first.
# numeric_only=True is needed on pandas >= 2.0, where sum() no longer silently
# drops non-numeric columns (the datetime column would raise a TypeError).
# NOTE: sums of identifier columns such as `page_id` are meaningless; only the
# spend columns should be read from this table.
(
  df_2022
  .groupby([df_2022['page_name'], df_2022['ad_creation_time'].dt.year])
  .sum(numeric_only=True)
  .sort_values(by='spent_average', ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,index,ad_archive_id,page_id,spend_lower_bound,spend_upper_bound,spent_average,mid_impressions
page_name,ad_creation_time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
The 'Gander Newsroom,2022,7493,111521088851590656,9634988738476520,335100,420513,377806.5,15422959.5
UpNorthNews,2022,3206,52589444233395800,3896527774048246,172900,222962,197931.0,9289482.0
The Copper Courier,2022,6650,62462300902980680,19992045895134256,95200,124542,109871.0,3834971.0
The Americano,2022,2021,26383674775454148,1918088355151386,66000,85682,75841.0,4053991.5
Cardinal & Pine,2022,1112,38660034204485248,2555700501199992,23500,29976,26738.0,2412988.0
The Keystone,2022,617,17038257005066508,1264681841926602,15200,18389,16794.5,456994.5
Iowa Starting Line,2022,749,16920964529768264,13582887534309150,13300,16791,15045.5,903495.5
Courier,2022,5278,30461201889493056,3100984514979960,2300,5472,3886.0,115486.0


### Now, see which demographic groups are most likely to see the ads by multiplying each ad's impressions by the percentage associated with each demographic group

In [None]:
def get_mid_impressions(row):
  """Return the midpoint of an ad's reported impressions range.

  `row.impressions` is parsed as text of the form
  "lower_bound: <int>, upper_bound: <int>". When only one comma-separated
  part is present, that value is used for both ends of the range.

  Args:
    row: any object exposing an `impressions` attribute (e.g. a DataFrame row).

  Returns:
    The midpoint as a float, or NaN when `impressions` is missing (NaN/None).

  Raises:
    ValueError: if a bound does not reduce to an integer.
  """
  impressions = row.impressions
  # Missing values have no .split(); return NaN so one empty cell does not
  # abort the whole DataFrame.apply() and the row can be dropped with dropna().
  if pd.isna(impressions):
    return float('nan')

  parts = impressions.split(",")
  lower = int(parts[0].replace('lower_bound: ', '').strip())
  if len(parts) < 2:
    # No upper bound reported: treat the range as a single point.
    upper = lower
  else:
    upper = int(parts[1].replace('upper_bound: ', '').strip())
  return (lower + upper) / 2

# Attach each ad's impressions midpoint as a new column (computed row by row).
df['mid_impressions'] = df.apply(get_mid_impressions, axis=1)



In [None]:
# Give every row a fresh 0..n-1 label. drop=True discards the old labels (which
# can repeat after the concat of three files) instead of inserting them as an
# `index` column that would then be summed in numeric aggregations. Rebinding
# `df` rather than using inplace=True keeps this cell safe to re-run.
df = df.reset_index(drop=True)

In [None]:
# Keep only ads that have both a demographic breakdown and an impressions midpoint.
demographic_cols = ['demographic_distribution', 'mid_impressions']
demographics = df.dropna(subset=demographic_cols)[demographic_cols]
demographics.head()

Unnamed: 0,demographic_distribution,mid_impressions
0,"{""age"":""18-24"",""gender"":""female"",""percentage"":0.005105},{""age"":""45-54"",""gender"":""female"",""percen...",12499.5
1,"{""age"":""13-17"",""gender"":""female"",""percentage"":3.2e-5},{""age"":""18-24"",""gender"":""female"",""percenta...",94999.5
2,"{""age"":""13-17"",""gender"":""female"",""percentage"":4.8e-5},{""age"":""35-44"",""gender"":""female"",""percenta...",64999.5
3,"{""age"":""13-17"",""gender"":""male"",""percentage"":8.8e-5},{""age"":""35-44"",""gender"":""male"",""percentage"":...",12499.5
4,"{""age"":""13-17"",""gender"":""unknown"",""percentage"":1.5e-5},{""age"":""18-24"",""gender"":""female"",""percent...",112499.5


In [None]:
from collections import defaultdict
import json

# Estimated impressions per "<age>_<gender>" group, summed over all ads.
di = defaultdict(float)
for row_label in demographics.index:
  distribution = demographics.at[row_label, 'demographic_distribution']
  ad_impressions = demographics.at[row_label, 'mid_impressions']
  # Skip (and report) rows with a missing value. pd.isna replaces the earlier
  # `type(x) == float` test, which could not catch a missing impressions value
  # because pandas hands back numpy.float64 scalars, not built-in floats.
  if pd.isna(distribution) or pd.isna(ad_impressions):
    print(row_label)
    continue
  # The cell holds comma-separated JSON objects; wrapping them in brackets
  # turns the text into a JSON array that json.loads can parse.
  for group in json.loads('[' + distribution.strip() + ']'):
    group_key = group['age'] + '_' + group['gender']
    # Share of this ad's audience in the group times the ad's impressions.
    di[group_key] += group['percentage'] * ad_impressions

  

### Results: Women aged 25-34 are the group most likely to see reproductive ads

In [None]:
# Rank demographic groups from most to fewest estimated impressions.
sorted(di.items(), key=lambda group_total: group_total[1], reverse=True)

[('25-34_female', 8151267.262387003),
 ('35-44_female', 6239522.0439695),
 ('25-34_male', 4094883.4731110027),
 ('45-54_female', 3601491.8780920007),
 ('35-44_male', 2903668.420279503),
 ('18-24_female', 2735455.381654),
 ('55-64_female', 2543801.215689501),
 ('65+_female', 1893854.619913499),
 ('45-54_male', 1673142.6707075012),
 ('18-24_male', 1119530.7673995001),
 ('55-64_male', 1033032.6017014999),
 ('65+_male', 680134.7565944999),
 ('25-34_unknown', 73299.99318800002),
 ('35-44_unknown', 62262.96461800001),
 ('18-24_unknown', 32076.697532500006),
 ('45-54_unknown', 31117.017942499995),
 ('55-64_unknown', 20119.4717415),
 ('65+_unknown', 17456.473872999988),
 ('13-17_male', 665.1762450000001),
 ('13-17_female', 542.0470339999999),
 ('13-17_unknown', 47.814819)]