[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1fHu8Lz80pXvpMd--CRNNIeqFmDtPFOoX)

# Imports and Installs

In [1]:
!pip install plotly

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import os
import itertools

import pandas as pd
import numpy as np
import plotly.express as px

from datetime import datetime
from google.colab import drive

# Mount Drive

In [3]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
os.chdir("/content/drive/MyDrive/Public/Roundup of Republican Presidential Candidate Digital Spending")

# Prepare Data

In [5]:
google_name_to_candidate_mapping = {
    'NIKKI HALEY FOR PRESIDENT INC.': 'Nikki Haley',
    'Perry Johnson For President Inc.': 'Perry Johnson',
    'PERRY JOHNSON FOR PRESIDENT INC.': 'Perry Johnson',
    'TIM SCOTT FOR AMERICA': 'Tim Scott',
    'VIVEK 2024': 'Vivek Ramaswamy',
    'RON DESANTIS FOR PRESIDENT': 'Ron DeSantis',
}


google_col_rename = {
    'advertiser_name': 'Candidate',
    'ad_count': 'Count',
    'total_impressions': 'Impressions',
    'total_spend': 'Spending',
}

meta_col_rename = {
    'Number of ads in Library': 'Count',
    'Amount spent (USD)': 'Spending',
    'Page name': 'Candidate',
}


meta_name_to_candidate_mapping = {
    'Donald J. Trump': 'Donald Trump',
    'Perry Johnson': 'Perry Johnson',
    'Vivek Ramaswamy': 'Vivek Ramaswamy',
    'Ron DeSantis': 'Ron DeSantis',
    'Tim Scott': 'Tim Scott',
    'Nikki Haley': 'Nikki Haley',
    'Chris Christie': 'Chris Christie',
}

meta_page_ids = [153080620724, 570953166908066, 109521013225, 104623832132932, 349068689306, 318196171580489, 180381489134, 6726182861]

In [6]:
# Meta
df_meta = pd.read_csv("FacebookAdLibraryReport_2023-06-06_US_last_90_days_advertisers.csv").rename(columns = meta_col_rename)
df_meta = df_meta[df_meta["Page ID"].isin(meta_page_ids)]
del df_meta["Page ID"], df_meta["Disclaimer"]
df_meta["Spending"] = df_meta["Spending"].astype(int)
df_meta['Candidate'] = df_meta['Candidate'].map(meta_name_to_candidate_mapping)
df_meta = df_meta.groupby('Candidate').sum().reset_index()
missing_data = pd.DataFrame.from_dict({'Candidate': ["Mike Pence", "Donald Trump", "Chris Christie"], 'Count': [0, 0, 0], 'Impressions': [0, 0, 0], 'Spending': [0, 0, 0], 'Platform': ['Meta', 'Meta', 'Meta']})
df_meta = pd.concat([df_meta, missing_data]).reset_index(drop=True)
df_meta = pd.concat([df_meta, pd.DataFrame(df_meta.sum()).T]).reset_index(drop=True)
df_meta.at[len(df_meta)-1, 'Candidate'] = 'Total'
df_meta['Platform'] = 'Meta'



# Google
df_google = pd.read_csv("google_ad_data.csv").rename(columns = google_col_rename)
del df_google['advertiser_id']
df_google['Candidate'] = df_google['Candidate'].map(google_name_to_candidate_mapping)
missing_data = pd.DataFrame([{'Candidate': "Mike Pence", 'Count': 0, 'Impressions': 0, 'Spending': 0, 'Platform': 'Google'}])
df_google = pd.concat([df_google, missing_data]).reset_index(drop=True)
df_google = pd.concat([df_google, pd.DataFrame(df_google.sum()).T]).reset_index(drop=True)
df_google.at[len(df_google)-1, 'Candidate'] = 'Total'
df_google = df_google.groupby("Candidate").sum().reset_index()
df_google['Platform'] = 'Google'



# Concat
df_spend = pd.concat([df_meta, df_google]).reset_index(drop=True)


  df_meta = pd.concat([df_meta, pd.DataFrame(df_meta.sum()).T]).reset_index(drop=True)
  df_google = pd.concat([df_google, pd.DataFrame(df_google.sum()).T]).reset_index(drop=True)


In [7]:
# View data
df_spend

Unnamed: 0,Candidate,Spending,Count,Impressions,Platform
0,Chris Christie,1287.0,13,,Meta
1,Donald Trump,419359.0,709,,Meta
2,Nikki Haley,41148.0,878,,Meta
3,Perry Johnson,155186.0,428,,Meta
4,Ron DeSantis,54582.0,799,,Meta
5,Tim Scott,47815.0,233,,Meta
6,Vivek Ramaswamy,122198.0,303,,Meta
7,Mike Pence,0.0,0,0.0,Meta
8,Donald Trump,0.0,0,0.0,Meta
9,Chris Christie,0.0,0,0.0,Meta


In [8]:
# View totals
df_spend[["Candidate", "Spending"]].groupby("Candidate").sum().sort_values("Spending", ascending=False)

Unnamed: 0_level_0,Spending
Candidate,Unnamed: 1_level_1
Total,1502925.0
Ron DeSantis,434382.0
Donald Trump,419359.0
Perry Johnson,260086.0
Vivek Ramaswamy,209098.0
Tim Scott,123915.0
Nikki Haley,54798.0
Chris Christie,1287.0
Mike Pence,0.0


In [9]:
# View spending proportion across platform
df_spend_prop = df_google[['Candidate', 'Spending']].merge(df_meta[['Candidate', 'Spending']], on='Candidate')
df_spend_prop = df_spend_prop.rename(columns={"Spending_x": "Google", "Spending_y": "Meta"})
df_spend_prop["Total"] = df_spend_prop["Google"]+df_spend_prop["Meta"]
df_spend_prop["Google_prop"] = df_spend_prop["Google"] / (df_spend_prop["Total"]+0.000000001)
df_spend_prop["Meta_prop"] = df_spend_prop["Meta"] / (df_spend_prop["Total"]+0.000000001)
display(df_spend_prop)

Unnamed: 0,Candidate,Google,Meta,Total,Google_prop,Meta_prop
0,Mike Pence,0.0,0,0.0,0.0,0.0
1,Nikki Haley,13650.0,41148,54798.0,0.249097,0.750903
2,Perry Johnson,104900.0,155186,260086.0,0.403328,0.596672
3,Ron DeSantis,379800.0,54582,434382.0,0.874346,0.125654
4,Tim Scott,76100.0,47815,123915.0,0.614131,0.385869
5,Total,661350.0,841575,1502925.0,0.440042,0.559958
6,Vivek Ramaswamy,86900.0,122198,209098.0,0.415595,0.584405


# Visualize Spending

In [10]:
fig = px.bar(
    df_spend.query("Candidate != 'Total'"),
    x="Candidate",
    y="Spending",
    color="Platform",
    color_discrete_sequence=px.colors.qualitative.T10,
    title="2024 Republican Primary Candidates' Meta and Google Ad Spending",
    )
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()

In [11]:
fig = px.bar(
    df_google.query("Candidate != 'Total'"),
x="Candidate",
y="Impressions",
color_discrete_sequence=px.colors.qualitative.T10,
title=f"2024 Republican Primary Candidates' Impressions: Google",
)
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()

In [12]:
df_google["Impressions Per Dollar"] = df_google["Impressions"] / (df_google["Spending"]+0.0000001)

plot_df = df_google.copy()
plot_df = plot_df.replace("Total", "Average")

fig = px.bar(
    plot_df,
x="Candidate",
y="Impressions Per Dollar",
color_discrete_sequence=px.colors.qualitative.T10,
title=f"2024 Republican Primary Candidates' Impressions per Advertising Dollar: Google",
)
fig.update_layout(xaxis={'categoryorder':'total descending'})
fig.show()

# Targetting Similarity

In [13]:
def jaccard(x: set, y: set):
    """
    The size of the intersection divided by the size of the union of two label sets
    """
    return len( x.intersection(y) ) / len( x.union(y) )


In [14]:
df_dict = {}

candidates = ["trump", "scott", "haley", "ramaswamy", "johnson", "christie"]

for candidate in candidates:
    temp_df = pd.read_excel("targeting_6_5_23.xlsx", sheet_name=candidate)
    temp_df["Detailed targeting"] = temp_df["Detailed targeting"].map(lambda x: x.lower().strip())
    df_dict[candidate] = temp_df

In [15]:
matrix = np.ones((len(candidates), len(candidates)))

for pair in itertools.combinations(df_dict.keys(), 2):

    candidate_1 = pair[0]
    candidate_1_idx = candidates.index(candidate_1)

    candidate_2 = pair[1]
    candidate_2_idx = candidates.index(candidate_2)


    score = jaccard(
        x= set(df_dict[pair[0]]["Detailed targeting"].values.tolist()),
        y= set(df_dict[pair[1]]["Detailed targeting"].values.tolist()),
    )
    matrix[candidate_1_idx][candidate_2_idx] = score
    matrix[candidate_2_idx][candidate_1_idx] = score


In [16]:
df_jaccard = pd.DataFrame(matrix, columns=candidates, index=candidates).round(2)
df_jaccard.style.background_gradient(cmap="Blues")

Unnamed: 0,trump,scott,haley,ramaswamy,johnson,christie
trump,1.0,0.0,0.0,0.05,0.05,0.03
scott,0.0,1.0,0.73,0.1,0.1,0.0
haley,0.0,0.73,1.0,0.07,0.09,0.0
ramaswamy,0.05,0.1,0.07,1.0,0.02,0.0
johnson,0.05,0.1,0.09,0.02,1.0,0.01
christie,0.03,0.0,0.0,0.0,0.01,1.0
