In [1]:
import pandas as pd
import re

## Utility functions

In [2]:
# Company-suffix noise removed before names are compared. "ltd\.?" also
# swallows a trailing dot ("Ltd."); "uk$" only strips a "uk" that ends the string.
# NOTE(review): the alternatives are not word-bounded, so "limited" is also
# removed from inside longer words (e.g. "unlimited") — confirm this is acceptable.
_COMPANY_SUFFIX_REGEX = re.compile(r"limited|ltd\.?|uk$")


def is_registered_disclaimer(disclaimer, campaigner_names):
    """Return the campaigner name that contains the given ad disclaimer.

    Both the disclaimer and each campaigner name are lower-cased, stripped of
    surrounding whitespace and cleaned of company suffixes ("limited", "ltd",
    "ltd.", trailing "uk") before a substring test is performed.

    Parameters
    ----------
    disclaimer : str
        Disclaimer text to look up. Non-string values (e.g. NaN from a blank
        CSV cell) are treated as "no disclaimer".
    campaigner_names : iterable of str
        Candidate campaigner names. Non-string entries are skipped.

    Returns
    -------
    str
        The original (un-normalised) campaigner name whose normalised form
        contains the normalised disclaimer. If several names match, the last
        one in iteration order is returned. An empty string means no match.
    """
    if not isinstance(disclaimer, str):
        return ""

    disclaimer_lower = _COMPANY_SUFFIX_REGEX.sub("", disclaimer.lower().strip())

    # An empty (or whitespace-only) needle is a substring of every name and
    # would otherwise report a bogus match for blank or suffix-only disclaimers.
    if not disclaimer_lower.strip():
        return ""

    registered_campaigner = ""
    for campaigner_name in campaigner_names:
        if not isinstance(campaigner_name, str):
            continue
        normalised_name = _COMPANY_SUFFIX_REGEX.sub("", campaigner_name.lower().strip())
        if disclaimer_lower in normalised_name:
            # Deliberately no break: the last matching name wins.
            registered_campaigner = campaigner_name

    return registered_campaigner

## Load registered non-party campaigners from Electoral Commission

In [3]:
# Read the register of non-party campaigners, keeping the columns at
# positions 0-3, 5 and 8-14 of the CSV. The company registration number is
# read as text and ApprovedDate is parsed as a date.
df_campaigners = pd.read_csv(
    "../data/raw/non-party-campaigners-2019-12-04.csv",
    usecols=[0, 1, 2, 3, 5, 8, 9, 10, 11, 12, 13, 14],
    parse_dates=["ApprovedDate"],
    dtype={"CompanyRegistrationNumber": "str"},
)

# Row count first, then the per-column dtypes, each on its own line.
print(len(df_campaigners), df_campaigners.dtypes, sep="\n")
df_campaigners.head()

64
ECRef                                   object
RegulatedEntityName                     object
RegulatedEntityTypeName                 object
RegistrationStatusName                  object
EntityStatusName                        object
CompanyRegistrationNumber               object
ApprovedDate                    datetime64[ns]
FieldingCandidatesInEngland               bool
FieldingCandidatesInScotland              bool
FieldingCandidatesInWales                 bool
FieldingCandidatesInEurope                bool
FieldingCandidatesMinorParty              bool
dtype: object


Unnamed: 0,ECRef,RegulatedEntityName,RegulatedEntityTypeName,RegistrationStatusName,EntityStatusName,CompanyRegistrationNumber,ApprovedDate,FieldingCandidatesInEngland,FieldingCandidatesInScotland,FieldingCandidatesInWales,FieldingCandidatesInEurope,FieldingCandidatesMinorParty
0,TP10305,Jeremy Hosking,Third Party,Registered,Individual,,2019-10-14,False,False,False,False,False
1,TP10345,Richard Patrick Tracey - campaign name Parent...,Third Party,Registered,Individual,,2019-11-11,False,False,False,False,False
2,TP10246,38 Degrees,Third Party,Registered,Company,6642193.0,2019-09-30,False,False,False,False,False
3,TP10354,3rd Party Ltd,Third Party,Registered,Company,12313483.0,2019-11-28,False,False,False,False,False
4,TP11355,Advance Together,Third Party,Registered,Registered Political Party,,2019-11-14,False,False,False,False,False


## Load advertisers over last 30 days from Ad Library report

In [4]:
# Advertiser rows from the Ad Library report CSV, with snake_case column
# names, sorted from highest to lowest spend.
df_fb_30d = (
    pd.read_csv(
        "../data/raw/facebook-ad-library/FacebookAdLibraryReport_2019-12-01_GB_last_30_days/"
        "FacebookAdLibraryReport_2019-12-01_GB_last_30_days_advertisers.csv"
    )
    .rename(
        columns={
            "Page ID": "page_id",
            "Page Name": "page_name",
            "Disclaimer": "disclaimer",
            "Amount Spent (GBP)": "amount_spent_gbp",
            "Number of Ads in Library": "number_of_ads_in_library",
        }
    )
    # Rows whose spend is the literal text '≤100' are dropped here, before
    # the spend column is cast to an integer type below.
    .query("amount_spent_gbp != '≤100'")
    .astype({"amount_spent_gbp": "int64", "number_of_ads_in_library": "int64"})
    .sort_values(by="amount_spent_gbp", ascending=False)
    .reset_index(drop=True)
)

# Row count first, then the per-column dtypes, each on its own line.
print(len(df_fb_30d), df_fb_30d.dtypes, sep="\n")
df_fb_30d.head()

997
page_id                      int64
page_name                   object
disclaimer                  object
amount_spent_gbp             int64
number_of_ads_in_library     int64
dtype: object


Unnamed: 0,page_id,page_name,disclaimer,amount_spent_gbp,number_of_ads_in_library
0,5883973269,Liberal Democrats,the Liberal Democrats,539483,6625
1,25749647410,The Labour Party,The Labour Party,414022,654
2,230416667843105,The Brexit Party,The Brexit Party,226833,1493
3,1863944713844924,Best For Britain,Best For Britain,202795,137
4,330250343871,Jeremy Corbyn,The Labour Party on behalf of Jeremy Corbyn,175323,37


In [5]:
# Match each advertiser's disclaimer against the registered campaigner names,
# keep only the advertisers that matched, and total spend and ad counts per
# matched campaigner (largest spender first).
df_registered = (
    # No explicit .copy() needed: assign() returns a new frame and leaves
    # df_fb_30d untouched.
    df_fb_30d.assign(
        registered_disclaimer=lambda df: df["disclaimer"].apply(
            lambda x: is_registered_disclaimer(x, df_campaigners["RegulatedEntityName"])
        )
    )
    # is_registered_disclaimer returns "" when nothing matched.
    .query("registered_disclaimer != ''")
    .groupby("registered_disclaimer", as_index=False)[
        ["amount_spent_gbp", "number_of_ads_in_library"]
    ]
    .sum()
    .sort_values("amount_spent_gbp", ascending=False)
    .reset_index(drop=True)
)

print(len(df_registered))
print(df_registered.dtypes)
# Plain string literal: the original f-string had no placeholders.
print("Total spending: £", df_registered["amount_spent_gbp"].sum())
df_registered

26
registered_disclaimer       object
amount_spent_gbp             int64
number_of_ads_in_library     int64
dtype: object
Total spending: £ 585633


Unnamed: 0,registered_disclaimer,amount_spent_gbp,number_of_ads_in_library
0,UK-EU Open Policy Limited (campaign name: Best...,202795,137
1,Greenpeace Limited,64057,72
2,National Education Union,56634,494
3,HOPE not hate Ltd,52963,2627
4,Momentum Campaign (Services) Ltd,39366,54
5,38 Degrees,32500,104
6,Friends of the Earth Limited,28211,2369
7,Working 4 UK Limited,23326,27
8,Richard Patrick Tracey - campaign name Parent...,15179,20
9,Capitalist Worker,12503,23
