In [2]:
import tempfile
import zipfile
import datetime
import json
import pathlib
import operator
import collections

import pandas as pd

from google_fetch import config


In [38]:
def read_bundle(bundle_path):
    fetch_datetime = datetime.datetime.strptime(
        bundle_path.stem,
        'google-political-ads-transparency-bundle_%Y-%m-%d_%H-%M-%S',
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        tmp_dir = pathlib.Path(tmpdirname)

        with zipfile.ZipFile(bundle_path, "r") as zip_ref:
            zip_ref.extractall(tmpdirname)

            assert set([
                element.name
                for element in tmp_dir.iterdir()
            ]) == {'google-political-ads-transparency-bundle'}

            data_dir = tmp_dir / 'google-political-ads-transparency-bundle'
            """assert set([
                element.name
                for element in data_dir.iterdir()
            ]) == {
                'README.txt',
                'google-political-ads-advertiser-stats.csv',
                'google-political-ads-advertiser-weekly-spend.csv',
                'google-political-ads-campaign-targeting.csv',
                'google-political-ads-creative-stats.csv',
                'google-political-ads-geo-spend.csv',
                'google-political-ads-top-keywords-history.csv',
            }"""

            bundle_data = {}

            with open(data_dir / 'README.txt', 'r') as f:
                bundle_data['README.txt'] = f.read()

            for key in [
                'google-political-ads-advertiser-stats.csv',
                'google-political-ads-advertiser-weekly-spend.csv',
                # 'google-political-ads-campaign-targeting.csv', deprecated
                'google-political-ads-creative-stats.csv',
                'google-political-ads-geo-spend.csv', 
                # 'google-political-ads-top-keywords-history.csv', deprecated
            ]:
                bundle_data[key] = pd.read_csv(
                    data_dir / key,
                    dtype=str,
                    keep_default_na=False,
                    na_values={},
                )

    return fetch_datetime, bundle_data

In [4]:
for bundle_path in sorted(list((config.DATA_DIR / 'google').iterdir())):
    print(bundle_path)

/home/michel/projects/disinfo/political-ads-scraper/data/google/bundle-new
/home/michel/projects/disinfo/political-ads-scraper/data/google/bundle-old
/home/michel/projects/disinfo/political-ads-scraper/data/google/google-political-ads-transparency-bundle_2020-01-14_14-18-02.zip
/home/michel/projects/disinfo/political-ads-scraper/data/google/google-political-ads-transparency-bundle_2020-11-30_15-00-00.zip


In [42]:
bundle_path_1 = pathlib.Path('/home/michel/projects/disinfo/political-ads-scraper/data/google/google-political-ads-transparency-bundle_2020-01-14_14-18-02.zip')
bundle_path_2 = pathlib.Path('/home/michel/projects/disinfo/political-ads-scraper/data/google/google-political-ads-transparency-bundle_2020-11-30_15-00-00.zip')
fetch_datetime_1, bundle_data_1 = read_bundle(bundle_path_1)
fetch_datetime_2, bundle_data_2 = read_bundle(bundle_path_2)


In [6]:
bundle_data_1['README.txt'] == bundle_data_2['README.txt']

False

## Ads

In [8]:
key = 'google-political-ads-creative-stats.csv'
df_1 = bundle_data_1[key]
df_2 = bundle_data_2[key]

# assert list(df_1.columns) == list(df_2.columns)

# A few ads were run by distinct advertisers, for instance:
# https://transparencyreport.google.com/political-ads/advertiser/AR185084640594231296/creative/CR275167628257918976
# https://transparencyreport.google.com/political-ads/advertiser/AR242125929453715456/creative/CR275167628257918976
duplicate_ad_ids_1 = set([
    ad_id
    for ad_id, count in collections.Counter(df_1['Ad_ID']).items()
    if count > 1
])
duplicate_ad_ids_2 = set([
    ad_id
    for ad_id, count in collections.Counter(df_2['Ad_ID']).items()
    if count > 1
])


common_duplicates = duplicate_ad_ids_1 & duplicate_ad_ids_2
old_duplicates = duplicate_ad_ids_1 - duplicate_ad_ids_2
new_duplicates = duplicate_ad_ids_2 - duplicate_ad_ids_1

print('{} were run by several advertisers (eg {})'.format(len(duplicate_ad_ids_1), list(duplicate_ad_ids_1)[0]))
print('{} are now run by several advertisers (eg {})'.format(len(duplicate_ad_ids_2), list(duplicate_ad_ids_2)[0]))


1144 were run by several advertisers (eg CR262946281637281792)
364 are now run by several advertisers (eg CR262946281637281792)


In [9]:

indexes = ['Ad_ID', 'Advertiser_ID']
df_1.set_index(indexes, inplace=True)
df_2.set_index(indexes, inplace=True)

ids_1 = set(df_1.index)
ids_2 = set(df_2.index)

assert len(ids_1) == len(df_1)
assert len(ids_2) == len(df_2)

removed_ids = ids_1 - ids_2
added_ids = ids_2 - ids_1
common_ids = ids_1 & ids_2
assert len(ids_1) + len(added_ids) - len(removed_ids) == len(ids_2)
assert len(ids_1 | ids_2) == len(common_ids) + len(added_ids) + len(removed_ids)

print('{} ads were removed, {} were added'.format(len(removed_ids), len(added_ids)))


27937 ads were removed, 434380 were added


In [10]:
removed_ads = []
moved_ads = []

for ad_id, adv_id in removed_ids:
    try:
        new_advertisers = list(df_2.loc[[ad_id]].reset_index()['Advertiser_ID'])
    except KeyError:
        removed_ads.append((ad_id, adv_id))
    else:
        if len(new_advertisers) == 1:
            moved_ads.append((ad_id, adv_id, new_advertisers[0]))
        else:
            print(new_advertisers)

print('{} ads were removed. A few examples:\n'.format(len(removed_ads)))
for ad_id, adv_id in removed_ads[:3]:
    print('{}\n    by {}\n    was removed.\n'.format(
        'https://transparencyreport.google.com/political-ads/advertiser/{}/creative/{}'.format(adv_id, ad_id),
        'https://transparencyreport.google.com/political-ads/advertiser/{}'.format(adv_id),
    ))

print('{} ads changed its advertiser. A few examples:\n'.format(len(moved_ads)))
for ad_id, adv_id, new_adv_id in moved_ads[:3]:
    print('{}\n    by {}\n    was replaced by\n{}\n    by {}\n'.format(
        'https://transparencyreport.google.com/political-ads/advertiser/{}/creative/{}'.format(adv_id, ad_id),
        'https://transparencyreport.google.com/political-ads/advertiser/{}'.format(adv_id),
        'https://transparencyreport.google.com/political-ads/advertiser/{}/creative/{}'.format(new_adv_id, ad_id),
        'https://transparencyreport.google.com/political-ads/advertiser/{}'.format(new_adv_id),
    ))


22152 ads were removed. A few examples:

https://transparencyreport.google.com/political-ads/advertiser/AR439033893459329024/creative/CR13565362146639872
    by https://transparencyreport.google.com/political-ads/advertiser/AR439033893459329024
    was removed.

https://transparencyreport.google.com/political-ads/advertiser/AR101121809528651776/creative/CR394085995554799616
    by https://transparencyreport.google.com/political-ads/advertiser/AR101121809528651776
    was removed.

https://transparencyreport.google.com/political-ads/advertiser/AR189769109884370944/creative/CR43472490739007488
    by https://transparencyreport.google.com/political-ads/advertiser/AR189769109884370944
    was removed.

5785 ads changed its advertiser. A few examples:

https://transparencyreport.google.com/political-ads/advertiser/AR369049153717665792/creative/CR120698064305389568
    by https://transparencyreport.google.com/political-ads/advertiser/AR369049153717665792
    was replaced by
https://transpare

In [11]:
adv_with_removed_ad = set([
    adv_id
    for _, adv_id in removed_ads
])
print(adv_with_removed_ad)

{'AR302532823405821952', 'AR332261143919722496', 'AR285896250404896768', 'AR404989714928500736', 'AR360160426840817664', 'AR202327731696828416', 'AR382888706576482304', 'AR366239214313930752', 'AR415543652165615616', 'AR75218140211970048', 'AR463536510084317184', 'AR68345642782556160', 'AR112255739149418496', 'AR49205344366231552', 'AR399360215394287616', 'AR162298636498108416', 'AR500858057899638784', 'AR228077469385621504', 'AR352116399772008448', 'AR266895727404253184', 'AR185893056518553600', 'AR99443405028851712', 'AR378984890542063616', 'AR172202212607393792', 'AR150822483883196416', 'AR508126654392958976', 'AR117183337948250112', 'AR50710026028843008', 'AR112555630945894400', 'AR273259425827913728', 'AR14302447254110208', 'AR562441979049279488', 'AR522320249995919360', 'AR98744940267307008', 'AR131699502775009280', 'AR150429820793126912', 'AR452514455771676672', 'AR480998953756655616', 'AR248470386423889920', 'AR255896213079982080', 'AR95758941564174336', 'AR427601721309528064',

## Advertisers

In [13]:
key = 'google-political-ads-advertiser-stats.csv'
df_1 = bundle_data_1[key]
df_2 = bundle_data_2[key]

# assert list(df_1.columns) == list(df_2.columns)

indexes = 'Advertiser_ID'
df_1.set_index(indexes, inplace=True)
df_2.set_index(indexes, inplace=True)
ids_1 = set(df_1.index)
ids_2 = set(df_2.index)

assert len(ids_1) == len(df_1)
assert len(ids_2) == len(df_2)

removed_ids = ids_1 - ids_2
added_ids = ids_2 - ids_1
common_ids = ids_1 & ids_2
assert len(ids_1) + len(added_ids) - len(removed_ids) == len(ids_2)
assert len(ids_1 | ids_2) == len(common_ids) + len(added_ids) + len(removed_ids)

print('{} advertisers were removed, {} were added'.format(len(removed_ids), len(added_ids)))


82 advertisers were removed, 3207 were added


In [18]:
print('{} advertisers were removed. A few examples:\n'.format(len(removed_ids)))
for adv_id in list(removed_ids)[:3]:
    print('https://transparencyreport.google.com/political-ads/advertiser/{} was removed'.format(adv_id))


82 advertisers were removed. A few examples:

https://transparencyreport.google.com/political-ads/advertiser/AR343045978598670336 was removed
https://transparencyreport.google.com/political-ads/advertiser/AR46458901759000576 was removed
https://transparencyreport.google.com/political-ads/advertiser/AR340024520645541888 was removed


In [21]:
df_adv_with_removed_ad = df_2.loc[adv_with_removed_ad]
ad_adv_eu = df_adv_with_removed_ad[df_adv_with_removed_ad['Regions'].map(lambda x: isinstance(x, str) and 'EU' in x)]
ad_adv_eu

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Advertiser_Name,Public_IDs_List,Regions,Elections,Total_Creatives,Spend_USD,Spend_EUR,Spend_INR,Spend_BGN,Spend_HRK,Spend_CZK,Spend_DKK,Spend_HUF,Spend_PLN,Spend_RON,Spend_SEK,Spend_GBP,Spend_ILS,Spend_NZD
Advertiser_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
AR382888706576482304,Momentum Mozgalom,,"EU, HU, SK",EU-Parliament,122,17700,15850,1235750,31000,117700,409000,118500,5167500,68100,75450,170250,13950,64000,27200
AR185893056518553600,Γεώργιος Λογιάδης,,"EU, GR",EU-Parliament,44,1500,1350,105000,2625,10000,35000,10000,435000,5800,6375,14250,1200,5500,2200
AR378984890542063616,ΑΒΡΑΝΤΙΝΗΣ ΑΝΑΣΤΑΣΙΟΣ (ΤΑΣΟΣ),,"EU, GR",EU-Parliament,107,5700,5000,390000,9775,37000,127000,37500,1612500,21200,23625,52500,4500,20250,8400
AR150822483883196416,Partidul Uniunea Salvati Romania,,"CY, DE, ES, EU, GB, IT, RO",EU-Parliament,304,131300,117050,9251750,228900,869800,3023000,873500,38505000,505700,558000,1250250,102550,467500,200200
AR522320249995919360,Flamesite s.r.o.,,"CZ, EU",EU-Parliament,234,1700,1550,121750,3050,11600,40000,11500,502500,6700,7425,16500,1350,6250,2600
AR272140397868744704,AHKERIEN AJATUSTEN SEURA RY,,"EU, FI",EU-Parliament,9,9600,8600,669000,16775,63700,221000,64000,2797500,36900,40875,92250,7550,34500,14600
AR42481418445520896,Concerned Citizens Limited,,"EU, GB",EU-Parliament,22,500,500,39000,975,3700,13000,3500,165000,2100,2400,5250,400,2000,800
AR24721694316822528,Volt Nederland,,"AT, BE, BG, DE, ES, EU, IT, NL, SK",EU-Parliament,19,26500,23700,1847250,46375,176100,612000,177000,7740000,102000,112875,255000,20850,95250,40600
AR165256735093686272,Atelierul de Internet SRL,,"DE, ES, EU, FR, GB, HU, IT, NL, RO",EU-Parliament,2263,475300,411100,34715500,804050,3089100,10851000,3063500,144495000,1819700,1986300,4311750,368200,1635500,720400
AR453487111245398016,KWW Koalicja Bezpartyjni i Samorządowcy,,"EU, PL",EU-Parliament,753,2100,1950,151500,3800,14400,50000,14500,645000,8400,9225,21000,1750,7500,3400


In [32]:
"""changed_weekly_spends = ['AR132631338879549440', 'AR488306308034854912', 'AR98551701098725376', 'AR119011138590474240', 'AR486291452976955392', 'AR119011138590474240', 'AR81539232560054272', 'AR419626413717454848', 'AR383080296477622272', 'AR55967615754960896', 'AR400032566754672640', 'AR201649882778304512', 'AR519831505426448384', 'AR474748779908562944', 'AR77689292595396608', 'AR81539232560054272', 'AR140403236820484096', 'AR6195198266703872', 'AR240608328529477632', 'AR488306308034854912', 'AR383251270535741440', 'AR488306308034854912', 'AR399188966458261504', 'AR485896728302583808', 'AR288284801977286656', 'AR140403236820484096', 'AR474748779908562944', 'AR98551701098725376', 'AR73057599863390208', 'AR227104401595039744', 'AR288179248861020160', 'AR94030784163217408', 'AR373464517536907264', 'AR507019171305881600', 'AR227104401595039744', 'AR94030784163217408', 'AR383251270535741440', 'AR6195198266703872', 'AR488306308034854912', 'AR390238117174444032', 'AR362903983230025728', 'AR488306308034854912', 'AR485896728302583808', 'AR185084640594231296', 'AR519831505426448384', 'AR240608328529477632', 'AR140403236820484096', 'AR352083414423175168', 'AR89517838687010816', 'AR120847323008860160', 'AR375716317350592512', 'AR60189740405620736', 'AR373464517536907264', 'AR89517838687010816', 'AR144491221052555264', 'AR390238117174444032', 'AR191840040035287040', 'AR489541609348661248', 'AR140403236820484096', 'AR210958348219056128', 'AR94030784163217408', 'AR40198145111490560', 'AR383251270535741440', 'AR375716317350592512', 'AR210958348219056128', 'AR522320249995919360', 'AR121262938404159488', 'AR94030784163217408', 'AR214800866480226304', 'AR334064892745089024', 'AR89517838687010816', 'AR216489716340490240', 'AR242125929453715456', 'AR485896728302583808', 'AR214800866480226304', 'AR144491221052555264', 'AR214800866480226304', 'AR383251270535741440', 'AR383251270535741440', 'AR400032566754672640', 'AR132631338879549440', 'AR288179248861020160', 'AR390238117174444032', 'AR119011138590474240', 'AR191840040035287040', 'AR166715924462698496', 'AR200379397092409344', 'AR315375668974059520', 'AR488306308034854912', 'AR77689292595396608', 'AR140403236820484096', 'AR488306308034854912', 'AR94030784163217408', 'AR140403236820484096', 'AR180201434577371136', 'AR17917091730423808', 'AR478461555797655552', 'AR6195198266703872', 'AR485896728302583808', 'AR432363981047332864', 'AR400032566754672640', 'AR362903983230025728', 'AR334064892745089024', 'AR60585289713713152', 'AR119011138590474240', 'AR140403236820484096', 'AR50338116220747776', 'AR210958348219056128', 'AR94030784163217408', 'AR530356305605427200', 'AR94030784163217408', 'AR240608328529477632', 'AR383251270535741440', 'AR383251270535741440', 'AR210958348219056128', 'AR40198145111490560', 'AR132631338879549440', 'AR94030784163217408', 'AR175306133932605440', 'AR40198145111490560', 'AR474748779908562944', 'AR77689292595396608', 'AR94030784163217408', 'AR383251270535741440', 'AR240608328529477632', 'AR383251270535741440', 'AR217815727363588096', 'AR503512828724903936', 'AR495298652231696384', 'AR6195198266703872', 'AR288179248861020160', 'AR419626413717454848', 'AR98551701098725376', 'AR428206177826897920', 'AR522320249995919360', 'AR120847323008860160', 'AR240608328529477632', 'AR478461555797655552', 'AR488306308034854912', 'AR32708546780987392', 'AR240608328529477632', 'AR152882144039927808', 'AR488306308034854912', 'AR488306308034854912', 'AR205658702173175808', 'AR17917091730423808', 'AR519831505426448384', 'AR240608328529477632', 'AR144491221052555264', 'AR240608328529477632', 'AR144491221052555264', 'AR383251270535741440', 'AR519831505426448384', 'AR6195198266703872', 'AR362903983230025728', 'AR400032566754672640', 'AR144491221052555264', 'AR217815727363588096', 'AR488306308034854912', 'AR400032566754672640', 'AR488306308034854912', 'AR100996740080992256', 'AR474748779908562944', 'AR488306308034854912', 'AR485896728302583808', 'AR230475229367894016', 'AR80178586920681472', 'AR288179248861020160', 'AR75218140211970048', 'AR488306308034854912', 'AR562822410072489984', 'AR198061076825243648', 'AR119011138590474240', 'AR311843487869829120', 'AR532605081762136064', 'AR530356305605427200', 'AR6195198266703872', 'AR77689292595396608', 'AR352083414423175168', 'AR120847323008860160', 'AR157291735423123456', 'AR316331969212317696', 'AR474748779908562944', 'AR6195198266703872', 'AR50710026028843008', 'AR503512828724903936', 'AR50338116220747776', 'AR13000625486823424', 'AR201649882778304512', 'AR140403236820484096', 'AR132631338879549440', 'AR94030784163217408', 'AR220642022002786304', 'AR89517838687010816', 'AR383251270535741440', 'AR352083414423175168', 'AR240608328529477632', 'AR77689292595396608', 'AR503512828724903936', 'AR300281023592136704', 'AR383251270535741440', 'AR81539232560054272', 'AR242125929453715456', 'AR214800866480226304', 'AR485896728302583808', 'AR230475229367894016', 'AR373464517536907264', 'AR376667944664432640', 'AR140403236820484096', 'AR77689292595396608', 'AR352083414423175168', 'AR315375668974059520', 'AR98551701098725376', 'AR475747136466583552', 'AR227104401595039744', 'AR50338116220747776', 'AR89517838687010816', 'AR17917091730423808', 'AR240608328529477632', 'AR334064892745089024', 'AR77689292595396608', 'AR288179248861020160', 'AR350209296853630976', 'AR374597289391423488', 'AR419626413717454848', 'AR6195198266703872', 'AR513786390496935936', 'AR140403236820484096', 'AR519831505426448384', 'AR374597289391423488', 'AR185084640594231296', 'AR488306308034854912', 'AR503512828724903936', 'AR98551701098725376', 'AR383251270535741440', 'AR350471805254762496', 'AR352083414423175168', 'AR383251270535741440', 'AR237678679797268480', 'AR94030784163217408', 'AR230475229367894016', 'AR237340030215913472', 'AR94030784163217408', 'AR474748779908562944', 'AR488306308034854912', 'AR334064892745089024', 'AR144491221052555264', 'AR242125929453715456', 'AR374590417443749888', 'AR94030784163217408', 'AR40198145111490560', 'AR240608328529477632', 'AR288179248861020160', 'AR144491221052555264', 'AR488306308034854912', 'AR428206177826897920', 'AR210958348219056128', 'AR334064892745089024', 'AR17917091730423808', 'AR271193993235136512', 'AR288179248861020160', 'AR89517838687010816', 'AR180201434577371136', 'AR140403236820484096', 'AR98551701098725376', 'AR283015942256984064', 'AR240608328529477632', 'AR80178586920681472', 'AR89517838687010816', 'AR17917091730423808', 'AR119011138590474240', 'AR488306308034854912', 'AR399188966458261504', 'AR73057599863390208', 'AR98551701098725376', 'AR194446432348930048', 'AR400032566754672640', 'AR489541609348661248', 'AR486291452976955392', 'AR485896728302583808', 'AR119011138590474240', 'AR98551701098725376', 'AR6195198266703872', 'AR194446432348930048', 'AR6195198266703872', 'AR288179248861020160', 'AR489541609348661248', 'AR94030784163217408', 'AR98551701098725376', 'AR140403236820484096', 'AR428206177826897920', 'AR183859784640888832', 'AR383251270535741440']
df_adv_with_removed_ad = df_2.loc[changed_weekly_spends]
ad_adv_eu = df_adv_with_removed_ad[df_adv_with_removed_ad['Regions'].map(lambda x: 'EU' in x)]
ad_adv_eu"""
None

## Advertisers weekly spends

In [36]:
key = 'google-political-ads-advertiser-weekly-spend.csv'
df_1 = bundle_data_1[key]
df_2 = bundle_data_2[key]

# assert list(df_1.columns) == list(df_2.columns)

indexes = ['Advertiser_ID', 'Week_Start_Date']
df_1.set_index(indexes, inplace=True)
df_2.set_index(indexes, inplace=True)

ids_1 = set(df_1.index)
ids_2 = set(df_2.index)

assert len(ids_1) == len(df_1)
assert len(ids_2) == len(df_2)

removed_ids = ids_1 - ids_2
added_ids = ids_2 - ids_1
common_ids = ids_1 & ids_2
assert len(ids_1) + len(added_ids) - len(removed_ids) == len(ids_2)
assert len(ids_1 | ids_2) == len(common_ids) + len(added_ids) + len(removed_ids)

print('{} lines were removed, {} were added'.format(len(removed_ids), len(added_ids)))

old_advertisers = set(map(operator.itemgetter(0), removed_ids))
new_advertisers = set(map(operator.itemgetter(0), added_ids))

print('Start date has been tampered for {} advertisers'.format(len(old_advertisers & new_advertisers)))

print('Example of lines have been removed:\n')
for advertiser_id in list(old_advertisers)[:3]:
    print('{} ({})\n'.format(
        'https://transparencyreport.google.com/political-ads/advertiser/{}'.format(advertiser_id),
        [
            removed_id[1]
            for removed_id in removed_ids
            if removed_id[0] == advertiser_id
        ]
    ))

print('Examples of lines that have been added about the following advertisers:\n')
for advertiser_id in list(new_advertisers)[:3]:
    print('{} ({})\n'.format(
        'https://transparencyreport.google.com/political-ads/advertiser/{}'.format(advertiser_id),
        [
            added_id[1]
            for added_id in added_ids
            if added_id[0] == advertiser_id
        ]
    ))

"""for line_id in list(common_ids):
    modifications = df_1.loc[line_id] != df_2.loc[line_id]
    modified_columns = list(modifications[modifications].axes[0])
    if modified_columns:
        print('Line {} has be modified'.format(line_id))
"""
None

973 lines were removed, 43297 were added
Start date has been tampered for 25 advertisers
Example of lines have been removed:

https://transparencyreport.google.com/political-ads/advertiser/AR279955176763162624 (['2019-08-11', '2019-10-20', '2019-10-27'])

https://transparencyreport.google.com/political-ads/advertiser/AR271717910525771776 (['2018-10-28', '2018-10-21', '2018-10-14'])

https://transparencyreport.google.com/political-ads/advertiser/AR386241117529571328 (['2019-05-19', '2019-04-28', '2019-05-05', '2019-05-12'])

Examples of lines that have been added about the following advertisers:

https://transparencyreport.google.com/political-ads/advertiser/AR338146005029486592 (['2020-10-11', '2020-10-25', '2020-10-04', '2020-10-18', '2020-11-01'])

https://transparencyreport.google.com/political-ads/advertiser/AR466924449006878720 (['2018-10-21', '2018-11-04', '2018-10-28'])

https://transparencyreport.google.com/political-ads/advertiser/AR92942748688056320 (['2020-10-11', '2020-10-1

## Total spending

In [51]:
key = 'google-political-ads-geo-spend.csv'
df_1 = bundle_data_1[key]
df_2 = bundle_data_2[key]

# assert list(df_1.columns) == list(df_2.columns)

indexes = ['Country', 'Country_Subdivision_Primary', 'Country_Subdivision_Secondary']
df_1.set_index(indexes, inplace=True)
df_2.set_index(indexes, inplace=True)
ids_1 = set(df_1.index)
ids_2 = set(df_2.index)

# assert len(ids_1) == len(df_1) == len(ids_2) == len(df_2)

removed_ids = ids_1 - ids_2
added_ids = ids_2 - ids_1
common_ids = ids_1 & ids_2
assert len(removed_ids) == 0
#assert len(added_ids) == 0

for line_id in list(common_ids):
    suspicious_modification = int(df_1.loc[line_id]['Spend_EUR']) > int(df_2.loc[line_id]['Spend_EUR'])
    if suspicious_modification:
        print('Line {} has be modified'.format(line_id))

Line ('IN', 'IN-SK', '') has be modified
Line ('EE', '', '') has be modified
Line ('GB', '', '') has be modified
Line ('IN', 'IN-AN', '') has be modified
Line ('IN', 'IN-LD', '') has be modified
Line ('IN', 'IN-DN', '') has be modified
Line ('IN', 'IN-DD', '') has be modified
Line ('IN', 'IN-NL', '') has be modified
Line ('IN', 'IN-ML', '') has be modified
Line ('IN', 'IN-AR', '') has be modified
Line ('IN', 'IN-MZ', '') has be modified
Line ('BG', '', '') has be modified


In [52]:
line_id = ('EE', '', '')
df_1.loc[line_id]['Spend_EUR'], df_2.loc[line_id]['Spend_EUR']

('21450', '21100')

In [53]:
line_id = ('GB', '', '')
df_1.loc[line_id]['Spend_EUR'], df_2.loc[line_id]['Spend_EUR']

('2031650', '1725000')

In [54]:
line_id = ('BG', '', '')
df_1.loc[line_id]['Spend_EUR'], df_2.loc[line_id]['Spend_EUR']

('10900', '10200')