In [1]:
import json

from snapchat_fetch.read import read_bundle
from snapchat_fetch import config

In [2]:
# Every entry found under DATA_DIR/snapchat/2019, in sorted path order.
archives_dir = config.DATA_DIR / 'snapchat' / '2019'
archive_paths = sorted(archives_dir.iterdir())

In [3]:
# Work on the two extremes of the sorted listing: its first and last entries.
# NOTE(review): presumably this means oldest vs. newest fetch, if archive names
# sort chronologically — confirm against the fetcher's naming scheme.
bundle_path_1, bundle_path_2 = archive_paths[0], archive_paths[-1]

# Each call is unpacked into a (fetch datetime, bundle contents) pair.
fetch_datetime_1, bundle_data_1 = read_bundle(bundle_path_1)
fetch_datetime_2, bundle_data_2 = read_bundle(bundle_path_2)


In [4]:
# Show the readme shipped in the first archive (archive_paths[0]); it lists the CSV fields.
print(bundle_data_1['readme.txt'])

SNAPCHAT POLITICAL AD LIBRARY REPORT

This file contains the information for political ads that are/have been served on Snapchat’s Advertising platform.

Fields:
AD ID - Unique identifier for each Political Ad

CreativeURL - URL link to creative content for the Ad

Spend - Amount (In USD) spent by the advertiser over the campaign (up to the current date)

Impressions - Number of times the Ad has been viewed by Snapchatters

Start Date - The time at which the Ad was set up to start delivering

End Date - The time at which the Ad was set up to stop delivering

Organization Name - The name of the Organization within Ads Manager who is responsible for creating the Ad

BillingAddress - The address of the Organization within Ads Manager who is responsible for creating the Ad

CandidateBallotInformation - The name of the candidate/ ballot initiative associated with the Ad

PayingAdvertiserName - Name of the entity providing the funds for the Ad. This is the name we will display within the Sna

In [5]:
# Show the readme shipped in the last archive (archive_paths[-1]); it lists the CSV fields.
print(bundle_data_2['readme.txt'])

SNAPCHAT POLITICAL AD LIBRARY REPORT

This file contains the information for political ads that are/have been served on Snapchat’s Advertising platform.

Fields:
AD ID - Unique identifier for each Political Ad.

CreativeURL - URL link to creative content for the Ad.

Currency Code - The currency code set for the Ad Account and Ad.

Spend - Amount (In local currency) spent by the advertiser over the campaign (up to the current date).

Impressions - Number of times the Ad has been viewed by Snapchatters.

Start Date - The time at which the Ad was set up to start delivering.

End Date - The time at which the Ad was set up to stop delivering.

Organization Name - The name of the Organization within Ads Manager who is responsible for creating the Ad.

BillingAddress - The address of the Organization within Ads Manager who is responsible for creating the Ad.

CandidateBallotInformation - The name of the candidate/ ballot initiative associated with the Ad.

PayingAdvertiserName - Name of the 

```
AD ID
CreativeURL
+Currency Code
Spend
Impressions
Start Date
End Date
Organization Name
BillingAddress
CandidateBallotInformation
PayingAdvertiserName
Gender
AgeBracket
CountryCode
-RegionID
+Regions (Included)
+Regions (Excluded)
-ElectoralDistrictID
+ElectoralDisctricts (Included)
+ElectoralDisctricts (Excluded)
-LatLongRad
+Radius Targeting (Included)
+Radius Targeting (Excluded)
-MetroID
+Metros (Included)
+Metros (Excluded)
+Postal Code (Included)
+Postal Code (Excluded)
+Location Categories (Included)
+Location Categories (Excluded)
Interests
OsType
Segments
-LocationType
Language
AdvancedDemographics
Target Connection Type
Targeting Carrier (ISP)
-Targeting Geo-Postal Code
CreativeProperties
```

In [6]:
# Compare which ad identifiers appear in the first and in the last archive.
key = 'PoliticalAds.csv'
df_1 = bundle_data_1[key]
df_2 = bundle_data_2[key]

# Build each id set once; it serves both the uniqueness check and the set algebra.
ids_1 = set(df_1['ADID'])
ids_2 = set(df_2['ADID'])

# A set as large as the column means no ADID value is repeated.
assert len(ids_1) == len(df_1['ADID']), 'duplicate ADID values in the first archive'
assert len(ids_2) == len(df_2['ADID']), 'duplicate ADID values in the last archive'

common_ids = ids_1 & ids_2    # present in both archives
removed_ids = ids_1 - ids_2   # present in the first archive only
new_ids = ids_2 - ids_1       # present in the last archive only

print('{} common ids; {} new ids; {} removed ids'.format(len(common_ids), len(new_ids), len(removed_ids)))

# Fail loudly (with a count) if any ad from the first archive is missing from the last one.
assert not removed_ids, '{} ads disappeared between the two archives'.format(len(removed_ids))


2282 commond ids; 854 new ids; 0 removed ids


In [7]:
# Index both ad tables by ADID so a row can be fetched with `.loc[adid]`.
# The indexed frames are rebuilt from the bundles instead of using `inplace=True`:
# the loaded data keeps its ADID column, so this cell (and the ADID checks that
# read that column) can be re-run without a KeyError.
df_1 = bundle_data_1[key].set_index('ADID')
df_2 = bundle_data_2[key].set_index('ADID')

In [8]:
# Fields compared for equality between the two archives, with one
# "number of ads whose value differs" counter per field.
important_fields = [
    'CreativeUrl',
    'StartDate',
    'OrganizationName',
    'CandidateBallotInformation',
    'PayingAdvertiserName',
    'CountryCode',
    'Language',
    'CreativeProperties',
]
nb_modified = dict.fromkeys(important_fields, 0)

# Fields compared as integers, with one "number of ads whose value went down"
# counter per field.
increasing_fields = ['Spend', 'Impressions']
nb_decreasing = dict.fromkeys(increasing_fields, 0)

for adid in common_ids:
    # Same ad, as recorded in the first and in the last archive.
    old_row, new_row = df_1.loc[adid], df_2.loc[adid]

    for important_field in important_fields:
        if old_row[important_field] != new_row[important_field]:
            nb_modified[important_field] += 1

    for increasing_field in increasing_fields:
        old_value = int(old_row[increasing_field])
        new_value = int(new_row[increasing_field])
        if new_value < old_value:
            # Echo the (old, new) pair so the size of the drop is visible.
            print(old_value, new_value)
            nb_decreasing[increasing_field] += 1


4980735 4980733
23176972 23176971
1788814 1788813
9481115 9481114
1756560 1756557
9627257 9627256


In [9]:
# Display both tallies: per-field modification counts, then per-field decrease counts.
nb_modified, nb_decreasing

({'CreativeUrl': 0,
  'StartDate': 5,
  'OrganizationName': 0,
  'CandidateBallotInformation': 0,
  'PayingAdvertiserName': 0,
  'CountryCode': 0,
  'Language': 0,
  'CreativeProperties': 10},
 {'Spend': 0, 'Impressions': 6})

## New ads

In [10]:
# (StartDate, EndDate) pair for every ad that only appears in the last archive.
# Each ad's row is looked up once, then both dates are read from it.
[
    (row['StartDate'], row['EndDate'])
    for row in (df_2.loc[ident] for ident in new_ids)
]

[('2019/10/28 18:21:53Z', ''),
 ('2019/11/19 17:10:56Z', '2020/02/01 04:59:59Z'),
 ('2019/10/29 22:56:37Z', '2019/11/18 23:56:37Z'),
 ('2019/11/21 15:59:35Z', '2019/11/24 23:59:59Z'),
 ('2019/11/26 00:02:43Z', '2019/11/26 23:00:00Z'),
 ('2019/11/25 00:25:30Z', '2019/11/25 23:00:00Z'),
 ('2019/11/25 07:00:00Z', '2019/12/16 06:59:35Z'),
 ('2019/10/18 23:49:12Z', '2019/11/06 00:00:00Z'),
 ('2019/11/10 18:01:50Z', ''),
 ('2019/11/20 17:00:00Z', '2019/11/26 22:00:00Z'),
 ('2019/11/10 23:00:00Z', '2019/11/24 22:59:00Z'),
 ('2019/11/19 17:10:56Z', '2020/02/01 04:59:59Z'),
 ('2019/11/04 11:03:44Z', '2019/12/24 11:04:04Z'),
 ('2019/11/11 00:00:01Z', '2019/11/30 23:59:00Z'),
 ('2019/11/26 10:30:49Z', '2019/11/26 23:59:59Z'),
 ('2019/11/26 00:00:06Z', '2019/11/26 23:00:00Z'),
 ('2019/10/17 20:12:15Z', '2019/11/05 23:00:00Z'),
 ('2019/11/25 00:23:41Z', '2019/11/25 23:00:00Z'),
 ('2019/11/26 00:44:20Z', '2019/11/26 23:00:00Z'),
 ('2019/10/28 10:30:00Z', '2019/11/04 17:00:00Z'),
 ('2019/10/17 20:12: