## Load Filtered Presidential Ad Datasets into pandas DataFrames

In [1]:
from awesome_puppies import ads_df as df
from awesome_puppies import ad_counts_by_region_and_beneficiary
from awesome_puppies import regions, region_string

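# The following code block can be used if the awesome_puppies module
# does not work for you.  Note that it only recreates `df`; the helper
# functions imported above (regions, region_string,
# ad_counts_by_region_and_beneficiary) still come from awesome_puppies.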

# import pandas as pd
#
# ads_direct = "../../datasets/Political_TV_Ad_Archive/"
#
# df = pd.read_csv(
#    ads_direct + "political_ad_pres_airing_from_july.gz",
#    compression="gzip",
#    parse_dates=["start_time", "end_time", "date_created"],
# )
# df["duration"] = df["end_time"] - df["start_time"]

# Here is how you can read the alternate working data sets if they are of
# interest.  The balance of this notebook assumes that you are working from the
# airing_from_july dataset.

# df = pd.read_csv(ads_direct + "political_ad_pres_airing.gz", compression='gzip')
# df = pd.read_csv(ads_direct + "political_ad_pres_unique.gz", compression='gzip')

In [2]:
# (rows, columns) of the loaded ads frame.
df.shape

(86302, 19)

In [3]:
# Column labels available for the exploration below.
df.columns

Index(['network', 'region_id', 'program', 'program_type', 'start_time',
       'end_time', 'archive_id', 'embed_url', 'sponsors', 'sponsor_types',
       'subjects', 'candidates', 'message', 'date_created', 'beneficiary',
       'date', 'day_of_week', 'air_time', 'duration'],
      dtype='object')

In [4]:
# Per-column dtypes.  Note: start_time/end_time are timezone-aware (UTC)
# while date_created is a naive datetime, so they cannot be compared
# directly without localizing/converting one side first.
df.dtypes

network                       object
region_id                     object
program                       object
program_type                  object
start_time       datetime64[ns, UTC]
end_time         datetime64[ns, UTC]
archive_id                    object
embed_url                     object
sponsors                      object
sponsor_types                 object
subjects                      object
candidates                    object
message                       object
date_created          datetime64[ns]
beneficiary                   object
date                          object
day_of_week                   object
air_time                      object
duration             timedelta64[ns]
dtype: object

In [5]:
# Row counts per broadcasting network, most frequent first.
df["network"].value_counts()

WFTS    3939
WTVT    3720
WCAU    3434
WJW     3221
KYW     3200
        ... 
KTSF       9
WABC       8
WNYW       4
WCBS       2
KCNS       1
Name: network, Length: 77, dtype: int64

In [6]:
# Row counts per market region, most frequent first.
df["region_id"].value_counts()

philadelphia_region     12428
tampa_region            12282
las_vegas_region        10918
cleveland_region        10341
san_francisco_region     9499
raleigh_region           6729
cedar_rapids_region      5248
boston_region            5010
denver_region            4707
milwaukee_region         4094
phoenix_region           3239
washington_dc_region     1759
new_york_city_region       48
Name: region_id, dtype: int64

In [7]:
# Row counts per program title, most frequent first.
df["program"].value_counts()

Today                           2706
CBS This Morning                1713
Good Morning America            1533
Judge Judy                      1306
FOX5 News This Morning           848
                                ... 
Jeff Dunham Controlled Chaos       1
NCWTS Pre- Race                    1
Eastern Iowa O.T.                  1
F1 Extra                           1
Saturday News at 10pm              1
Name: program, Length: 1650, dtype: int64

In [8]:
# Row counts per program type.
df["program_type"].value_counts()

news        56183
not news    30119
Name: program_type, dtype: int64

In [9]:
# Row counts per sponsor string, most frequent first.
# NOTE(review): spelling variants are counted separately (e.g. a sponsor
# name with and without a period after an initial) -- normalize before
# aggregating by sponsor.
df["sponsors"].value_counts()

Hillary for America                       38440
Donald J Trump For President              22617
Priorities USA Action                     11263
Rebuilding America Now                     2962
NextGen California Action Committee        2482
Women Vote!                                1102
Future45                                    960
Reform America Fund                         833
NRA Institute for Legislative Action        791
45 Committee                                766
National Rifle Assn                         669
Donald J. Trump For President               526
Internet Archive                            511
Stop Hillary PAC                            507
Great America PAC                           487
MoveOn.org                                  430
Democratic National Cmte                    207
Correct The Record                          141
Reclaim America PAC                         110
Republican National Cmte                     98
American Bridge 21st Century            

In [10]:
# Row counts per sponsor type, most frequent first.
df["sponsor_types"].value_counts()

Multiple                       38463
Candidate Committee            22617
Super PAC                      19796
Non Profit                      1615
Hybrid Super PAC                1565
PAC                             1124
Unknown                         1092
Joint Fundraising Committee       29
Corporation                        1
Name: sponsor_types, dtype: int64

In [11]:
# Row counts per subjects string.  Each value is a comma-separated list of
# topics, so this counts distinct topic combinations, not individual topics.
df["subjects"].value_counts()

Women, Candidate Biography, Children                     4184
Candidate Biography, Children                            4015
Taxes, Candidate Biography, Terrorism, Jobs              3902
Candidate Biography                                      3619
Economy, Jobs, Federal Budget, Taxes, Families           3216
                                                         ... 
Candidate Biography, Civil Rights, Immigration, Islam       1
Immigration, Candidate Biography                            1
Economy, Corporations, Taxes, Regulation                    1
Candidate Biography, Military, Foreign Policy               1
Terrorism, Military, Veterans                               1
Name: subjects, Length: 217, dtype: int64

In [12]:
# Row counts per candidates string.  Values may name several candidates
# separated by commas, so each combination is counted as its own category.
df["candidates"].value_counts()

Donald Trump                                                                                               42511
Hillary Clinton                                                                                            22828
Donald Trump, Hillary Clinton                                                                              20934
Donald Trump, John McCain, Kelly Ayotte, Marco Rubio, Richard Burr, Rob Portman, Ron Johnson, Roy Blunt       20
Bernie Sanders, Donald Trump, Hillary Clinton                                                                  6
Hillary Clinton, Tim Kaine                                                                                     3
Name: candidates, dtype: int64

In [13]:
# Row counts per message label, most frequent first.
df["message"].value_counts()

con        48247
mixed      21238
pro        14914
unknown     1903
Name: message, dtype: int64

In [14]:
# Row counts per day of the week, most frequent first.
df["day_of_week"].value_counts()

Tuesday      15708
Thursday     14243
Wednesday    14235
Friday       14096
Monday       13300
Saturday      7657
Sunday        7063
Name: day_of_week, dtype: int64

In [15]:
# Row counts per distinct duration value, most frequent first.
df["duration"].value_counts()

0 days 00:00:30    66103
0 days 00:01:00     6111
0 days 00:00:29     4890
0 days 00:00:32     2945
0 days 00:00:31     2407
                   ...  
0 days 00:00:50        1
0 days 00:01:30        1
0 days 00:00:38        1
0 days 00:01:20        1
0 days 00:01:19        1
Name: duration, Length: 66, dtype: int64

In [16]:
# Row counts per beneficiary.
df["beneficiary"].value_counts()

Clinton    54617
Trump      31685
Name: beneficiary, dtype: int64

In [17]:
# Helper from awesome_puppies: returns the list of region ids.
regions()

['boston_region',
 'cedar_rapids_region',
 'cleveland_region',
 'denver_region',
 'las_vegas_region',
 'milwaukee_region',
 'new_york_city_region',
 'philadelphia_region',
 'phoenix_region',
 'raleigh_region',
 'san_francisco_region',
 'tampa_region',
 'washington_dc_region']

In [18]:
# Print the display label that region_string() produces for each region id.
for region in regions():
    print(region_string(region))

Boston Region
Cedar Rapids Region
Cleveland Region
Denver Region
Las Vegas Region
Milwaukee Region
New York City Region
Philadelphia Region
Phoenix Region
Raleigh Region
San Francisco Region
Tampa Region
Washington DC Region


In [19]:
# Helper from awesome_puppies: table of counts with one row per region_id
# and one column per beneficiary.
ad_counts_by_region_and_beneficiary()

beneficiary,Clinton,Trump
region_id,Unnamed: 1_level_1,Unnamed: 2_level_1
boston_region,2770,2240
cedar_rapids_region,3820,1428
cleveland_region,6551,3790
denver_region,1633,3074
las_vegas_region,7906,3012
milwaukee_region,1288,2806
new_york_city_region,42,6
philadelphia_region,8263,4165
phoenix_region,2757,482
raleigh_region,4926,1803
san_francisco_region,6081,3418
tampa_region,8019,4263
washington_dc_region,561,1198


In [20]:
# Select the Clinton column: per-region counts for that beneficiary.
ad_counts_by_region_and_beneficiary()["Clinton"]

region_id
boston_region           2770
cedar_rapids_region     3820
cleveland_region        6551
denver_region           1633
las_vegas_region        7906
milwaukee_region        1288
new_york_city_region      42
philadelphia_region     8263
phoenix_region          2757
raleigh_region          4926
san_francisco_region    6081
tampa_region            8019
washington_dc_region     561
Name: Clinton, dtype: int64

In [21]:
# Select a single region's row by label: counts per beneficiary for Boston.
ad_counts_by_region_and_beneficiary().loc["boston_region"]

beneficiary
Clinton    2770
Trump      2240
Name: boston_region, dtype: int64

In [22]:
# Same Boston lookup via the transposed table, where regions become columns.
ad_counts_by_region_and_beneficiary().T["boston_region"]

beneficiary
Clinton    2770
Trump      2240
Name: boston_region, dtype: int64