# Generating Test Data

In [1]:
# Packages we need:
import pandas as pd
import numpy as np
import re
from datetime import datetime
from itertools import product
from datetime import date, timedelta
import calendar

In [2]:
from IPython.display import display
pd.options.display.max_columns = None
pd.options.display.max_rows = 500

# Getting the Input

There are some input data that we need to generate Excel file.

In [3]:
date1 = input("Please enter the first day of the prediction in a form of yyyy-mm-dd:")

Please enter the first day of the prediction in a form of yyyy-mm-dd:2019-08-01


In [4]:
date1 = datetime.strptime(date1, '%Y-%m-%d').date()

In [5]:
date2 = input("Please enter the last day of the prediction in a form of yyyy-mm-dd:")

Please enter the last day of the prediction in a form of yyyy-mm-dd:2019-08-31


In [6]:
date2 = datetime.strptime(date2, '%Y-%m-%d').date()

In [7]:
print("The predictions will run from {} until {}!".format(date1,date2))

The predictions will run from 2019-08-01 until 2019-08-31!


In [8]:
# budget_m = int(input("Please enter the monthly budget:"))

In [9]:
# budget_d = int(input("Please enter the daily budget:"))

## Preparing the Data

The steps followed here are copied from the initial modelling notebook. 

In [152]:
# Uploading the raw data from S3
import boto3
from sagemaker import get_execution_role

role = get_execution_role()
bucket='facebookconversionrates/fb_raw_data'
data_key = 'fbadset.xlsx'
data_location = 's3://{}/{}'.format(bucket, data_key)

df = pd.read_excel(data_location,
                   sheet_name=['sheet1', 'sheet2','sheet3','sheet4','sheet5','sheet6','sheet7','sheet8','sheet9','sheet10'], 
                   skiprows=None)

In [153]:
#convert dict to df to concat all sheets
dfs = []
for dfname in df.keys():
    temp_df = df[dfname]
    temp_df['Session'] = dfname
    dfs.append(temp_df)
    
df = pd.concat(dfs)

In [154]:
df.columns

Index(['Ad Account ID', 'Ad Account Name', 'Reporting starts',
       'Reporting ends', 'Ad set name', 'Age', 'Ad set delivery', 'Bid',
       'Bid Type', 'Ad set budget', 'Ad set budget type',
       'Last significant edit', 'Results', 'Result indicator', 'Reach',
       'Impressions', 'Cost per results', 'Amount spent (USD)', 'Ends',
       'Starts', 'Link clicks', 'Website purchases', 'Leads (form)',
       'Campaign name', 'Campaign ID', 'Lead - Page Contains thank-you',
       'Lead Submission', 'Ad set ID', 'Landing page views', 'Session'],
      dtype='object')

In [13]:
df.shape

(105971, 30)

In [14]:
df = df.sort_values(by='Reporting starts',ascending=False)

In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 105971 entries, 7797 to 17744
Data columns (total 30 columns):
Ad Account ID                     105971 non-null int64
Ad Account Name                   105971 non-null object
Reporting starts                  105971 non-null datetime64[ns]
Reporting ends                    105971 non-null datetime64[ns]
Ad set name                       105971 non-null object
Age                               105971 non-null object
Ad set delivery                   105971 non-null object
Bid                               105971 non-null int64
Bid Type                          105971 non-null object
Ad set budget                     105971 non-null float64
Ad set budget type                105971 non-null object
Last significant edit             105971 non-null object
Results                           48315 non-null float64
Result indicator                  48315 non-null object
Reach                             105971 non-null int64
Impressions        

In [16]:
#remove space and concat _column names
df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('_-_', '_')

In [17]:
max(df['reporting_starts'])

Timestamp('2019-09-03 00:00:00')

In [18]:
min(df['reporting_starts'])

Timestamp('2018-01-01 00:00:00')

In [19]:
df.drop_duplicates(keep=False,inplace=True)

In [20]:
df['ends'] = df['ends'].replace('Sürekli', 'Ongoing')

In [21]:
fb=df.copy()

In [22]:
fb['day_of_week']=fb['reporting_starts'].dt.dayofweek
days = {0:'Mon',1:'Tues',2:'Weds',3:'Thurs',4:'Fri',5:'Sat',6:'Sun'}
fb['day_of_week'] = fb['day_of_week'].apply(lambda x: days[x])

In [23]:
fb['year'],fb['month'],fb['week_num'] = fb['reporting_starts'].dt.year, fb['reporting_starts'].dt.month, fb['reporting_starts'].dt.week

In [24]:
#return_inverse : bool, optional
#If True, also return the indices of the unique array 
#(for the specified axis, if provided) that can be used to reconstruct ar.
def unique_val_count(a):
    unique, inverse = np.unique(a, return_inverse=True)
    count = np.zeros(len(unique), np.int)
    np.add.at(count, inverse, 1)
    return np.vstack((unique, count)).T

In [25]:
fb['campaign_name']=fb.campaign_name.str.replace('Facebook','').str.strip().str.replace(' ','')

In [26]:
fb['campaign_name']=fb['campaign_name'].str.replace(' ',
'').str.strip().apply(lambda x : x[1:] if x.startswith("-") else x)

In [27]:
fb['location'] = fb.campaign_name.str.split('-').str[0]
fb['topic'] = fb.campaign_name.str.split('-').str[1]
fb['type'] = fb.campaign_name.str.split('-').str[2]

In [28]:
fb['target'] = fb.ad_set_name.str.split('-').str[1]
fb['target_sub'] = fb.ad_set_name.str.split('-').str[2]
fb['version']=fb.ad_set_name.str.extract('-([^-]*)$')
#fb['version']=fb.ad_set_name.str.split('(\d+)').astype(int)

In [29]:
fb['target']=fb['target'].str.replace("AEC|CRE|Cloud|Event:Webinar|GEN|INS|MF|MSP|PS|RRE|TH",'').str.replace('-',"")
fb['target']=fb['target'].str.replace("AU|BoostedPost|CA|DE|EMEA|ES|FR|IT|MX|PL|SE|UK|US|ZA",'').str.replace('-',"")
fb['target']=fb['target'].str.replace("AU|BoostedPost|CA|DE|EMEA|ES|FR|IT|MX|PL|SE|UK|US|ZA|LAL|Latin America",'').str.replace('-',"")

In [30]:
fb['target_grouped'] = fb['target'].astype(str) + fb['target_sub'].astype(str)

In [31]:
unique_val_count(fb.target_grouped.astype(str)).tolist()

[['    All Lists ', 558],
 ['    Brokers LAL ', 113],
 ['    PS ', 104],
 ['    Submitted Lead Form LAL ', 148],
 ['    Website Remarketing ', 132],
 ['   2% ', 230],
 ['   Agents & Brokers ', 1625],
 ['   All Lists ', 15856],
 ['   All Lists  LAL ', 181],
 ['   All Remarketing ', 116],
 ['   Architects ', 1639],
 ['   Awareness', 232],
 ['   Awareness ', 1832],
 ['   B2B TH + Online Advertisiing ', 80],
 ['   B2B TH + Online Advertising ', 360],
 ['   Brokers ', 223],
 ['   Camera Sale LAL ', 110],
 ['   Camera Sales & Buy Site LAL ', 605],
 ['   Case Study ', 51],
 ['   Claims Adjusters Job Titles ', 48],
 ['   Construction ', 1722],
 ['   Construction Webinar Event ', 12],
 ['   DE/ROW ', 689],
 ['   DQ Cost ', 356],
 ['   DQ Cost Remarketing & LAL ', 16],
 ['   DQ Cost Remarketing LAL ', 4],
 ['   DQ Remarketing & LAL ', 20],
 ['   David Troyer ', 268],
 ['   December Sale ', 340],
 ['   EBook ', 108],
 ['   ENGL ', 1017],
 ['   ENT Photography Entrepreneur ', 708],
 ['   Engineers

In [32]:
fb.columns

Index(['ad_account_id', 'ad_account_name', 'reporting_starts',
       'reporting_ends', 'ad_set_name', 'age', 'ad_set_delivery', 'bid',
       'bid_type', 'ad_set_budget', 'ad_set_budget_type',
       'last_significant_edit', 'results', 'result_indicator', 'reach',
       'impressions', 'cost_per_results', 'amount_spent_(usd)', 'ends',
       'starts', 'link_clicks', 'website_purchases', 'leads_(form)',
       'campaign_name', 'campaign_id', 'lead_page_contains_thank-you',
       'lead_submission', 'ad_set_id', 'landing_page_views', 'session',
       'day_of_week', 'year', 'month', 'week_num', 'location', 'topic', 'type',
       'target', 'target_sub', 'version', 'target_grouped'],
      dtype='object')

In [33]:
fb['target_grouped'].replace(regex=True, inplace=True, to_replace=r'[^A-Za-z0-9]+',
value='')

In [34]:
fb.target_grouped=fb.target_grouped.str.strip().str.lower().str.replace(' ', '')

In [35]:
fb['target_grouped']=fb.target_grouped.str.replace('openedleadformslal','openedleadformlal')

In [36]:
fb['target_grouped']=fb.target_grouped.str.replace('MQLSQLPS','MQLSQL')

In [37]:
fb[['target_grouped']].head()

Unnamed: 0,target_grouped
7797,evergreen
7786,evergreen
7776,evergreen
7777,evergreen
7778,evergreen


In [38]:
fb['topic']=fb.topic.str.replace('Clould3.0','Cloud3.0')
fb['topic']=fb.topic.str.replace('CarribeanIslands','CaribbeanIslands')
fb['topic']=fb.topic.str.replace('AECArchitects','AEC')
fb['topic']=fb.topic.str.replace('CAWebsiteRetargeting','RetargetingWebsite')
fb['topic']=fb.topic.str.replace('CAWebsiteRetargeting','RetargetingWebsite')
fb['topic']=fb.topic.str.replace('WebsiteRetargeting','RetargetingWebsite')
fb['target']=fb.target.str.replace('Re','Re-Engagement')
fb['target']=fb.target.str.replace('&','')

In [39]:
age=['18-24', '25-34', '35-44', '45-54', '55-64', '65+']

In [40]:
fb=fb.query('age in @age')

In [41]:
#fill all blanks with novalue if the column dtype is object
fb = fb.apply(lambda x: x.fillna(0.0) if x.dtype.kind in 'biufc' else x.fillna('novalue'))

In [42]:
fb=fb.fillna(0.0)      #if the blanks are nan will need this line first

In [43]:
fb.columns

Index(['ad_account_id', 'ad_account_name', 'reporting_starts',
       'reporting_ends', 'ad_set_name', 'age', 'ad_set_delivery', 'bid',
       'bid_type', 'ad_set_budget', 'ad_set_budget_type',
       'last_significant_edit', 'results', 'result_indicator', 'reach',
       'impressions', 'cost_per_results', 'amount_spent_(usd)', 'ends',
       'starts', 'link_clicks', 'website_purchases', 'leads_(form)',
       'campaign_name', 'campaign_id', 'lead_page_contains_thank-you',
       'lead_submission', 'ad_set_id', 'landing_page_views', 'session',
       'day_of_week', 'year', 'month', 'week_num', 'location', 'topic', 'type',
       'target', 'target_sub', 'version', 'target_grouped'],
      dtype='object')

In [44]:
fb[(fb['location']=='US')&(fb['year']==2019) &(fb['topic']=='RRE')
   ].groupby(['campaign_name','ad_set_name','target_grouped']).size()

campaign_name           ad_set_name                                            target_grouped        
US-RRE-LeadSubmissions  US - RRE - Agents & Brokers - LAL - 10.17              agentsbrokers             128
                        US - RRE - Camera Sale LAL - Evergreen - 4.24          camerasalelal             108
                        US - RRE - Evergreen - Agents & Brokers - LAL - 4.10   evergreen                 256
                        US - RRE - Evergreen - Agents & Brokers - LAL - 4.19   evergreen                 128
                        US - RRE - Evergreen - Employers - 7.31                evergreen                 175
                        US - RRE - Evergreen - Job Titles - 7.31               evergreen                 174
                        US - RRE - Evergreen - Video Remarketing & LAL - 4.18  evergreen                 282
                        US - RRE - Evergreen - Video Remarketing & LAL - 4.4   evergreen                 108
                        US

In [45]:
fb['conversion']= fb[['lead_page_contains_thank-you']].astype(float)

In [46]:
def conversion_categ(df):
    if ((df.conversion>=0) & (df.conversion<=2)):
        return "btw 0 and 2 conversions"
    elif ((df.conversion>=3) & (df.conversion<=6)): 
        return "btw 3 and 6 conversions"
    elif ((df.conversion>=7) & (df.conversion<=12)): 
        return "btw 7 and 12 conversions"
    else:
        return 'more than 12 conversions'          

In [47]:
fb['labels']=fb.apply(conversion_categ,axis=1)

In [48]:
def missing_values(df):
        mis = df.isnull().sum()
        mis_percent = 100 * df.isnull().sum() / len(df)
        mis_val_table = pd.concat([mis, mis_percent], axis=1)
        mis_val_table_ren_columns = mis_val_table.rename(
        columns = {0 : 'Missing Values', 1 : '% of Total Values'})
        mis_val_table_ren_columns = mis_val_table_ren_columns[
            mis_val_table_ren_columns.iloc[:,1] != 0].sort_values(
        '% of Total Values', ascending=False).round(2)
        print ("df has " + str(df.shape[1]) + " columns.\n"      
            "There are " + str(mis_val_table_ren_columns.shape[0]) +
              " columns that have missing values.")
        return mis_val_table_ren_columns

In [49]:
missing_values(fb)

df has 43 columns.
There are 0 columns that have missing values.


Unnamed: 0,Missing Values,% of Total Values


In [50]:
np.unique(fb.age,return_counts=True)

(array(['18-24', '25-34', '35-44', '45-54', '55-64', '65+'], dtype=object),
 array([ 5661, 24286, 24293, 24289, 23058,  4298]))

In [51]:
#repordering the columns
fb=fb[['ad_account_id', 'ad_account_name', 'reporting_starts',
       'reporting_ends', 'ad_set_name', 'age', 'ad_set_delivery', 'bid',
       'bid_type', 'ad_set_budget', 'ad_set_budget_type',
       'last_significant_edit', 'results', 'result_indicator', 'reach',
       'impressions', 'cost_per_results', 'amount_spent_(usd)', 'ends',
       'starts', 'link_clicks', 'website_purchases', 'leads_(form)',
       'campaign_name', 'campaign_id', 'lead_page_contains_thank-you',
       'lead_submission', 'ad_set_id', 'landing_page_views', 'session',
       'day_of_week', 'year', 'month', 'week_num', 'location', 'topic', 'type',
       'target', 'target_sub', 'version', 'target_grouped', 'conversion',
       'labels']]

In [52]:
def unique_val_count(a):
    unique, inverse = np.unique(a, return_inverse=True)
    count = np.zeros(len(unique), np.int)
    np.add.at(count, inverse, 1)
    return np.vstack((unique, count)).T

In [53]:
fb['location'].value_counts()

US             45508
EMEA           31918
UK              7396
MX              5941
DE              5630
FR              3439
CA              2095
AU              1703
ZA              1594
ES               372
IT               112
BoostedPost      112
SE                65
Name: location, dtype: int64

In [54]:
fb.head()

Unnamed: 0,ad_account_id,ad_account_name,reporting_starts,reporting_ends,ad_set_name,age,ad_set_delivery,bid,bid_type,ad_set_budget,ad_set_budget_type,last_significant_edit,results,result_indicator,reach,impressions,cost_per_results,amount_spent_(usd),ends,starts,link_clicks,website_purchases,leads_(form),campaign_name,campaign_id,lead_page_contains_thank-you,lead_submission,ad_set_id,landing_page_views,session,day_of_week,year,month,week_num,location,topic,type,target,target_sub,version,target_grouped,conversion,labels
7797,971415906232505,Matterport,2019-09-03,2019-09-03,ES - PS - Evergreen - All Lists - 5.1,55-64,active,0,ABSOLUTE_OCPM,25.0,Daily,2019-08-16T12:52:16-0700,0.0,novalue,139,162,0.0,0.93,Ongoing,43586,0.0,0.0,0.0,EMEA-ES-PS-LeadSubmissions,6074860433239,0.0,0.0,6124979913439,0.0,sheet10,Tues,2019,9,36,EMEA,ES,PS,,Evergreen,5.1,evergreen,0.0,btw 0 and 2 conversions
7786,971415906232505,Matterport,2019-09-03,2019-09-03,UK - PS - Evergreen - Video & Website Remarket...,25-34,active,0,ABSOLUTE_OCPM,400.0,Daily,2019-09-01T21:29:38-0700,1.0,actions:offsite_conversion.custom.109991576318312,8480,9509,113.32,113.32,Ongoing,43608,52.0,0.0,0.0,EMEA-UK-PS-LeadSubmissions,6058738984839,1.0,0.0,6127430097839,37.0,sheet10,Tues,2019,9,36,EMEA,UK,PS,,Evergreen,5.23,evergreen,1.0,btw 0 and 2 conversions
7776,971415906232505,Matterport,2019-09-03,2019-09-03,UK - PS - Evergreen - SQL LAL - 5.23,25-34,active,0,ABSOLUTE_OCPM,50.0,Daily,2019-09-01T21:31:22-0700,0.0,novalue,956,1009,0.0,9.21,Ongoing,43608,7.0,0.0,0.0,EMEA-UK-PS-LeadSubmissions,6058738984839,0.0,0.0,6127432339039,4.0,sheet10,Tues,2019,9,36,EMEA,UK,PS,,Evergreen,5.23,evergreen,0.0,btw 0 and 2 conversions
7777,971415906232505,Matterport,2019-09-03,2019-09-03,UK - PS - Evergreen - SQL LAL - 5.23,18-24,active,0,ABSOLUTE_OCPM,50.0,Daily,2019-09-01T21:31:22-0700,0.0,novalue,127,139,0.0,1.03,Ongoing,43608,0.0,0.0,0.0,EMEA-UK-PS-LeadSubmissions,6058738984839,0.0,0.0,6127432339039,0.0,sheet10,Tues,2019,9,36,EMEA,UK,PS,,Evergreen,5.23,evergreen,0.0,btw 0 and 2 conversions
7778,971415906232505,Matterport,2019-09-03,2019-09-03,UK - PS - Evergreen - Camera Sales LAL - 5.23,55-64,active,0,ABSOLUTE_OCPM,50.0,Daily,2019-09-01T21:31:14-0700,0.0,novalue,63,66,0.0,1.08,Ongoing,43608,0.0,0.0,0.0,EMEA-UK-PS-LeadSubmissions,6058738984839,0.0,0.0,6127431430239,0.0,sheet10,Tues,2019,9,36,EMEA,UK,PS,,Evergreen,5.23,evergreen,0.0,btw 0 and 2 conversions


In [55]:
len(fb[((fb.campaign_name.str.contains('LeadSubmissions')) & (fb.campaign_name.str.contains('PS')))]['campaign_name'])

25661

In [56]:
len(fb[((fb.campaign_name.str.contains('LeadSubmissions')) & (fb.campaign_name.str.contains('RRE')))]['campaign_name'])

43602

In [57]:
fb[((fb.campaign_name.str.contains('LeadSubmissions')) & (fb.campaign_name.str.contains('PS'))) | 
       ((fb.campaign_name.str.contains('LeadSubmissions')) & (fb.campaign_name.str.contains('RRE')))]['campaign_name'].value_counts()

US-RRE-LeadSubmissions                 12697
US-PS-LeadSubmissions                  10440
EMEA-UK-PS-LeadSubmissions              5917
EMEA-ES-RRE-LeadSubmissions             4912
EMEA-FR-RRE-LeadSubmissions             4646
MX-RRE-LatinAmerica-LeadSubmissions     4230
EMEA-RRE-LeadSubmissions                3196
DE-RRE-LeadSubmissions                  3130
EMEA-FR-PS-LeadSubmissions              2658
EMEA-ZA-PS-LeadSubmissions              2345
UK-RRE-LeadSubmissions                  2145
EMEA-PS-LeadSubmissions                 2125
CA-RRE-LeadSubmissions                  2095
EMEA-IT-RRE-LeadSubmissions             1928
AU-RRE-LeadSubmissions                  1703
ZA-RRE-LeadSubmissions                  1134
EMEA-DE-PS-LeadSubmissions              1128
EMEA-PL-RRE-LeadSubmissions              964
EMEA-ES-PS-LeadSubmissions               485
EMEA-IT-PS-LeadSubmissions               359
US-RRE-P1Areas-LeadSubmissions           297
EMEA-UK-RRE-LeadSubmissions              260
MX-PS-LATA

In [58]:
len(fb[((fb.campaign_name.str.contains('LeadSubmissions')) & (fb.campaign_name.str.contains('PS'))) | 
       ((fb.campaign_name.str.contains('LeadSubmissions')) & (fb.campaign_name.str.contains('RRE')))]['campaign_name'])

69263

In [102]:
fb[~(((fb.campaign_name.str.contains('LeadSubmissions')) & (fb.campaign_name.str.contains('PS'))) | 
       ((fb.campaign_name.str.contains('LeadSubmissions')) & (fb.campaign_name.str.contains('RRE'))))]['campaign_name'].value_counts()

US-PS-LeadAds-2018                                                             4220
US-AEC-LeadSubmissions                                                         3843
US-MSP-Reach-Awareness                                                         1835
US-TH-LeadSubmissions                                                          1813
US-Cloud-Awareness                                                             1624
FR-RRE-Reach-Awareness                                                         1579
US-INS-LeadSubmissions                                                         1507
MX-RRE-LatinAmerica-Awareness                                                  1475
UK-PS-LeadAds-2018                                                             1456
US-RRE-LeadAds-2018                                                            1444
UK-PS-Awareness                                                                1361
DE-PS-LeadAds-2018                                                          

In [106]:
len(fb[(fb.campaign_name.str.contains('LeadSubmissions')) & (fb.campaign_name.str.contains('AEC'))])

3843

In [59]:
fb_ls = fb[((fb.campaign_name.str.contains('LeadSubmissions')) & (fb.campaign_name.str.contains('PS'))) | 
       ((fb.campaign_name.str.contains('LeadSubmissions')) & (fb.campaign_name.str.contains('RRE')))]

In [60]:
len(fb_ls)

69263

In [61]:
fb_ls.reset_index(drop = True, inplace = True)

In [62]:
fb_ls

Unnamed: 0,ad_account_id,ad_account_name,reporting_starts,reporting_ends,ad_set_name,age,ad_set_delivery,bid,bid_type,ad_set_budget,ad_set_budget_type,last_significant_edit,results,result_indicator,reach,impressions,cost_per_results,amount_spent_(usd),ends,starts,link_clicks,website_purchases,leads_(form),campaign_name,campaign_id,lead_page_contains_thank-you,lead_submission,ad_set_id,landing_page_views,session,day_of_week,year,month,week_num,location,topic,type,target,target_sub,version,target_grouped,conversion,labels
0,971415906232505,Matterport,2019-09-03,2019-09-03,ES - PS - Evergreen - All Lists - 5.1,55-64,active,0,ABSOLUTE_OCPM,25.0,Daily,2019-08-16T12:52:16-0700,0.0,novalue,139,162,0.000000,0.930000,Ongoing,43586,0.0,0.0,0.0,EMEA-ES-PS-LeadSubmissions,6074860433239,0.0,0.0,6124979913439,0.0,sheet10,Tues,2019,9,36,EMEA,ES,PS,,Evergreen,5.1,evergreen,0.0,btw 0 and 2 conversions
1,971415906232505,Matterport,2019-09-03,2019-09-03,UK - PS - Evergreen - Video & Website Remarket...,25-34,active,0,ABSOLUTE_OCPM,400.0,Daily,2019-09-01T21:29:38-0700,1.0,actions:offsite_conversion.custom.109991576318312,8480,9509,113.320000,113.320000,Ongoing,43608,52.0,0.0,0.0,EMEA-UK-PS-LeadSubmissions,6058738984839,1.0,0.0,6127430097839,37.0,sheet10,Tues,2019,9,36,EMEA,UK,PS,,Evergreen,5.23,evergreen,1.0,btw 0 and 2 conversions
2,971415906232505,Matterport,2019-09-03,2019-09-03,UK - PS - Evergreen - SQL LAL - 5.23,25-34,active,0,ABSOLUTE_OCPM,50.0,Daily,2019-09-01T21:31:22-0700,0.0,novalue,956,1009,0.000000,9.210000,Ongoing,43608,7.0,0.0,0.0,EMEA-UK-PS-LeadSubmissions,6058738984839,0.0,0.0,6127432339039,4.0,sheet10,Tues,2019,9,36,EMEA,UK,PS,,Evergreen,5.23,evergreen,0.0,btw 0 and 2 conversions
3,971415906232505,Matterport,2019-09-03,2019-09-03,UK - PS - Evergreen - SQL LAL - 5.23,18-24,active,0,ABSOLUTE_OCPM,50.0,Daily,2019-09-01T21:31:22-0700,0.0,novalue,127,139,0.000000,1.030000,Ongoing,43608,0.0,0.0,0.0,EMEA-UK-PS-LeadSubmissions,6058738984839,0.0,0.0,6127432339039,0.0,sheet10,Tues,2019,9,36,EMEA,UK,PS,,Evergreen,5.23,evergreen,0.0,btw 0 and 2 conversions
4,971415906232505,Matterport,2019-09-03,2019-09-03,UK - PS - Evergreen - Camera Sales LAL - 5.23,55-64,active,0,ABSOLUTE_OCPM,50.0,Daily,2019-09-01T21:31:14-0700,0.0,novalue,63,66,0.000000,1.080000,Ongoing,43608,0.0,0.0,0.0,EMEA-UK-PS-LeadSubmissions,6058738984839,0.0,0.0,6127431430239,0.0,sheet10,Tues,2019,9,36,EMEA,UK,PS,,Evergreen,5.23,evergreen,0.0,btw 0 and 2 conversions
5,971415906232505,Matterport,2019-09-03,2019-09-03,UK - PS - Evergreen - Camera Sales LAL - 5.23,45-54,active,0,ABSOLUTE_OCPM,50.0,Daily,2019-09-01T21:31:14-0700,1.0,actions:offsite_conversion.custom.109991576318312,992,1093,14.750000,14.750000,Ongoing,43608,5.0,0.0,0.0,EMEA-UK-PS-LeadSubmissions,6058738984839,1.0,0.0,6127431430239,5.0,sheet10,Tues,2019,9,36,EMEA,UK,PS,,Evergreen,5.23,evergreen,1.0,btw 0 and 2 conversions
6,971415906232505,Matterport,2019-09-03,2019-09-03,UK - PS - Evergreen - Camera Sales LAL - 5.23,35-44,active,0,ABSOLUTE_OCPM,50.0,Daily,2019-09-01T21:31:14-0700,0.0,novalue,1386,1545,0.000000,16.640000,Ongoing,43608,10.0,0.0,0.0,EMEA-UK-PS-LeadSubmissions,6058738984839,0.0,0.0,6127431430239,9.0,sheet10,Tues,2019,9,36,EMEA,UK,PS,,Evergreen,5.23,evergreen,0.0,btw 0 and 2 conversions
7,971415906232505,Matterport,2019-09-03,2019-09-03,UK - PS - Evergreen - Camera Sales LAL - 5.23,25-34,active,0,ABSOLUTE_OCPM,50.0,Daily,2019-09-01T21:31:14-0700,1.0,actions:offsite_conversion.custom.109991576318312,1521,1656,16.720000,16.720000,Ongoing,43608,10.0,0.0,0.0,EMEA-UK-PS-LeadSubmissions,6058738984839,1.0,0.0,6127431430239,9.0,sheet10,Tues,2019,9,36,EMEA,UK,PS,,Evergreen,5.23,evergreen,1.0,btw 0 and 2 conversions
8,971415906232505,Matterport,2019-09-03,2019-09-03,UK - PS - Evergreen - Camera Sales LAL - 5.23,18-24,active,0,ABSOLUTE_OCPM,50.0,Daily,2019-09-01T21:31:14-0700,0.0,novalue,137,157,0.000000,1.290000,Ongoing,43608,1.0,0.0,0.0,EMEA-UK-PS-LeadSubmissions,6058738984839,0.0,0.0,6127431430239,0.0,sheet10,Tues,2019,9,36,EMEA,UK,PS,,Evergreen,5.23,evergreen,0.0,btw 0 and 2 conversions
9,971415906232505,Matterport,2019-09-03,2019-09-03,UK - PS - Evergreen - Video & Website Remarket...,55-64,active,0,ABSOLUTE_OCPM,400.0,Daily,2019-09-01T21:29:38-0700,0.0,novalue,256,392,0.000000,9.000000,Ongoing,43608,3.0,0.0,0.0,EMEA-UK-PS-LeadSubmissions,6058738984839,0.0,0.0,6127430097839,3.0,sheet10,Tues,2019,9,36,EMEA,UK,PS,,Evergreen,5.23,evergreen,0.0,btw 0 and 2 conversions


In [63]:
fb_ls.location.value_counts()

EMEA    31038
US      23519
MX       4434
DE       3130
UK       2145
CA       2095
AU       1703
ZA       1134
SE         65
Name: location, dtype: int64

In [64]:
fb_ls.topic.value_counts()

RRE    30777
PS     12769
FR      7304
UK      6177
ES      5397
ZA      2345
IT      2287
DE      1243
PL       964
Name: topic, dtype: int64

In [65]:
fb_ls.type.value_counts()

LeadSubmissions    38730
PS                 12892
RRE                12825
LatinAmerica        4230
P1Areas              297
LATAM                204
Device                85
Name: type, dtype: int64

In [66]:
fb_ls[(fb_ls['location'] == 'EMEA') & ((fb_ls['topic'] == 'PS')|(fb_ls['topic'] == 'RRE'))]['type'].value_counts()

LeadSubmissions    5321
Name: type, dtype: int64

In [67]:
fb_emea = fb_ls[fb_ls['location'] == 'EMEA'][['location','topic','type','target_sub']]
fb_emea[(fb_emea['topic'] == "PS") | (fb_emea['topic'] == "RRE")]['type'].value_counts()
# fb_emea

LeadSubmissions    5321
Name: type, dtype: int64

In [68]:
loc_lst = []

for i in range(len(fb_ls)):
    if fb_ls['location'][i] == 'EMEA':
        if (fb_ls['topic'][i] == 'PS') | (fb_ls['topic'][i] == 'RRE'):
            loc_lst.append('EMEA')
        else:
            loc_lst.append(fb_ls['topic'][i])
    else:
        loc_lst.append(fb_ls['location'][i])
        
pd.Series(loc_lst).value_counts()

US      23519
UK       8322
FR       7304
ES       5397
EMEA     5321
MX       4434
DE       4373
ZA       3479
IT       2287
CA       2095
AU       1703
PL        964
SE         65
dtype: int64

In [69]:
print(pd.Series(loc_lst).value_counts().sum())
print(len(fb_ls))

69263
69263


In [70]:
fb_ls['location1'] = loc_lst
fb_ls.location1.value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


US      23519
UK       8322
FR       7304
ES       5397
EMEA     5321
MX       4434
DE       4373
ZA       3479
IT       2287
CA       2095
AU       1703
PL        964
SE         65
Name: location1, dtype: int64

In [71]:
fb_ls[fb_ls['location'] == 'EMEA']['topic'].value_counts()

FR     7304
UK     6177
ES     5397
RRE    3196
ZA     2345
IT     2287
PS     2125
DE     1243
PL      964
Name: topic, dtype: int64

In [72]:
fb_ls[(fb_ls['location'] == 'EMEA') & ((fb_ls['topic'] != 'PS')&(fb_ls['topic'] != 'RRE')) &
      (fb_ls['type'] == 'LeadSubmissions')]['ad_set_name'].value_counts()

Series([], Name: ad_set_name, dtype: int64)

In [73]:
fb_ls[(fb_ls['location'] == 'EMEA') & ((fb_ls['topic'] != 'PS')&(fb_ls['topic'] != 'RRE'))]['type'].value_counts()

PS     12892
RRE    12825
Name: type, dtype: int64

In [74]:
topic_lst = []

for i in range(len(fb_ls)):
    if fb_ls['location'][i] == 'EMEA':
        if (fb_ls['topic'][i] != 'PS') & (fb_ls['topic'][i] != 'RRE'):
            topic_lst.append(fb_ls['type'][i])
        else:
            topic_lst.append(fb_ls['topic'][i])
    else:
        topic_lst.append(fb_ls['topic'][i])
        
pd.Series(topic_lst).value_counts()

RRE    43602
PS     25661
dtype: int64

In [75]:
print(pd.Series(topic_lst).value_counts().sum())
print(len(fb_ls))

69263
69263


In [76]:
fb_ls['topic1'] = topic_lst
fb_ls.topic1.value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


RRE    43602
PS     25661
Name: topic1, dtype: int64

In [77]:
fb_ls.drop(columns = ['location','topic'], inplace=True)
fb_ls = fb_ls.rename(columns={'location1':'location',
                      'topic1':'topic'})

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [78]:
# Saving the file to S3:
from io import StringIO
import boto3

bucket='facebookconversionrates' # bucket name
data_key = 'preprocessing/fb_prepro.csv' # name of the csv file that will be created
csv_buffer = StringIO()
fb_ls.to_csv(csv_buffer)
s3_resource = boto3.resource('s3')
s3_resource.Object(bucket, data_key).put(Body=csv_buffer.getvalue())

{'ResponseMetadata': {'RequestId': '82B9413703E15F7A',
  'HostId': '4J2Cku4e9xuCgrILUxOIUtq9ftcEBTULodF1756VdvEl3ZcFraXin08KI33VliNWFmmw1DSgl+Y=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': '4J2Cku4e9xuCgrILUxOIUtq9ftcEBTULodF1756VdvEl3ZcFraXin08KI33VliNWFmmw1DSgl+Y=',
   'x-amz-request-id': '82B9413703E15F7A',
   'date': 'Thu, 05 Sep 2019 18:27:19 GMT',
   'etag': '"420d3eb98f16d67bf422cbee58a8f9d4"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"420d3eb98f16d67bf422cbee58a8f9d4"'}

## Generating the Final CSV File

In this section, I will create a CSV file containing all the combinations of age, location, topic, date and target groups.

In [81]:
# Create a list that gives all the dates between the initial and the last date.
d1 = date1  # start date
d2 = date2  # end date

delta = d2 - d1  # timedelta

dates = [] # empty list to store all the dates in between

for i in range(delta.days + 1):
    dates.append(d1 + timedelta(days=i))

dates

[datetime.date(2019, 8, 1),
 datetime.date(2019, 8, 2),
 datetime.date(2019, 8, 3),
 datetime.date(2019, 8, 4),
 datetime.date(2019, 8, 5),
 datetime.date(2019, 8, 6),
 datetime.date(2019, 8, 7),
 datetime.date(2019, 8, 8),
 datetime.date(2019, 8, 9),
 datetime.date(2019, 8, 10),
 datetime.date(2019, 8, 11),
 datetime.date(2019, 8, 12),
 datetime.date(2019, 8, 13),
 datetime.date(2019, 8, 14),
 datetime.date(2019, 8, 15),
 datetime.date(2019, 8, 16),
 datetime.date(2019, 8, 17),
 datetime.date(2019, 8, 18),
 datetime.date(2019, 8, 19),
 datetime.date(2019, 8, 20),
 datetime.date(2019, 8, 21),
 datetime.date(2019, 8, 22),
 datetime.date(2019, 8, 23),
 datetime.date(2019, 8, 24),
 datetime.date(2019, 8, 25),
 datetime.date(2019, 8, 26),
 datetime.date(2019, 8, 27),
 datetime.date(2019, 8, 28),
 datetime.date(2019, 8, 29),
 datetime.date(2019, 8, 30),
 datetime.date(2019, 8, 31)]

In [82]:
fb_ls['location'].value_counts()

US      23519
UK       8322
FR       7304
ES       5397
EMEA     5321
MX       4434
DE       4373
ZA       3479
IT       2287
CA       2095
AU       1703
PL        964
SE         65
Name: location, dtype: int64

In [87]:
# Percentage of all EMEA values in the dataframe. 
# In other words, the observations that don't have exact location.
len(fb_ls[fb_ls['location'] == 'EMEA'])/fb_ls['location'].value_counts().sum()

0.07682312345696837

In [88]:
age=['18-24', '25-34', '35-44', '45-54', '55-64', '65+'] # all combinations for age
locations = ['US','UK','FR', 'ES', 'ZA','IT','AU','DE','MX','CA'] # all combinations for location
topics = ['RRE','PS'] # all combinations for topic

In [89]:
fb_ls.columns

Index(['ad_account_id', 'ad_account_name', 'reporting_starts',
       'reporting_ends', 'ad_set_name', 'age', 'ad_set_delivery', 'bid',
       'bid_type', 'ad_set_budget', 'ad_set_budget_type',
       'last_significant_edit', 'results', 'result_indicator', 'reach',
       'impressions', 'cost_per_results', 'amount_spent_(usd)', 'ends',
       'starts', 'link_clicks', 'website_purchases', 'leads_(form)',
       'campaign_name', 'campaign_id', 'lead_page_contains_thank-you',
       'lead_submission', 'ad_set_id', 'landing_page_views', 'session',
       'day_of_week', 'year', 'month', 'week_num', 'type', 'target',
       'target_sub', 'version', 'target_grouped', 'conversion', 'labels',
       'location', 'topic'],
      dtype='object')

A mask is created to get the PS or RRE types:

In [107]:
mask1 = fb_ls['topic'].str.contains("PS")
mask2 = fb_ls['topic'].str.contains("RRE")
mask3 = fb_ls['type'].str.contains("PS")
mask4 = fb_ls['type'].str.contains("RRE")

In [108]:
PS_groups = fb_ls[mask1 | mask3].target_grouped.value_counts().index.tolist()

In [109]:
RRE_groups = fb_ls[mask2 | mask4].target_grouped.value_counts().index.tolist()

As a result two different lists are generated. They included all the target groups recorded for the corresponding topic.

In [110]:
fb_ls[mask1 | mask3].target_grouped.value_counts()[:10] # PS_groups with the frequency of each targeted group

evergreen                 6571
entselfemployeddrivers    2893
alllists                  2468
mql                       1885
febpromo                  1368
websiteremarketing        1248
mqlsql                    1179
openedleadformlal         1146
videoremarketinglal        921
sql                        755
Name: target_grouped, dtype: int64

In [111]:
fb_ls[mask2 | mask4].target_grouped.value_counts()[:10] # RRE_groups with the frequency of each targeted group

alllists                 7942
websiteremarketing       2881
remaxagents              2606
evergreen                2428
retargetingwebsitelal    1936
agentsbrokers            1624
febpromo                 1560
mqllal                   1452
remaxagentsbrokerslal    1169
engl                     1016
Name: target_grouped, dtype: int64

According to the analysis on [Data Studio](https://datastudio.google.com/u/0/reporting/1GT-CBxp6O-_IW5b55d6NTLFTa1fTJud0/page/eKxs), three most targeted groups are picked in the June '19 and early July '19. 

In [112]:
PS_groups_upd = ['evergreen','entphotographyentrepreneur','infographic', 'photographers']

In [113]:
RRE_groups_upd = ['evergreen','pagevisitorremarketing']

### Assumption:
Here there is an assumption to underline:
The most recently used target groups are picked as the targeted groups. The most recent two months are taken into consideration and the most commonly targeted 3 groups are picked for both PS and RRE.

In [114]:
target_grouped = PS_groups_upd + RRE_groups_upd

In [115]:
target_grouped

['evergreen',
 'entphotographyentrepreneur',
 'infographic',
 'photographers',
 'evergreen',
 'pagevisitorremarketing']

In [116]:
# Creating the first version of the dataframe:
df_testset = pd.DataFrame(list(product(dates,age,locations,topics,target_grouped)), 
                          columns = ['date','age','location','topic','target_grouped1'])

In [117]:
# The date column is divided into year, month and week number.
df_testset['date'] = pd.to_datetime(df_testset['date'], errors = 'coerce')

df_testset['year'] = df_testset['date'].dt.year
df_testset['month'] = df_testset['date'].dt.month
df_testset['week_num'] = df_testset['date'].dt.week

df_testset['day_of_week']=df_testset['date'].dt.dayofweek
days = {0:'Mon',1:'Tues',2:'Weds',3:'Thurs',4:'Fri',5:'Sat',6:'Sun'}
df_testset['day_of_week'] = df_testset['day_of_week'].apply(lambda x: days[x])

In [123]:
# While creating the combinations, target_grouped1 column is used to add the target groups of each topic. 
# But the point to pay attention is we cannot create all the combinations of this column just like a seperate column.
# The reason is some belongs to PS and some to RRE. So we need to delete all unmatching topic and target groups. 
# Below for loop assigns np.nan to PS target group column if they are not in the initial list of PS target groups and the 
# same process for RRE. Then the rows containing empty values are dropped.

# df_testset['target_grouped'] = np.nan
some_lst = []

for i in range(len(df_testset)):
    if df_testset['topic'][i] == 'RRE':
        if df_testset['target_grouped1'][i] not in RRE_groups_upd:
#             df_testset['target_grouped'][i] = np.nan
            some_lst.append('')
        else:
#             df_testset['target_grouped'][i] = df_testset['target_grouped1'][i]
            some_lst.append(df_testset['target_grouped1'][i])
    elif df_testset['topic'][i] == 'PS':
        if df_testset['target_grouped1'][i] not in PS_groups_upd:
#             df_testset['target_grouped'][i] = np.nan
            some_lst.append('')
        else:
#             df_testset['target_grouped'][i] = df_testset['target_grouped1'][i]
            some_lst.append(df_testset['target_grouped1'][i])
    else:
        print("Value other than PS and RRE detected!")
        break
        
df_testset['target_grouped'] = pd.Series(some_lst)
df_testset['target_grouped'] = df_testset['target_grouped'].replace('',np.nan)
df_testset.dropna(inplace = True)
df_testset.drop(columns = ['target_grouped1'], inplace = True)

In [124]:
# Dropping the duplicates and resetting the index of the dataframe:
df_testset = df_testset.drop_duplicates().reset_index(drop = True)

In [125]:
# Just to verify that we deleted all unmatching topic and target group pairings.
df_testset['target_grouped'].value_counts()

evergreen                     3720
entphotographyentrepreneur    1860
infographic                   1860
photographers                 1860
pagevisitorremarketing        1860
Name: target_grouped, dtype: int64

In [126]:
# Column rearrangement:
df_testset = df_testset[['date','year','month','week_num','day_of_week','age','location','topic','target_grouped']]

In [127]:
df_testset

Unnamed: 0,date,year,month,week_num,day_of_week,age,location,topic,target_grouped
0,2019-08-01,2019,8,31,Thurs,18-24,US,RRE,evergreen
1,2019-08-01,2019,8,31,Thurs,18-24,US,RRE,pagevisitorremarketing
2,2019-08-01,2019,8,31,Thurs,18-24,US,PS,evergreen
3,2019-08-01,2019,8,31,Thurs,18-24,US,PS,entphotographyentrepreneur
4,2019-08-01,2019,8,31,Thurs,18-24,US,PS,infographic
5,2019-08-01,2019,8,31,Thurs,18-24,US,PS,photographers
6,2019-08-01,2019,8,31,Thurs,18-24,UK,RRE,evergreen
7,2019-08-01,2019,8,31,Thurs,18-24,UK,RRE,pagevisitorremarketing
8,2019-08-01,2019,8,31,Thurs,18-24,UK,PS,evergreen
9,2019-08-01,2019,8,31,Thurs,18-24,UK,PS,entphotographyentrepreneur


To verify that all the combinations are created, the length of the dataframe is calculated which turned out to be 1512. It is important to calculate the same number by multiplying the possibilities of each column. So let's multiply the amount of different:
- dates
- age groups
- locations
- topics
- target_grouped
$$ 5*6*6*6 = 1080 $$

This is the proof that each possible combination is created!

In [128]:
len(df_testset)

11160

In [133]:
31*6*10*6

11160

## Adding the incremental increase:

### Warning:
By creating the amount spent column, I created from 10 to 250 with increments of 10. But since we have so many combinations, the amount of 250 dollars might not be reached. If we assign a daily budget of 4000 dollars, we are left with 13 dollars per combination on average.
The upper limit of 250 dollars might need adjustment in the later stages.

In [139]:
# Since there are 60 increments, we need to concatenate the already existing dataframe 24 times with itself.
df_increment = pd.concat([df_testset]*len(pd.Series(range(50,3001,10)))).reset_index(drop=True)

In [138]:
pd.Series(range(50,3001,10))

0        50
1        60
2        70
3        80
4        90
5       100
6       110
7       120
8       130
9       140
10      150
11      160
12      170
13      180
14      190
15      200
16      210
17      220
18      230
19      240
20      250
21      260
22      270
23      280
24      290
25      300
26      310
27      320
28      330
29      340
       ... 
266    2710
267    2720
268    2730
269    2740
270    2750
271    2760
272    2770
273    2780
274    2790
275    2800
276    2810
277    2820
278    2830
279    2840
280    2850
281    2860
282    2870
283    2880
284    2890
285    2900
286    2910
287    2920
288    2930
289    2940
290    2950
291    2960
292    2970
293    2980
294    2990
295    3000
Length: 296, dtype: int64

In [142]:
inc = list(range(50,3001,10))

In [143]:
some_lst = []
for i in range(len(inc)):
    some_lst = some_lst + ([inc[i]]*len(df_testset))

Checking the length of :
- new dataframe
- 60 times the old one
- the list, which we will paste on the 'amount_spent_(usd)' column

They all need to give the same number and they do:

In [144]:
len(df_increment)

3303360

In [145]:
len(df_testset)*len(range(50,3001,10))

3303360

In [146]:
len(some_lst)

3303360

In [147]:
df_increment['amount_spend_usd'] = pd.Series(some_lst)

In [148]:
df_increment.reset_index(drop=True, inplace=True)

In [149]:
# Checking a certain combination and seeing whether the incremental increase in 'amount_spent_(usd)' works:
df_increment[(df_increment['day_of_week'] == 'Thurs') & (df_increment['location'] == 'US') & 
             (df_increment['topic'] == 'RRE') & (df_increment['target_grouped'] == 'evergreen') &
             (df_increment['age'] == '18-24') ]

Unnamed: 0,date,year,month,week_num,day_of_week,age,location,topic,target_grouped,amount_spend_usd
0,2019-08-01,2019,8,31,Thurs,18-24,US,RRE,evergreen,50
2520,2019-08-08,2019,8,32,Thurs,18-24,US,RRE,evergreen,50
5040,2019-08-15,2019,8,33,Thurs,18-24,US,RRE,evergreen,50
7560,2019-08-22,2019,8,34,Thurs,18-24,US,RRE,evergreen,50
10080,2019-08-29,2019,8,35,Thurs,18-24,US,RRE,evergreen,50
11160,2019-08-01,2019,8,31,Thurs,18-24,US,RRE,evergreen,60
13680,2019-08-08,2019,8,32,Thurs,18-24,US,RRE,evergreen,60
16200,2019-08-15,2019,8,33,Thurs,18-24,US,RRE,evergreen,60
18720,2019-08-22,2019,8,34,Thurs,18-24,US,RRE,evergreen,60
21240,2019-08-29,2019,8,35,Thurs,18-24,US,RRE,evergreen,60


In [150]:
df_increment.to_csv('/home/ec2-user/SageMaker/fb_generating_test_data_2019-07-09/testdata_aug.csv')

In [151]:
# Saving the file to S3:
from io import StringIO
import boto3

bucket='facebookconversionrates' # bucket name
data_key = 'test_data/testdata_aug.csv' # name of the csv file that will be created
csv_buffer = StringIO()
df_increment.to_csv(csv_buffer)
s3_resource = boto3.resource('s3')
s3_resource.Object(bucket, data_key).put(Body=csv_buffer.getvalue())

{'ResponseMetadata': {'RequestId': 'A6835C6B7ED730B0',
  'HostId': 'kN74GWi21tXz7Hgwj/o1+44hN8qMGh874+fpLnNz+PA+S8W/fmCLtthP8tf6IMZpBKY0n5L2qpQ=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'kN74GWi21tXz7Hgwj/o1+44hN8qMGh874+fpLnNz+PA+S8W/fmCLtthP8tf6IMZpBKY0n5L2qpQ=',
   'x-amz-request-id': 'A6835C6B7ED730B0',
   'date': 'Fri, 06 Sep 2019 14:45:07 GMT',
   'etag': '"9df4fd9ec0c6e263457d3471e7b78946"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"9df4fd9ec0c6e263457d3471e7b78946"'}