In [26]:
import datetime as dt
import json
import os
import time
import uuid
from decimal import *
from io import StringIO
from urllib.parse import urlparse

import boto3
import numpy as np
import pandas as pd
import pytz
import requests
import s3fs


In [86]:


# --- Beeswax REST API session setup -----------------------------------------
# No cookies are sent on the login request; the authenticated session cookies
# are read back from `r.cookies` by later cells.
cookies = {
}

# Login payload. The e-mail and password used to be hard-coded in this cell;
# they are now read from the environment so they never end up in the notebook
# or in version control. The previously committed password should be rotated.
# A missing variable raises KeyError here, before any request is sent.
data = json.dumps({
    "email": os.environ["BEESWAX_EMAIL"],
    "password": os.environ["BEESWAX_PASSWORD"],
})

# Endpoints of the Beeswax REST API used (or kept available) by this notebook.
auth = 'https://facebook.api.beeswax.com/rest/authenticate'

get_advertisers = 'https://facebook.api.beeswax.com/rest/advertiser'

save_report = 'https://facebook.api.beeswax.com/rest/report_save'

report_queue = 'https://facebook.api.beeswax.com/rest/report_queue'

list_item_bulk = 'https://facebook.api.beeswax.com/rest/list_item_bulk'

get_lists = 'https://facebook.api.beeswax.com/rest/custom_list'

bid_model_version = 'https://facebook.api.beeswax.com/rest/bid_model_version'

# Authenticate once; fail immediately on an HTTP error instead of discovering
# a bad login only when the final upload cell runs.
r = requests.post(auth, cookies=cookies, data=data)
r.raise_for_status()

# Timezone used for every timestamp embedded in S3 keys and resource names.
# NOTE(review): 'EST' is a fixed-offset zone (no daylight saving) -- confirm
# that is intended rather than 'America/New_York'.
tz = pytz.timezone('EST')

today = dt.datetime.now(tz)
sevenday = today - dt.timedelta(days=7)


In [3]:

# query athena view of impression, click, and joined conversion data for tag_id 3
# (the tag_id 3 restriction is not visible in this SQL -- presumably it is
# part of the view definition; confirm against "fb-prod"."fbbatchprediction").
#
# The selected columns, in this order, are the same 25 names listed under
# 'attributes' in the datasource `schema` cell. The Athena CSV produced by this
# query is later handed to Amazon ML together with that schema, so keep the
# two lists in sync when adding or removing a column.
query = """
SELECT  conversions,
         campaign_id,
         impressions,
         clicks,
         app_bundle,
         ad_position,
         geo_region,
         geo_country,
         platform_browser,
         platform_os,
         rewarded,
         platform_carrier,
         platform_device_make,
         platform_device_model,
         video_player_size,
         video_completes,
         content_language,
         time_of_week,
         companion_views,
         companion_clicks,
         banner_width,
         banner_height,
         inventory_source,
         inventory_interstitial,
         spend
FROM "fb-prod"."fbbatchprediction" 
""" 


In [4]:

# Athena database that holds the prediction-input view.
database = 'fb-prod'
# S3 prefix that receives the Athena result files; a timestamp (in `tz`) is
# embedded so every run writes to its own folder.
s3_output = 's3://fb-beeswax/brian/athena/%s/' % (
    dt.datetime.now(tz).strftime('%Y-%m-%d-%H%M%S'),
)

#Function for starting athena query
def run_query(query, database, s3_output):
    """Submit a SQL statement to Athena and return the raw start response.

    Args:
        query: SQL text to execute.
        database: Athena database used as the query execution context.
        s3_output: S3 location where Athena writes the result files.

    Returns:
        The dict returned by ``start_query_execution``; its
        ``'QueryExecutionId'`` entry is printed and is what callers poll on.

    The call only starts the query -- it does not wait for completion.
    """
    athena = boto3.client('athena', region_name='us-east-1')
    request = {
        'QueryString': query,
        'QueryExecutionContext': {'Database': database},
        'ResultConfiguration': {'OutputLocation': s3_output},
    }
    started = athena.start_query_execution(**request)
    print('Execution ID: ' + started['QueryExecutionId'])
    return started

# Start the Athena query and keep its execution id for polling.
job = run_query(query, database, s3_output)

job_id = job['QueryExecutionId']
client = boto3.client('athena', region_name='us-east-1')
res = client.get_query_execution(QueryExecutionId=job_id)
x = 0  # seconds waited so far (progress message only)

# Poll every 10 seconds until Athena reports SUCCEEDED.
# FAILED and CANCELLED are terminal states too: without the check below the
# loop would never exit when the query errors out or is cancelled.
while res['QueryExecution']['Status']['State'] != 'SUCCEEDED':
    state = res['QueryExecution']['Status']['State']
    if state in ('FAILED', 'CANCELLED'):
        reason = res['QueryExecution']['Status'].get('StateChangeReason', 'no reason reported')
        raise RuntimeError(
            'Athena query {} ended in state {}: {}'.format(job_id, state, reason))
    print("it's been {} seconds".format(str(x)))
    time.sleep(10)
    x = x + 10
    res = client.get_query_execution(QueryExecutionId=job_id)

# S3 location of the query result, as reported by Athena; later cells read
# this both as the ML datasource and as the pandas input.
output = res['QueryExecution']['ResultConfiguration']['OutputLocation']


Execution ID: 8f52db4b-d080-4850-a929-a69b021fc7d1
it's been 0 seconds
it's been 10 seconds


In [5]:

# Amazon Machine Learning client used for the datasource and the batch
# prediction created further down.
mlclient = boto3.client('machinelearning', region_name='us-east-1')

# Per-run datasource name (timestamped in `tz`) and a random datasource id.
data_name = 'fb-datasource-%s' % (
    dt.datetime.now(tz).strftime('%Y-%m-%d-%H%M%S'),
)
data_id = str(uuid.uuid4())

# Datasource schema describing the Athena CSV: a header row is present and
# the attributes are listed in the same order as the columns selected by
# `query`. Each (name, type) pair below becomes one
# {'attributeName': ..., 'attributeType': ...} entry.
schema = {
    'excludedAttributeNames': [],
    'version': '1.0',
    'dataFormat': 'CSV',
    'rowId': None,
    'dataFileContainsHeader': True,
    'attributes': [
        {'attributeName': column, 'attributeType': kind}
        for column, kind in (
            ('conversions', 'BINARY'),
            ('campaign_id', 'CATEGORICAL'),
            ('impressions', 'NUMERIC'),
            ('clicks', 'NUMERIC'),
            ('app_bundle', 'CATEGORICAL'),
            ('ad_position', 'CATEGORICAL'),
            ('geo_region', 'CATEGORICAL'),
            ('geo_country', 'CATEGORICAL'),
            ('platform_browser', 'CATEGORICAL'),
            ('platform_os', 'CATEGORICAL'),
            ('rewarded', 'CATEGORICAL'),
            ('platform_carrier', 'CATEGORICAL'),
            ('platform_device_make', 'CATEGORICAL'),
            ('platform_device_model', 'CATEGORICAL'),
            ('video_player_size', 'CATEGORICAL'),
            ('video_completes', 'CATEGORICAL'),
            ('content_language', 'CATEGORICAL'),
            ('time_of_week', 'CATEGORICAL'),
            ('companion_views', 'CATEGORICAL'),
            ('companion_clicks', 'CATEGORICAL'),
            ('banner_width', 'CATEGORICAL'),
            ('banner_height', 'CATEGORICAL'),
            ('inventory_source', 'CATEGORICAL'),
            ('inventory_interstitial', 'CATEGORICAL'),
            ('spend', 'NUMERIC'),
        )
    ],
}


# Register the Athena result file (`output`) as a new Amazon ML datasource,
# described by the JSON-serialised `schema`, with statistics computation on.
response = mlclient.create_data_source_from_s3(
    DataSourceId=data_id,
    DataSourceName=data_name,
    DataSpec={
        'DataLocationS3': output,
        'DataSchema': json.dumps(schema),
    },
    ComputeStatistics=True,
)


In [6]:

# Create a new batch prediction against the datasource registered above and
# direct its results to a timestamped S3 prefix.
prediction_id = str(uuid.uuid4())
prediction_name = 'fb-prediction-%s'%(dt.datetime.now(tz).strftime('%Y-%m-%d-%H%M%S'))
prediction_output = 's3://fb-beeswax/brian/ml/%s/'%(dt.datetime.now(tz).strftime('%Y-%m-%d-%H%M%S'))
prediction_data_source_id = response['DataSourceId']
pred = mlclient.create_batch_prediction(BatchPredictionId=prediction_id, BatchPredictionName=prediction_name, MLModelId='fd8e2261-bc58-4e6a-a1af-5846c60d5df7', BatchPredictionDataSourceId=prediction_data_source_id , OutputUri=prediction_output)

get_pred = mlclient.get_batch_prediction(BatchPredictionId=pred['BatchPredictionId'])
gpt = 0  # seconds waited so far (progress message only)

# Poll every 30 seconds until the prediction is COMPLETED.
# FAILED and DELETED are terminal as well: without the check below a failed
# job would keep this cell sleeping forever.
while get_pred['Status'] != 'COMPLETED':
    if get_pred['Status'] in ('FAILED', 'DELETED'):
        raise RuntimeError(
            'Batch prediction {} ended with status {}: {}'.format(
                pred['BatchPredictionId'],
                get_pred['Status'],
                get_pred.get('Message', 'no message reported')))
    time.sleep(30)
    gpt = gpt + 30
    print("batch prediction processing for %s seconds" %gpt)
    get_pred = mlclient.get_batch_prediction(BatchPredictionId=pred['BatchPredictionId'])


batch prediction processing for 30 seconds
batch prediction processing for 60 seconds
batch prediction processing for 90 seconds
batch prediction processing for 120 seconds
batch prediction processing for 150 seconds
batch prediction processing for 180 seconds


In [7]:

# Load the original Athena result (the prediction input) into a dataframe.
df = pd.read_csv(output)

# Build the URL of the score file. The name is assembled as
# '<BatchPredictionId>-<input file stem>.csv.gz' under
# '<OutputUri>batch-prediction/result/'.
# The stem is taken from the last path segment of `output`, so it no longer
# depends on how many folders deep the Athena output prefix happens to be.
o = urlparse(output)
input_stem = o.path.rsplit('/', 1)[-1].split('.')[0]
score = get_pred['OutputUri'] + 'batch-prediction/result/' + get_pred['BatchPredictionId'] + '-' + input_stem + '.csv.gz'

# Load the gzip-compressed scores into a dataframe.
ds = pd.read_csv(score, compression='gzip')

# Scores are attached by row position (both frames use the default index).
# A row-count mismatch would otherwise be hidden as NaN or dropped scores,
# so it is rejected explicitly.
if len(ds) != len(df):
    raise ValueError(
        'score file has {} rows but the prediction input has {} rows'.format(len(ds), len(df)))

df['score'] = ds['score']


In [30]:
# Archive the scored input rows (`df`, including the 'score' column) as a CSV
# in the fb-beeswax bucket, under a folder named after today's date.
#
# NOTE: an earlier draft of this cell computed the score range and added a
# 'bid_mod' column that zeroed scores below a cut-off (the code used 0.01,
# the comment said 0.009). That logic is disabled and has no effect here.
s3 = boto3.client('s3', region_name='us-east-1')

# Serialise in memory; the index is not written.
csv_buffer = StringIO()
df.to_csv(csv_buffer, index=False)

res = s3.put_object(
    Bucket='fb-beeswax',
    Key='brian/ml-prediction/{}/{}.csv'.format(
        dt.datetime.now(tz).strftime('%Y-%m-%d'),
        prediction_name,
    ),
    Body=csv_buffer.getvalue(),
    ContentType='text/csv',
)


In [9]:
# Preview the first rows of the batch-prediction score file (`ds`).
ds.head()

Unnamed: 0,bestAnswer,score
0,1,0.688996
1,0,0.02305
2,0,0.016408
3,0,0.041295
4,0,0.051135


In [10]:
# Preview the prediction input with the 'score' column attached.
df.head()

Unnamed: 0,conversions,campaign_id,impressions,clicks,app_bundle,ad_position,geo_region,geo_country,platform_browser,platform_os,...,content_language,time_of_week,companion_views,companion_clicks,banner_width,banner_height,inventory_source,inventory_interstitial,spend,score
0,0,6,4,0,com.mobilityware.solitaire,POSITION_UNKNOWN,USA/MO,USA,Chrome Mobile,Android,...,,1022,0,0,-1,-1,RUBICON,0,0.013644,0.688996
1,0,6,2,0,com.accuweather.android,POSITION_UNKNOWN,USA/IL,USA,Chrome Mobile,Android,...,,1023,0,0,-1,-1,RUBICON,0,0.00742,0.02305
2,0,6,1,0,com.podcast.podcasts,,USA/OK,USA,Chrome Mobile,Android,...,,1024,0,0,-1,-1,STICKYADS,0,0.002687,0.016408
3,0,6,1,0,com.mobilityware.spider,POSITION_UNKNOWN,USA/CA,USA,Chrome Mobile,Android,...,,1031,0,0,-1,-1,MILLENNIAL,1,0.001845,0.041295
4,0,6,3,0,com.naver.linewebtoon,,USA/MA,USA,Chrome Mobile,Android,...,,1031,0,0,-1,-1,TREMOR,1,0.010466,0.051135


In [72]:
# Copy of the scored frame without the outcome / volume columns and
# time_of_week; what remains are the feature columns plus 'score'.
dz = df.drop(columns=[
    'conversions',
    'video_completes',
    'impressions',
    'clicks',
    'companion_views',
    'companion_clicks',
    'spend',
    'time_of_week',
])

In [73]:
# Preview the reduced frame (feature columns plus 'score').
dz.head()

Unnamed: 0,campaign_id,app_bundle,ad_position,geo_region,geo_country,platform_browser,platform_os,rewarded,platform_carrier,platform_device_make,platform_device_model,video_player_size,content_language,banner_width,banner_height,inventory_source,inventory_interstitial,score
0,6,com.mobilityware.solitaire,POSITION_UNKNOWN,USA/MO,USA,Chrome Mobile,Android,-1,AT&T,LG,M257,M,,-1,-1,RUBICON,0,0.688996
1,6,com.accuweather.android,POSITION_UNKNOWN,USA/IL,USA,Chrome Mobile,Android,-1,AT&T,Samsung,SM-G891A,L,,-1,-1,RUBICON,0,0.02305
2,6,com.podcast.podcasts,,USA/OK,USA,Chrome Mobile,Android,-1,,Samsung,SM-J327R4,S,,-1,-1,STICKYADS,0,0.016408
3,6,com.mobilityware.spider,POSITION_UNKNOWN,USA/CA,USA,Chrome Mobile,Android,-1,,Samsung,SM-S327VL,S,,-1,-1,MILLENNIAL,1,0.041295
4,6,com.naver.linewebtoon,,USA/MA,USA,Chrome Mobile,Android,-1,Verizon,Samsung,SM-N900V,M,,-1,-1,TREMOR,1,0.051135


In [74]:
# Expose the model score under the column name 'value'
# (presumably the name the Beeswax prediction file expects -- confirm).
dz = dz.rename(columns={'score': 'value'})

In [75]:
# Preview the frame after 'score' has been renamed to 'value'.
dz.head()


Unnamed: 0,campaign_id,app_bundle,ad_position,geo_region,geo_country,platform_browser,platform_os,rewarded,platform_carrier,platform_device_make,platform_device_model,video_player_size,content_language,banner_width,banner_height,inventory_source,inventory_interstitial,value
0,6,com.mobilityware.solitaire,POSITION_UNKNOWN,USA/MO,USA,Chrome Mobile,Android,-1,AT&T,LG,M257,M,,-1,-1,RUBICON,0,0.688996
1,6,com.accuweather.android,POSITION_UNKNOWN,USA/IL,USA,Chrome Mobile,Android,-1,AT&T,Samsung,SM-G891A,L,,-1,-1,RUBICON,0,0.02305
2,6,com.podcast.podcasts,,USA/OK,USA,Chrome Mobile,Android,-1,,Samsung,SM-J327R4,S,,-1,-1,STICKYADS,0,0.016408
3,6,com.mobilityware.spider,POSITION_UNKNOWN,USA/CA,USA,Chrome Mobile,Android,-1,,Samsung,SM-S327VL,S,,-1,-1,MILLENNIAL,1,0.041295
4,6,com.naver.linewebtoon,,USA/MA,USA,Chrome Mobile,Android,-1,Verizon,Samsung,SM-N900V,M,,-1,-1,TREMOR,1,0.051135


In [76]:
# S3 client for the Beeswax data bucket.
# The access key pair used to be hard-coded in this cell. It is now read from
# the environment so it never ends up in the notebook or in version control;
# the previously committed key pair must be considered leaked and rotated.
# A missing variable raises KeyError here, before anything is uploaded.
s3 = boto3.client(
    's3',
    aws_access_key_id=os.environ['BEESWAX_AWS_ACCESS_KEY_ID'],
    aws_secret_access_key=os.environ['BEESWAX_AWS_SECRET_ACCESS_KEY'],
    region_name='us-east-1')

# Serialise the predictions pipe-delimited, without the index.
csv_buffer = StringIO()
dz.to_csv(csv_buffer, sep='|', index=False)

# Destination: <prefix><date>_<prediction_name>.csv in the Beeswax bucket.
# `bucket`, `prefix` and `key` are reused by the manifest cells.
bucket = 'beeswax-data-us-east-1'
prefix = 'bid_models/facebook/predictions/'
key = '{}_{}.csv'.format(dt.datetime.now(tz).strftime('%Y-%m-%d'), prediction_name )
res = s3.put_object(Body = csv_buffer.getvalue(),
                               ContentType='text/csv',
                               Bucket= bucket,
                               Key = prefix + key)


In [79]:
# Show the prediction file name, its full object key (prefix + name) and the
# raw put_object response of the predictions upload.
print(key)
print(prefix+key)
print(res)


2019-02-21_fb-prediction-2019-02-19-225941.csv
bid_models/facebook/predictions/2019-02-21_fb-prediction-2019-02-19-225941.csv
{'ResponseMetadata': {'RequestId': '65FE26D74AF387A7', 'HostId': 'TT/nRuy1hNhBzutfipRDCznq6Z/ZJTMDMIEdT5JLyZlWLtIsD7TMjmURp9rbroxY2N5Nsd5j7/o=', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amz-id-2': 'TT/nRuy1hNhBzutfipRDCznq6Z/ZJTMDMIEdT5JLyZlWLtIsD7TMjmURp9rbroxY2N5Nsd5j7/o=', 'x-amz-request-id': '65FE26D74AF387A7', 'date': 'Thu, 21 Feb 2019 20:08:26 GMT', 'x-amz-version-id': 'bJAG3vfa13fre.nLfgY5YYxyGv7SuaQw', 'etag': '"3874522f59cb31ffbb1d4d7aca624f15"', 'content-length': '0', 'server': 'AmazonS3'}, 'RetryAttempts': 0}, 'ETag': '"3874522f59cb31ffbb1d4d7aca624f15"', 'VersionId': 'bJAG3vfa13fre.nLfgY5YYxyGv7SuaQw'}


In [78]:
# Feature columns only (everything except 'value'); their names become the
# manifest's field list.
dmanifest = dz.drop(columns=['value'])





In [80]:
# Manifest for the uploaded predictions: the S3 URL of the prediction file
# and the ordered list of feature column names.
manifest = dict(
    model_predictions=[
        's3://{}/{}'.format(bucket, prefix + key),
    ],
    metadata=dict(
        fields=dmanifest.columns.tolist(),
    ),
)

In [81]:
# Show the manifest that is about to be uploaded.
print(manifest)

{'model_predictions': ['s3://beeswax-data-us-east-1/bid_models/facebook/predictions/2019-02-21_fb-prediction-2019-02-19-225941.csv'], 'metadata': {'fields': ['campaign_id', 'app_bundle', 'ad_position', 'geo_region', 'geo_country', 'platform_browser', 'platform_os', 'rewarded', 'platform_carrier', 'platform_device_make', 'platform_device_model', 'video_player_size', 'content_language', 'banner_width', 'banner_height', 'inventory_source', 'inventory_interstitial']}}


In [82]:

# S3 resource for the Beeswax data bucket.
# The access key pair used to be hard-coded in this cell. It is now read from
# the environment so it never ends up in the notebook or in version control;
# the previously committed key pair must be considered leaked and rotated.
# A missing variable raises KeyError here, before anything is uploaded.
s3resource = boto3.resource(
    's3',
    aws_access_key_id=os.environ['BEESWAX_AWS_ACCESS_KEY_ID'],
    aws_secret_access_key=os.environ['BEESWAX_AWS_SECRET_ACCESS_KEY'],
    region_name='us-east-1')

# Upload the manifest as JSON. `key` already ends in '.csv', so the object
# name ends in '.csv.json'; the manifest_path cell builds the same name.
obj = s3resource.Object(bucket,'bid_models/facebook/customer_manifests/manifest_{}.json'.format(key))
obj.put(Body=json.dumps(manifest))


{'ResponseMetadata': {'RequestId': '999510C1D4F3BCF8',
  'HostId': '0xRmGW3Onaif21Da5qr5BiVwnKnzAAOsGzNZsnSeRm4H/UGMHH/gYWNdn+5filtItQs54YWLHv0=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': '0xRmGW3Onaif21Da5qr5BiVwnKnzAAOsGzNZsnSeRm4H/UGMHH/gYWNdn+5filtItQs54YWLHv0=',
   'x-amz-request-id': '999510C1D4F3BCF8',
   'date': 'Thu, 21 Feb 2019 20:12:15 GMT',
   'x-amz-version-id': 'uUJke6HRjXBgCs7acYfmqvAbJErq76Ba',
   'etag': '"58efdff8b40f2c88fc08dc48eac48c07"',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'ETag': '"58efdff8b40f2c88fc08dc48eac48c07"',
 'VersionId': 'uUJke6HRjXBgCs7acYfmqvAbJErq76Ba'}

In [91]:
# Full s3:// URL of the manifest object; must match the key the manifest was
# uploaded under.
manifest_path = 's3://{}/{}'.format(
    bucket,
    'bid_models/facebook/customer_manifests/manifest_{}.json'.format(key),
)

In [92]:
# Display the manifest URL for a visual check.
manifest_path


's3://beeswax-data-us-east-1/bid_models/facebook/customer_manifests/manifest_2019-02-21_fb-prediction-2019-02-19-225941.csv.json'

In [85]:
# Apply the canned ACL 'bucket-owner-full-control' to the uploaded manifest
# object (`obj`). Presumably required because the bucket belongs to another
# account -- confirm. NOTE(review): no equivalent ACL call is visible for the
# predictions file uploaded earlier; verify whether it needs one too.
obj.Acl().put(ACL='bucket-owner-full-control')

{'ResponseMetadata': {'RequestId': '16D015D17620BAD7',
  'HostId': 'aplHJ4a3FSflGoEyc9igA7CW1LEosQjxNyDmGYD5jeB2QfmBPRbYvkIEcXuy102IsunNjd9+ruM=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'aplHJ4a3FSflGoEyc9igA7CW1LEosQjxNyDmGYD5jeB2QfmBPRbYvkIEcXuy102IsunNjd9+ruM=',
   'x-amz-request-id': '16D015D17620BAD7',
   'date': 'Thu, 21 Feb 2019 20:13:28 GMT',
   'x-amz-version-id': 'uUJke6HRjXBgCs7acYfmqvAbJErq76Ba',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0}}

In [87]:
# Request body for creating a new bid model version: a timestamped version
# name and the S3 URL of the manifest.
# NOTE(review): bid_model_id is fixed to 1 -- confirm it is the intended model.
model = dict(
    active=True,
    bid_model_id=1,
    bid_model_version_name='{}_{}'.format(
        dt.datetime.now(tz).strftime('%Y-%m-%d-%H%M%S'),
        prediction_name,
    ),
    manifest_s3_path=manifest_path,
)

In [88]:
# Display the request body before it is posted.
model

{'active': True,
 'bid_model_id': 1,
 'bid_model_version_name': '2019-02-21-152141_fb-prediction-2019-02-19-225941',
 'manifest_s3_path': 's3://beeswax-data-us-east-1/bid_models/facebook/customer_manifests/manifest_2019-02-21_fb-prediction-2019-02-19-225941.csv.json'}

In [89]:
# Create the bid model version, authenticating with the session cookies from
# the login response `r`. An HTTP error status raises immediately instead of
# passing unnoticed; `upload_model` is still bound, so the response body can
# be inspected after a failure.
upload_model = requests.post(bid_model_version, cookies=r.cookies, data=json.dumps(model))
upload_model.raise_for_status()

In [90]:
# Display the decoded JSON body of the bid_model_version response.
upload_model.json()

{'success': True,
 'payload': {'id': 2},
 'message': 'bid_model_version created with ID = 2'}