In [4]:
import datetime as dt
import json
import os
from io import StringIO

import boto3
import pandas as pd
import pyhive
import pytz
import requests

In [5]:
# EMR API client for us-east-1. No explicit credentials are passed here.
emr_client = boto3.client('emr', region_name='us-east-1')

In [6]:
# Account key used below to build the bucket, Hive database and API host names.
buzzkey = 'zynga'
# Account bucket; follows the same '<buzzkey>-beeswax' pattern as the EMR LogUri.
bucket = '{buzzkey}-beeswax'.format(buzzkey=buzzkey)
# NOTE(review): 'EST' is the fixed UTC-5 zone (no daylight saving) -- confirm
# that 'America/New_York' was not intended for the upload file timestamps.
tz = pytz.timezone('EST')


In [6]:
def _ebs_backed_group(name, role, count, market='ON_DEMAND', bid_price=None):
    """Build one m4.2xlarge instance-group spec with a single 64 GB gp2 volume.

    ``bid_price`` is emitted as 'BidPrice' only when given (the SPOT task group).
    """
    group = {
        'Name': name,
        'Market': market,
        'InstanceRole': role,
        'InstanceType': 'm4.2xlarge',
        'InstanceCount': count,
        'EbsConfiguration': {
            'EbsBlockDeviceConfigs': [
                {
                    'VolumeSpecification': {'VolumeType': 'gp2', 'SizeInGB': 64},
                    'VolumesPerInstance': 1,
                },
            ],
            'EbsOptimized': True,
        },
    }
    if bid_price is not None:
        group['BidPrice'] = bid_price
    return group


# Launch a long-running (KeepJobFlowAliveWhenNoSteps) Hadoop/Hive/Presto cluster
# whose Hive and Presto catalogs are configured to use the Glue data catalog.
emr_id = emr_client.run_job_flow(
    Name='dfs-audience-presto-cluster-1',
    LogUri='s3://{buzzkey}-beeswax/emr-logs'.format(buzzkey=buzzkey),
    ReleaseLabel='emr-5.23.0',
    Instances={
        'InstanceGroups': [
            _ebs_backed_group('Master-1', 'MASTER', 1),
            _ebs_backed_group('Core-1', 'CORE', 2),
            _ebs_backed_group('Task-1', 'TASK', 2, market='SPOT', bid_price='0.09'),
        ],
        'Ec2KeyName': 'fb',
        'KeepJobFlowAliveWhenNoSteps': True,
        'TerminationProtected': False,
        'HadoopVersion': '2.8.5',
        'Ec2SubnetId': 'subnet-0060120f',
        'EmrManagedMasterSecurityGroup': 'sg-0c84d34f4bb911867',
        'EmrManagedSlaveSecurityGroup': 'sg-0c84d34f4bb911867',
    },
    Applications=[{'Name': app} for app in ('Hadoop', 'Hive', 'Presto')],
    Configurations=[
        {
            'Classification': 'hive-site',
            'Properties': {
                'hive.metastore.client.factory.class': 'com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory',
            },
        },
        {
            'Classification': 'presto-connector-hive',
            'Properties': {
                'hive.metastore.glue.datacatalog.enabled': 'true',
            },
        },
    ],
    VisibleToAllUsers=True,
    JobFlowRole='EMR_EC2_DefaultRole',
    ServiceRole='EMR_DefaultRole',
    Tags=[
        {'Key': 'buzzkey', 'Value': '{buzzkey}'.format(buzzkey=buzzkey)},
    ],
    AutoScalingRole='EMR_AutoScaling_DefaultRole',
    ScaleDownBehavior='TERMINATE_AT_TASK_COMPLETION',
    EbsRootVolumeSize=10,
)

In [7]:
# Fetch the description of the cluster that run_job_flow returned an id for.
cluster = emr_client.describe_cluster(ClusterId=emr_id['JobFlowId'])

In [8]:
# Display the describe_cluster response (state, applications, tags, ...).
cluster

{'Cluster': {'Id': 'j-3L94DWPWHEBHR',
  'Name': 'dfs-audience-presto-cluster-1',
  'Status': {'State': 'STARTING',
   'StateChangeReason': {},
   'Timeline': {'CreationDateTime': datetime.datetime(2019, 4, 13, 22, 4, 41, 295000, tzinfo=tzlocal())}},
  'Ec2InstanceAttributes': {'Ec2KeyName': 'fb',
   'Ec2SubnetId': 'subnet-0060120f',
   'RequestedEc2SubnetIds': ['subnet-0060120f'],
   'RequestedEc2AvailabilityZones': [],
   'IamInstanceProfile': 'EMR_EC2_DefaultRole',
   'EmrManagedMasterSecurityGroup': 'sg-0c84d34f4bb911867',
   'EmrManagedSlaveSecurityGroup': 'sg-0c84d34f4bb911867'},
  'InstanceCollectionType': 'INSTANCE_GROUP',
  'LogUri': 's3n://zynga-beeswax/emr-logs/',
  'ReleaseLabel': 'emr-5.23.0',
  'AutoTerminate': False,
  'TerminationProtected': False,
  'VisibleToAllUsers': True,
  'Applications': [{'Name': 'Hadoop', 'Version': '2.8.5'},
   {'Name': 'Hive', 'Version': '2.3.4'},
   {'Name': 'Presto', 'Version': '0.215'}],
  'Tags': [{'Key': 'buzzkey', 'Value': 'zynga'}],
  '

In [9]:
# Waiter for the EMR client's 'cluster_running' condition.
waiter = emr_client.get_waiter('cluster_running')

In [10]:
# Block until the waiter returns for this cluster, then report it.
job_flow_id = emr_id['JobFlowId']
waiter.wait(ClusterId=job_flow_id)
print('cluster {} is running'.format(job_flow_id))

cluster j-3L94DWPWHEBHR is running


In [11]:
# List the RUNNING instances that belong to the cluster's MASTER group.
master = emr_client.list_instances(
    ClusterId=emr_id['JobFlowId'],
    InstanceGroupTypes=['MASTER'],
    InstanceStates=['RUNNING'],
)

In [12]:
# Private IP of the first listed MASTER instance; used below as the Hive and
# Presto host. Raises IndexError if list_instances returned no instances.
master_ip = master['Instances'][0]['PrivateIpAddress']

In [7]:
# Display the master node address.
master_ip

In [14]:
#!conda install pyhive --y

In [8]:
from pyhive import hive  # or import hive

# Open the Hive connection to the master node as user 'hadoop' and take the
# cursor that the DDL and load helpers below execute against.
hive_connection = hive.connect(master_ip, username='hadoop')
cursor = hive_connection.cursor()

In [16]:
#cursor.execute("drop table zynga_audience_output")

In [17]:
#cursor.execute("drop table zynga_auctions_csv")

In [18]:
#cursor.execute("drop table zynga_auctions_orc")

In [19]:
#cursor.execute("create database {buzzkey}_audience_exp".format(buzzkey=buzzkey))

In [9]:
# Switch the Hive session to the '<buzzkey>_audience_exp' database.
cursor.execute("use {buzzkey}_audience_exp".format(buzzkey=buzzkey))

In [10]:
# List the tables of the current database; results are fetched in the next cell.
cursor.execute("show tables")

In [11]:
# Print the rows returned by the previous statement.
print(cursor.fetchall())

[('zynga_auctions_csv',), ('zynga_auctions_orc',), ('zynga_audience_output',)]


In [12]:
#cursor.execute( """
#CREATE EXTERNAL TABLE zynga_auctions_orc(
#app_bundle VARCHAR(255)
#, user_id VARCHAR(255)
#, app_name VARCHAR(255)
#, auction_id VARCHAR(255)
#, bid_time TIMESTAMP
#) 
#STORED AS orc
#LOCATION 's3://zynga-beeswax/hive/orc/'""")


# Staging table over the comma-delimited auction logs. The statement is a no-op
# when the table already exists, so schema changes only take effect after the
# table has been dropped and re-created.
# bid_time_epoch_in_usecs is BIGINT: a microsecond epoch does not fit in Hive's
# 4-byte INT.
# NOTE(review): plain ',' splitting does not honour quoted fields -- confirm
# that free-text columns such as `ua` never contain commas.
cursor.execute("""
CREATE EXTERNAL TABLE IF NOT EXISTS zynga_auctions_csv(
ad_position VARCHAR(255)
, app_bundle VARCHAR(255)
, app_id VARCHAR(255)
, app_name VARCHAR(255)
, auction_id VARCHAR(255)
, bid_time TIMESTAMP
, category VARCHAR(255)
, content_coppa_flag BOOLEAN
, content_language VARCHAR(255)
, content_rating VARCHAR(255)
, domain VARCHAR(255)
, environment_type VARCHAR(255)
, geo_city VARCHAR(255)
, geo_country VARCHAR(255)
, geo_metro VARCHAR(255)
, geo_region VARCHAR(255)
, geo_zip VARCHAR(255)
, inventory_interstitial BOOLEAN
, inventory_source VARCHAR(255)
, inventory_source_relationship VARCHAR(255)
, ip_address VARCHAR(255)
, ip_range VARCHAR(255)
, placement VARCHAR(255)
, platform_bandwidth VARCHAR(255)
, platform_browser VARCHAR(255)
, platform_browser_version VARCHAR(255)
, platform_carrier VARCHAR(255)
, platform_device_didmd5 VARCHAR(255)
, platform_device_didsha1 VARCHAR(255)
, platform_device_dpidmd5 VARCHAR(255)
, platform_device_dpidsha1 VARCHAR(255)
, platform_device_idfa VARCHAR(255)
, platform_device_ifa VARCHAR(255)
, platform_device_make VARCHAR(255)
, platform_device_model VARCHAR(255)
, platform_device_screen_size VARCHAR(255)
, platform_device_type VARCHAR(255)
, platform_js BOOLEAN
, platform_os VARCHAR(255)
, platform_os_version VARCHAR(255)
, segment_id VARCHAR(600)
, segment_user_id INT
, site_id VARCHAR(255)
, site_name VARCHAR(255)
, time_of_week INT
, user_id VARCHAR(255)
, video_boxing_allowed BOOLEAN
, video_companion_required BOOLEAN
, video_playback_method VARCHAR(255)
, video_player_size VARCHAR(255)
, video_start_delay INT
, test BOOLEAN
, placement_type VARCHAR(255)
, geo_lat VARCHAR(50)
, geo_long VARCHAR(50)
, video_min_duration INT
, video_max_duration INT
, video_player_width INT
, video_player_height INT
, banner_width INT
, banner_height INT
, banner_width_max INT
, banner_height_max INT
, banner_width_min INT
, banner_height_min INT
, dnt INT
, geo_type VARCHAR(20)
, bid_time_epoch_in_usecs BIGINT
, page_url VARCHAR(255)
, exchange_predicted_view_rate INT
, available_deal_ids VARCHAR(255)
, battrs VARCHAR(255)
, exchange_auction_id VARCHAR(255)
, rewarded INT
, ua VARCHAR(255)
, bid_floor_micros INT
, bid_floor_currency VARCHAR(10)
, display_manager VARCHAR(255)
, display_manager_ver VARCHAR(255)
, exchange_device_make VARCHAR(255)
, exchange_device_model VARCHAR(255)
, user_id_type VARCHAR(20)
, auction_type INT
, publisher_id VARCHAR(255)
, ads_txt VARCHAR(20)
, matched_user_groups VARCHAR(255)
, ipv6_address VARCHAR(255)
, user_id_hashed VARCHAR(255)
, ip_address_hashed VARCHAR(255)
, ipv6_address_hashed VARCHAR(255)
, is_gdpr INT
, gdpr_consent_string VARCHAR(255)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION 's3://zynga-beeswax/hive/csv/'""")

In [13]:
def load_auction_data_hour(hour, overwrite, year='2019', month='04', day='12'):
    """Load one hour of gzipped BidRequest logs from S3 into the CSV Hive table.

    Args:
        hour: Hour partition value as it appears in the S3 path, e.g. '07'.
        overwrite: When true, issue ``LOAD DATA ... OVERWRITE INTO TABLE`` so
            the table's current contents are replaced; otherwise append.
        year, month, day: Date partition values. The defaults reproduce the
            2019-04-12 date that used to be hard-coded in the path.

    Runs on the module-level Hive ``cursor``; whatever ``cursor.execute``
    raises propagates to the caller.
    """
    inpath = (
        "s3://fb-beeswax-east/beeswax-logs/auctions/"
        "year={year}/month={month}/day={day}/hour={hour}/min=*/*/BidRequest/*.gz"
    ).format(year=year, month=month, day=day, hour=hour)
    # One statement template; only the OVERWRITE keyword is conditional.
    overwrite_clause = "OVERWRITE " if overwrite else ""
    cursor.execute(
        "LOAD DATA INPATH '{inpath}' "
        "{overwrite_clause}INTO TABLE zynga_audience_exp.zynga_auctions_csv".format(
            inpath=inpath, overwrite_clause=overwrite_clause))
    
    

In [14]:
def load_slim_auction_data_orc(overwrite):
    """Copy the slim column set from the CSV staging table into the ORC table.

    Args:
        overwrite: When true, replace the ORC table's contents
            (``INSERT OVERWRITE TABLE``); otherwise append
            (``INSERT INTO TABLE``).

    Runs on the module-level Hive ``cursor``; whatever ``cursor.execute``
    raises propagates to the caller.
    """
    # HiveQL accepts either "INSERT OVERWRITE TABLE t" or "INSERT INTO TABLE t".
    # The previous "INSERT OVERWRITE INTO TABLE" form is a parse error.
    insert_clause = "INSERT OVERWRITE TABLE" if overwrite else "INSERT INTO TABLE"
    cursor.execute("""
        {insert_clause} zynga_auctions_orc
        SELECT app_bundle, user_id, app_name, auction_id, bid_time FROM zynga_auctions_csv 
        """.format(insert_clause=insert_clause))
    

In [15]:
# One [hour, overwrite_orc] pair per hour of the day ('00'..'23'); only '00'
# carries True, i.e. the first hour replaces the ORC table and the rest append.
hours = [['{:02d}'.format(h), h == 0] for h in range(24)]




In [None]:
# For each hour: stage that hour's logs in the CSV table, then copy the slim
# columns into the ORC table.
for hour, overwrite_orc in hours:
    try:
        # The CSV load always passes overwrite=True, replacing the staging
        # table's contents with the hour being processed.
        load_auction_data_hour(hour, True)
    except Exception as exc:
        # Catch Exception rather than using a bare except, so a kernel
        # interrupt still stops the loop, and report why the hour was skipped.
        print('File for hour {} failed to load: {}'.format(hour, exc))
        continue
    print('CSV auction data for hour {} loaded into hive...'.format(hour))
    # NOTE(review): only hour '00' requests an ORC overwrite. If that hour
    # fails to load, later hours are appended to whatever the ORC table
    # already held -- confirm this is acceptable.
    load_slim_auction_data_orc(overwrite_orc)
    print('ORC auction data for hour {} loaded into hive...'.format(hour))

File for hour 00 failed to load.
File for hour 01 failed to load.
File for hour 02 failed to load.
File for hour 03 failed to load.
File for hour 04 failed to load.
File for hour 05 failed to load.
File for hour 06 failed to load.
File for hour 07 failed to load.
File for hour 08 failed to load.
File for hour 09 failed to load.
CSV auction data for hour 10 loaded into hive...
ORC auction data for hour 10 loaded into hive...
CSV auction data for hour 11 loaded into hive...
ORC auction data for hour 11 loaded into hive...


In [16]:
from pyhive import presto  # or import hive

# Presto connection to the master node on port 8889, and a cursor on it.
presto_connection = presto.connect(master_ip, port=8889)
prestocur = presto_connection.cursor()

In [17]:
# App bundle ids used to select the audience in the Presto query below.
# The list contains repeated entries; they are kept as-is.
poker_apps = [
    'com.luckyday.app',
    'com.playtika.caesarscasino',
    'slots.machine.winning.android',
    'com.huuuge.casino.slots',
    'com.bingo.blaze.free',
    'air.com.bitrhymes.bingo',
    'com.murka.slotsera',
    'com.tw.tycoon.casino',
    'com.murka.scatterslots',
    'com.leftover.CoinDozer',
    'com.huuuge.casino.slots',
    'com.murka.scatterslots',
    'com.luckyday.app',
    'com.huuuge.casino.texas',
    'com.murka.slotsera',
    'slots.pcg.casino.games.free.android',
    'com.williamsinteractive.jackpotparty',
    'com.tw.tycoon.casino',
    'air.com.playtika.slotomania',
    'air.com.buffalo_studios.newflashbingo',
    'com.doubleugames.DoubleUCasino',
    'com.murka.infinityslots',
    'com.igs.fafafa',
    'com.williamsinteractive.goldfish',
    'com.scientificgames.monopolyslots',
    'com.wisewide.lbc.vegas',
    'com.bagelcode.slots1',
    'com.pacificinteractive.HouseOfFun',
    'com.ddi',
    'games.baobab.boomboom',
    'com.octro.teenpatti',
    'com.playtika.caesarscasino',
    'com.diamondlife.slots.vegas.free',
    'com.goplayplay.css.android',
    'com.deeptown.gaple',
    'air.com.spicerackmedia.bingoshowdown',
    'com.ftxgames.twdslots',
    'com.wonderpeople.megahitpoker.global',
    'com.ballytechnologies.f88',
    'com.teenpatti.hd.gold',
    'com.huuuge.stars.slots',
    'com.playstudios.popslots',
    'air.com.wizits.slotagram.android',
    'com.productmadness.cashmancasino',
    'com.pokaapoker.texas',
    'com.leftover.CoinDozer',
    'com.blastworks.slingoarcade',
    'com.doubleugames.take5',
    'com.ballytechnologies.quickhitslots',
    'com.house.of.blackjack21.free.online.casino',
    'com.casinojoy.videoslots',
    'com.productmadness.hovmobile',
    'com.selfawaregames.acecasino',
    'com.winnersclub.empire88slots',
    'kr.co.tk.game.bingo75.google',
    'com.blowfire.wildclassic',
    'com.zynga.livepoker',
    'slots.machine.winning.android',
    'air.com.wizits.vegas',
    'com.dominocard.uptodown',
    'net.flysher.rockncash',
    'com.link.newslots',
    'net.spintowinslots.androidresub',
    'ppl.unity.cubeslots',
    'com.DgnGames.OldVegasSlots',
    'com.grandegames.slots.dafu.casino',
    'com.igs.goldentigerslots',
    'com.rubyseven.rsvideopoker',
    'com.rubyseven.luckynorthcasino',
    'air.com.wheelslot.jackpotspin',
    'com.playstudios.mykonami',
    'com.rubyseven.bestbetcasino',
    'com.slots.classicvegas',
    'com.playstudios.myvegas',
    'com.pipastudios.bingobloon',
    'com.neptune.domino',
    'air.com.glidingdeer.bingodrivemobile',
    'com.uken.bingo_infinity',
    'com.madovergames.SlotsBlueDiamond',
    'com.joj.idtexas',
    'air.com.everi.hrv',
    'com.luckmob.freeslots',
    'com.purplekiwii.vegaslive',
    'com.slots.casinojoy2',
    'com.gamesofa.android.luxytexasholdem',
    'air.dd.casino.slotc1',
    'com.memoriki.fullhousecasino',
    'com.slots.allvegas',
    'com.slots.classicvegas2',
    'com.gamepoint.bingo',
    'com.classicslots.luckycity',
    'com.rubyseven.tropworldvideopoker',
    'com.doubledowninteractive.ftknox',
    'air.com.everi.sjs',
    'air.com.gameaccount.empireCity.slots',
    'air.com.gan.turningstone.slots',
    'com.AsgardCasino',
    'com.jx.cc',
    'com.slotsclub.sevenslots',
]

In [18]:
# Second list of app bundle ids; it is not referenced by the cells shown below.
racing_apps = [
    'com.dmi.nascarheat',
    'com.x3m.tx4',
    'com.deemedyainc.dx2',
    'com.wolvesinteractive.traffictour',
    'com.deemedyainc.dx',
    'com.creativemobile.nno',
    'com.hutchgames.hilldash2',
    'com.naturalmotion.customstreetracer2',
    'com.abcgame.sloperun',
    'com.traviangames.ugpx',
    'com.brian.marblerace',
    'com.abcgame.fallingball',
    'games.teatime.hyperspeed',
    'com.codemasters.F1Mobile',
    'com.car.race.supercar',
    'com.combineinc.streetracing.driftthreeD',
    'com.wolvesinteractive.traffictourbike',
    'com.zipper9.flipracer',
    'com.naturalmotion.customstreetracer2',
    'com.kwalee.skiddycar',
    'com.dubaipolice.uaedrifting',
]

In [44]:
# Render poker_apps as a single-quoted, comma-separated SQL literal list for
# use inside an IN (...) clause.
separator = "' , '"
p_apps = "'{}'".format(separator.join(poker_apps))
print(p_apps)

'com.luckyday.app' , 'com.playtika.caesarscasino' , 'slots.machine.winning.android' , 'com.huuuge.casino.slots' , 'com.bingo.blaze.free' , 'air.com.bitrhymes.bingo' , 'com.murka.slotsera' , 'com.tw.tycoon.casino' , 'com.murka.scatterslots' , 'com.leftover.CoinDozer' , 'com.huuuge.casino.slots' , 'com.murka.scatterslots' , 'com.luckyday.app' , 'com.huuuge.casino.texas' , 'com.murka.slotsera' , 'slots.pcg.casino.games.free.android' , 'com.williamsinteractive.jackpotparty' , 'com.tw.tycoon.casino' , 'air.com.playtika.slotomania' , 'air.com.buffalo_studios.newflashbingo' , 'com.doubleugames.DoubleUCasino' , 'com.murka.infinityslots' , 'com.igs.fafafa' , 'com.williamsinteractive.goldfish' , 'com.scientificgames.monopolyslots' , 'com.wisewide.lbc.vegas' , 'com.bagelcode.slots1' , 'com.pacificinteractive.HouseOfFun' , 'com.ddi' , 'games.baobab.boomboom' , 'com.octro.teenpatti' , 'com.playtika.caesarscasino' , 'com.diamondlife.slots.vegas.free' , 'com.goplayplay.css.android' , 'com.deeptown.ga

In [45]:
#prestocur.execute("select count(*) from zynga_audience_exp.zynga_auctions_orc")

In [46]:
#prestocur.fetchall()

In [47]:
#prestocur.execute("""select user_id, app_bundle, app_name from zynga_auctions_orc where app_bundle in ({}) group by user_id, app_bundle, app_name limit 5 """.format(p_apps))

In [48]:
# Distinct user ids seen in the ORC table for any of the listed app bundles,
# read through a fresh Presto connection into a DataFrame.
poker_users_sql = "select distinct user_id from zynga_audience_exp.zynga_auctions_orc where app_bundle in ({}) ".format(p_apps)
df = pd.read_sql(poker_users_sql, presto.connect(master_ip, port=8889))

In [49]:
# Preview the first rows of the query result.
df.head()

Unnamed: 0,user_id
0,mid.E99D8383-4061-4005-985A-D89BBD055B4D
1,mid.9A2D438D-1F3D-4627-97B8-6A18FE0D958A
2,mid.7ED64066-5245-4527-9FD0-5B085B3FD231
3,mid.F5D73163-934D-4BFD-BD34-2FDE5323E947
4,mid.1C566A54-49E1-4229-AB0A-FFA7DDEE513A


In [50]:
# Number of distinct user ids returned by the query.
len(df)

713291

In [51]:
# S3 client and resource used for the audience-file uploads.
# The access key pair is read from the environment instead of being written in
# the notebook. When the variables are unset, None is passed and boto3 falls
# back to its default credential chain.
# SECURITY: the key pair that used to be hard-coded here must be treated as
# leaked and rotated.
s3_session = boto3.Session(
    aws_access_key_id=os.environ.get('BEESWAX_S3_ACCESS_KEY_ID'),
    aws_secret_access_key=os.environ.get('BEESWAX_S3_SECRET_ACCESS_KEY'),
    region_name='us-east-1',
)
s3 = s3_session.client('s3')
s3resource = s3_session.resource('s3')

In [52]:
#bucket = s3resource.Bucket('zynga-beeswax')
#prefix_objs = bucket.objects.filter(Prefix="hive/audience-output")
#keys = []
#for obj in prefix_objs:
#    keys.append(obj.key)

In [53]:
#keys

In [54]:
# Row count before the user_id column is split.
len(df)

713291

In [55]:
# Split each user_id at its first '.' into the leading prefix ('mid') and the
# remaining id.
# - `n` is passed by keyword: pandas 2.x rejects it as a positional argument.
# - expand=True plus reindex yields exactly two columns even when a value has
#   no '.', leaving the missing part empty instead of failing.
# NOTE: this rebinds `df`; re-running the cell on the already-split frame
# would blank the ids, so re-run the query cell first.
user_id_parts = (
    df['user_id']
    .str.split('.', n=1, expand=True)
    .reindex(columns=[0, 1])
    .reset_index(drop=True)
)
user_id_parts.columns = ['mid', 'user_id']
df = user_id_parts

In [56]:
# Discard the prefix column; only the bare id is kept.
df = df.drop(columns=['mid'])

In [57]:
# Constant segment label attached to every user id in the frame.
df['segment'] = 'zynga-21'

In [58]:
# Preview the frame that will be serialised for upload.
df.head()

Unnamed: 0,user_id,segment
0,E99D8383-4061-4005-985A-D89BBD055B4D,zynga-21
1,9A2D438D-1F3D-4627-97B8-6A18FE0D958A,zynga-21
2,7ED64066-5245-4527-9FD0-5B085B3FD231,zynga-21
3,F5D73163-934D-4BFD-BD34-2FDE5323E947,zynga-21
4,1C566A54-49E1-4229-AB0A-FFA7DDEE513A,zynga-21


In [59]:
# Timestamp (in `tz`) embedded in the uploaded object's name.
file_name_date = dt.datetime.now(tz).strftime('%Y-%m-%d-%H%M%S')

# Serialise the frame as pipe-delimited rows with no index and no header.
csv_buffer = StringIO()
df.to_csv(csv_buffer, sep='|', index=False, header=False)

bw_bucket = 'beeswax-data-us-east-1'
prefix = 'user-list/{buzzkey}/'.format(buzzkey=buzzkey)
audience_label = '{buzzkey}-predictive-audience'.format(buzzkey=buzzkey)
key = '{}_{}_file.txt'.format(file_name_date, audience_label)
object_key = prefix + key

res = s3.put_object(
    Bucket=bw_bucket,
    Key=object_key,
    Body=csv_buffer.getvalue(),
    ContentType='text/csv',
)
# Set the object's ACL to bucket-owner-full-control; as the cell's last
# expression, the response is the cell output.
s3resource.Object(bw_bucket, object_key).Acl().put(ACL='bucket-owner-full-control')

{'ResponseMetadata': {'RequestId': 'AE0509B8377E35D7',
  'HostId': 'B5qqrOZhNf6t9cWl7/aR01fJxR0vU1HjbYIap16ckNOfbRSx3C1zoouB8cBw6tU4jQOS3XmoruQ=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'B5qqrOZhNf6t9cWl7/aR01fJxR0vU1HjbYIap16ckNOfbRSx3C1zoouB8cBw6tU4jQOS3XmoruQ=',
   'x-amz-request-id': 'AE0509B8377E35D7',
   'date': 'Sun, 14 Apr 2019 02:40:53 GMT',
   'x-amz-version-id': 'SIkcHGoHd5zYY85wbdRM1.oStaayFmBG',
   'content-length': '0',
   'server': 'AmazonS3'},
  'RetryAttempts': 0}}

In [60]:
# Write the same pipe-delimited file to the account's own bucket, reusing the
# timestamp computed for the first upload so both objects share one key.
csv_buffer = StringIO()
df.to_csv(csv_buffer, sep='|', index=False, header=False)

prefix = 'user-list/{buzzkey}/'.format(buzzkey=buzzkey)
audience_label = '{buzzkey}-predictive-audience'.format(buzzkey=buzzkey)
key = '{}_{}_file.txt'.format(file_name_date, audience_label)

res = s3.put_object(
    Bucket=bucket,
    Key=prefix + key,
    Body=csv_buffer.getvalue(),
    ContentType='text/csv',
)

In [61]:
cookies = {}

# Login payload for the Beeswax API. The email and password are read from the
# environment (a KeyError is raised if either is unset) so they are never
# stored in the notebook.
# SECURITY: the password that used to be hard-coded here must be treated as
# leaked and changed.
data = json.dumps({
    'email': os.environ['BEESWAX_EMAIL'],
    'password': os.environ['BEESWAX_PASSWORD'],
})

auth = 'https://{}.api.beeswax.com/rest/authenticate'.format(buzzkey)

segment_upload = 'https://{}.api.beeswax.com/rest/segment_upload'.format(buzzkey)

# The session cookies on `r` are reused by the segment_upload calls below.
r = requests.post(auth, cookies=cookies, data=data)
# Stop here on an HTTP error rather than continuing with cookies from a
# rejected login.
r.raise_for_status()

In [62]:

# Request body for the segment_upload endpoint; it points at the file written
# to the beeswax-data-us-east-1 bucket above.
file_payload = {
    "segment_file_list": [
        "s3://beeswax-data-us-east-1/user-list/{buzzkey}/{key}".format(buzzkey=buzzkey, key=key),
    ],
    "account_id": 2,
    "file_format": "DELIMITED",
    "user_id_type": "AD_ID",
    "segment_key_type": "DEFAULT",
    "continent": "NAM",
}

In [63]:
# Display the payload to check the generated S3 path before posting it.
file_payload

{'segment_file_list': ['s3://beeswax-data-us-east-1/user-list/zynga/2019-04-13-214050_zynga-predictive-audience_file.txt'],
 'account_id': 2,
 'file_format': 'DELIMITED',
 'user_id_type': 'AD_ID',
 'segment_key_type': 'DEFAULT',
 'continent': 'NAM'}

In [64]:
# Create the segment upload, authenticating with the cookies from the login
# response. The HTTP status is not checked here.
s_upload = requests.post(segment_upload, cookies=r.cookies, data=json.dumps(file_payload))

In [65]:
# Display the decoded JSON response of the upload request.
s_upload.json()

{'success': True,
 'payload': {'id': 715},
 'message': 'segment_upload created with ID = 715'}

In [66]:
# Id of the created segment upload, taken from the response payload.
s_upload.json()['payload']['id']

715

In [67]:
# Query the status of the upload just created, selecting it by id.
# NOTE(review): the filter is sent as a JSON body on a GET request -- confirm
# the API expects that rather than query parameters.
s_upload_status = requests.get(segment_upload, cookies=r.cookies, data=json.dumps({'segment_upload_id' : s_upload.json()['payload']['id']}))

In [68]:
# Display the decoded status response.
s_upload_status.json()

{'success': True,
 'payload': [{'account_id': 2,
   'continent': 'NAM',
   'create_date': '2019-04-13 22:40:57',
   'created_by_user_id': 10,
   'data_provider_key': None,
   'datacenter': None,
   'file_format': 'DELIMITED',
   'file_name': None,
   'operation_type': 'ADD_SEGMENTS',
   'overwrite': False,
   'partner_handle': 'zynga',
   'path_to_file': None,
   'segment_file_list': ['s3://beeswax-data-us-east-1/user-list/zynga/2019-04-13-214050_zynga-predictive-audience_file.txt'],
   'segment_key_type': 'DEFAULT',
   'segment_upload_id': 715,
   'size_in_bytes': None,
   'update_date': '2019-04-13 22:40:57',
   'upload_complete_date': '2019-04-13 22:40:57',
   'upload_message': None,
   'upload_status': 1,
   'user_id_type': 'AD_ID',
   'buzz_key': 'zynga'}]}