In [15]:
import requests
import json

refresh_token = ''
client_id = ''
server_id = 8731

## Authentication - Generating an access token

In [16]:
tokens_url = 'https://services.kenshoo.com/api/v1/token'
headers = {'content-type': 'application/x-www-form-urlencoded'}
payload = ""

r = requests.post(tokens_url, data=payload, headers=headers)
r_payload = r.text

print(f"Status: {r.status_code}")
print(f"Response payload: {r_payload}")
print(f"URL: {r.url}")

auth_token = json.loads(r_payload)['access_token']

Status: 200
Response payload: {"email":"edgar@factory14.com","access_token":"eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJlZGdhckBmYWN0b3J5MTQuY29tIiwiZXhwIjoxNjM4OTcwNjY0LCJpc3MiOiJodHRwOi8va2Vuc2hvby5jb20iLCJ1c2VyaWQiOjEzNDMyNSwiYWdlbmN5SWQiOjM1OTIsIm5hbWUiOiJFZGdhciIsInJvbGVzIjpbIkFkbWluIl0sImJpbGxpbmdJZCI6OTY2OTg0LCJhcGljIjoiOTY2OTg0Iiwib3JpIjoiYXBpIn0.tQ6Z2Ku7nZ1WLrHUaEm3MFZgrBrKeQX2TGU45BZytqQ"}
URL: https://services.kenshoo.com/api/v1/token


## Async Reports - Run a Report

In [25]:
base_url = 'https://services.kenshoo.com/api/v1/reports/async'
headers = {'Authorization': f'Bearer {auth_token}', 'content-type': 'application/json'}

params = {'ks': server_id}

fields = [{"name":"Profile ID"},{"name":"Profile Name"},{"name":"Profile Currency"},
          {"name":"Status"},{"name":"Channel"},{"name":"Campaign Name"},{"name":"Campaign ID"},{"name":"Date"},
          {"name":"Impressions"},{"name":"Clicks"},{"name":"Conversions"},{"name":"Cost"},{"name":"Rev."},
          {"name":"Imp. Share"},{"name": "Conv. Rate"}]

data = {"template_name": "Fusion: Campaigns",
        "start_date": "2021-01-01", "end_date": "2021-12-06", 
        "currency": "USD", "custom_file_name": "Reporte Name Tests",
        "compress_method": "ZIP", "delimiter": "Tab",
        "require_yesterday_performance": "false",
        "include_revenue_columns": "false",
        "fields": fields}

r = requests.post(base_url, headers=headers, params=params, data=json.dumps(data))
r_payload = r.text

print(f"Status: {r.status_code}")
print(f"Response payload: {r_payload}")
print(f"URL: {r.url}")

Status: 200
Response payload: {"run_id":"rpx-4d5cc853-168f-4e73-9a2a-ae3458b3491d"}
URL: https://services.kenshoo.com/api/v1/reports/async?ks=8731


In [26]:
execution_id = json.loads(r_payload)['run_id']

## Async Reports - Poll report execution status

In [32]:
base_url = f'https://services.kenshoo.com/api/v1/reports/async/{execution_id}/status'
headers = {'Authorization': f'Bearer {auth_token}'}
params = {'ks': server_id}

r = requests.get(base_url, headers=headers, params=params)
r_payload = r.text

print(f"Status: {r.status_code}")
print(f"Response payload: {r_payload}")
print(f"URL: {r.url}")

Status: 200
Response payload: {"status":"COMPLETED"}
URL: https://services.kenshoo.com/api/v1/reports/async/rpx-4d5cc853-168f-4e73-9a2a-ae3458b3491d/status?ks=8731


## Async Reports - Download completed report file

In [33]:
def download_file(url):
    local_filename = url.split('/')[-1]
    # NOTE the stream=True parameter below
    with requests.get(url, headers=headers, params=params, stream=True) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192): 
                # If you have chunk encoded response uncomment if
                # and set chunk_size parameter to None.
                #if chunk: 
                f.write(chunk)
    return local_filename

import pandas as pd
import io 

base_url = f'https://services.kenshoo.com/api/v1/reports/async/{execution_id}'
headers = {'Authorization': f'Bearer {auth_token}'}
params = {'ks': server_id}

download_file(base_url)

print(f"Status: {r.status_code}")
print(f"URL: {r.url}")
print("Done!")

Status: 200
URL: https://services.kenshoo.com/api/v1/reports/async/rpx-4d5cc853-168f-4e73-9a2a-ae3458b3491d/status?ks=8731
Done!


In [34]:
import pandas as pd
import io 

base_url = f'https://services.kenshoo.com/api/v1/reports/async/{execution_id}'
headers = {'Authorization': f'Bearer {auth_token}'}
params = {'ks': server_id}

r = requests.get(base_url, headers=headers, params=params, stream=True)

chunk_size = 128
file_content = io.BytesIO()
for chunk in r.iter_content(chunk_size=chunk_size):
    file_content.write(chunk)
        
df = pd.read_csv(file_content, compression='zip', header=0, sep='\t', quotechar='"')

print(f"Status: {r.status_code}")
print(f"URL: {r.url}")
print("Done!")

Status: 200
URL: https://services.kenshoo.com/api/v1/reports/async/rpx-4d5cc853-168f-4e73-9a2a-ae3458b3491d?ks=8731
Done!


In [35]:
df.head(1)

Unnamed: 0,Currency,Profile ID,Profile Name,Profile Currency,Status,Channel,Campaign Name,Campaign ID,Date,Impressions,Clicks,conversions,Cost,Rev.,Imp. Share,Conv. Rate
0,USD,401,Toolzilla - DE - AMZN,EUR,Paused,Amazon,TZ001 / TZ31PRO Manual,399,2021-08-12,105.0,2.0,0.0,2.59376,0.0,0.0,0.0


In [36]:
from datetime import datetime 

base_path = '/Users/emif/Downloads'
current_dt_str = datetime.now().strftime("%Y%m%d%H%M%S")

file_name = f"{base_path}/api_export_20210101000000_to_{current_dt_str}.csv"
# df.to_csv(file_name,index=False)
file_name

'/Users/emif/Downloads/api_export_20210101000000_to_20211207104642.csv'

# Upload to RAW layer

In [37]:
from datetime import datetime 
import pandas as pd
import boto3

env = 'prod'
aws_session = boto3.session.Session(profile_name=f'data-maintainer-prod')
# aws_session = boto3.session.Session(profile_name=f'data-{env}')
dest_bucket = f'f14-datalake-raw-{env}'

In [38]:
df_bkp = df.copy()
df = df.copy()
df.head()

Unnamed: 0,Currency,Profile ID,Profile Name,Profile Currency,Status,Channel,Campaign Name,Campaign ID,Date,Impressions,Clicks,conversions,Cost,Rev.,Imp. Share,Conv. Rate
0,USD,401,Toolzilla - DE - AMZN,EUR,Paused,Amazon,TZ001 / TZ31PRO Manual,399,2021-08-12,105.0,2.0,0.0,2.59376,0.0,0.0,0.0
1,USD,401,Toolzilla - DE - AMZN,EUR,Paused,Amazon,TZ003 / TZ10PRO Manual,401,2021-08-16,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,USD,401,Toolzilla - DE - AMZN,EUR,Paused,Amazon,TZ001 / TZ31PRO Auto,406,2021-08-12,56.0,0.0,0.0,0.0,0.0,0.0,0.0
3,USD,401,Toolzilla - DE - AMZN,EUR,Paused,Amazon,TZ31PRO Category 2020,409,2021-08-12,330.0,2.0,0.0,2.55855,17.745541,0.0,0.5
4,USD,401,Toolzilla - DE - AMZN,EUR,Paused,Amazon,TZ31PRO Manual 2020,410,2021-08-12,16.0,1.0,0.0,0.680715,0.0,0.0,0.0


## Aux Functions

In [39]:
def col_name_to_snake_case(df):
    new_cols_names = [col.lower().replace('.', '').replace(' ', '_').replace('date', 'dt') for col in df.columns]
    for col in df.columns:
        new_col_name = col.lower().replace('.', '').replace(' ', '_').replace('date', 'dt')
        df[new_col_name] = df[col]

    df = df[new_cols_names]
    df['aud_upd_ts'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return df

## Main Execution

In [40]:
df = col_name_to_snake_case(df)
df.head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,currency,profile_id,profile_name,profile_currency,status,channel,campaign_name,campaign_id,dt,impressions,clicks,conversions,cost,rev,imp_share,conv_rate,aud_upd_ts
0,USD,401,Toolzilla - DE - AMZN,EUR,Paused,Amazon,TZ001 / TZ31PRO Manual,399,2021-08-12,105.0,2.0,0.0,2.59376,0.0,0.0,0.0,2021-12-07 10:49:42


In [41]:
import awswrangler as wr
from datetime import datetime

current_ts = datetime.now()
dest_prefix = f'skai/fusion_data_campaigns/p_creation_dt={current_ts.strftime("%Y-%m-%d")}/fusion_data_campaigns_{current_ts.strftime("%Y%m%d%H%M%S")}.jsonl'

dest_path = f"s3://{dest_bucket}/{dest_prefix}"
wr.s3.to_json(df, dest_path, orient="records", lines=True, boto3_session=aws_session)


['s3://f14-datalake-raw-prod/skai/fusion_data_campaigns/p_creation_dt=2021-12-07/fusion_data_campaigns_20211207105006.jsonl']