In [2]:
%reload_ext ishbook
import pandas as pd
import plus
import datetime as dt

## Pull in the advertiser report provided from Luis

- This is taken from the adCentral report that is run bi-weekly on Tuesdays/Thursdays
- Exclude any rows from the df where the **Advertiser ID** is null (null IDs cause issues when creating the string of IDs to feed into the IQL pulls)
- Check the len of the df before and after to make sure those rows were dropped

In [3]:
# Load the adCentral advertiser report and show its starting row count.
adCentral_csv = pd.read_csv(filepath_or_buffer="SCS EMEA B 915.csv")
print(len(adCentral_csv.index))

280


In [4]:
# Keep only rows that have an Advertiser ID; null IDs break the
# comma-separated ID string that is fed to the IQL queries.
has_advertiser_id = adCentral_csv['Advertiser ID'].notnull()

# .copy() makes the filtered result an independent frame, so assigning to one
# of its columns afterwards does not raise SettingWithCopyWarning.
adCentral_csv = adCentral_csv[has_advertiser_id].copy()

# Row count after the drop, for comparison with the count before it.
print(len(adCentral_csv))

268


## Convert the Advertiser ID column to int and build a comma-separated string of IDs for the IQL query

- Advertiser ID comes in as object so needs to be converted

In [5]:
# Replace the Advertiser ID column with its integer-typed version.
id_column = 'Advertiser ID'
adCentral_csv[id_column] = adCentral_csv[id_column].astype(int)

In [6]:
# Comma-separated list of every advertiser ID; the IQL queries interpolate it
# as {advids}.
advids = ",".join(map(str, adCentral_csv["Advertiser ID"]))

## Checking for Indexed / Hosted Clicks & Dradis Jobs
- Use the **adclick** index to pull in indexed and hosted clicks
- Use the **hostedjobsnapshot** index to look at any advertisers that had Dradis jobs

In [7]:
%%ish
indexed = from adclick 2012-01-01 today WHERE advid in ({advids}) and type = "chg" feedid !=50461 group by advid/*Advertiser ID*/ select count() /*sponsored indexed Clicks*/
output = None
dradis = from adclick 2012-01-01 today WHERE advid in ({advids}) and type = "chg" feedid =50461 group by advid/*Advertiser ID*/ select count() /*sponsored dradis Clicks*/
output = None
dradisjobs = from hostedjobsnapshot 2014-07-01 today WHERE advertiserid in ({advids}) and status = ('active') group by advertiserid/*Advertiser ID*/ select count() /*hosted jobs*/
output = None

## Merge the adCentral df with the output from the IQL queries above
- This could probably be done in one line but for the sake of cleanliness I merged it on three separate lines

In [8]:
# Left-join each IQL result onto the adCentral report by advertiser, so every
# report row is kept even when a query has no row for it.
merge_key = "Advertiser ID"
first_merge = pd.merge(adCentral_csv, indexed, how="left", on=merge_key)
second_merge = pd.merge(first_merge, dradis, how="left", on=merge_key)
final_merge = pd.merge(second_merge, dradisjobs, how="left", on=merge_key)

# Keep only the identifier columns and the three query measures.
report_columns = [
    'Advertiser ID',
    'Account ID',
    'sponsored indexed Clicks',
    'sponsored dradis Clicks',
    'hosted jobs',
]
final_merge = final_merge[report_columns]

## Keep only the advertisers that have hosted jobs and no sponsored indexed clicks

In [9]:
# Keep advertisers that have hosted jobs but no sponsored indexed clicks.
# After the left joins, a null in a query column normally means that query
# had no row for the advertiser.
no_indexed_clicks = final_merge['sponsored indexed Clicks'].isnull()
has_hosted_jobs = final_merge['hosted jobs'].notnull()

# .copy() makes the filtered result an independent frame, so the columns added
# to it afterwards do not raise SettingWithCopyWarning.
final_merge = final_merge[no_indexed_clicks & has_hosted_jobs].copy()

In [10]:
# Spot-check the last five rows of the filtered frame.
final_merge.tail(5)

Unnamed: 0,Advertiser ID,Account ID,sponsored indexed Clicks,sponsored dradis Clicks,hosted jobs
263,7991164,247009341.0,,,276.0
264,8026276,247378776.0,,,369.0
265,8066349,247874157.0,,,216.0
266,8070863,247920054.0,,,83.0
267,8072787,247938256.0,,,162.0


In [11]:
# Number of advertisers left after filtering.
len(final_merge.index)

183

## Set the New User Id to 3271 for all the remaining rows and add the date
- Create a new column labeled **New User Id** and set it equal to 3271
- Create a **Relationship** column and set it to SERVICE_REP for every row
- Create a separate column **Date Assignment Starts** and set it to today's date (MM/DD/YY) using the datetime module

In [12]:
# Every remaining advertiser gets the same user ID and relationship type.
ASSIGNED_USER_ID = 3271
ASSIGNED_RELATIONSHIP = "SERVICE_REP"

final_merge['New User Id'] = ASSIGNED_USER_ID
final_merge["Relationship"] = ASSIGNED_RELATIONSHIP

In [13]:
# Stamp every row with today's local date in MM/DD/YY form.
assignment_start = dt.date.today().strftime("%m/%d/%y")
final_merge['Date Assignment Starts'] = assignment_start

In [14]:
# Preview the first rows only; rendering the whole frame buries the notebook
# narrative under a very long table.
final_merge.head(10)

Unnamed: 0,Advertiser ID,Account ID,sponsored indexed Clicks,sponsored dradis Clicks,hosted jobs,New User Id,Relationship,Date Assignment Starts
1,2870701,180889518.0,,27593.0,705.0,3271,SERVICE_REP,09/15/16
2,5286636,215212711.0,,49583.0,1851.0,3271,SERVICE_REP,09/15/16
3,140420,67845114.0,,31031.0,1076.0,3271,SERVICE_REP,09/15/16
4,2542763,175445944.0,,2199.0,481.0,3271,SERVICE_REP,09/15/16
6,2102204,166382083.0,,15086.0,1992.0,3271,SERVICE_REP,09/15/16
10,318056,98057170.0,,7723.0,948.0,3271,SERVICE_REP,09/15/16
13,4681940,208045916.0,,908.0,2716.0,3271,SERVICE_REP,09/15/16
21,88997,48245402.0,,,1904.0,3271,SERVICE_REP,09/15/16
26,190074,82706742.0,,,1557.0,3271,SERVICE_REP,09/15/16
27,195843,83844598.0,,,1373.0,3271,SERVICE_REP,09/15/16


## Convert the Account ID from float to int so the file can be uploaded to adCentral

In [15]:
# The Account ID column is float at this point; store an integer copy under
# the 'Account Id' header used in the upload file.
# astype is the vectorised cast and replaces a per-row Python lambda.
final_merge['Account Id'] = final_merge['Account ID'].astype('int64')

# Reduce the frame to the upload columns, in the order they are written out.
upload_columns = ['Account Id', 'Relationship', 'New User Id', 'Date Assignment Starts']
final_merge = final_merge[upload_columns]

# Sanity check: last ten rows and the final row count.
print(final_merge.tail(10))
print(len(final_merge))

     Account Id Relationship  New User Id Date Assignment Starts
258   246131170  SERVICE_REP         3271               09/15/16
259   246276183  SERVICE_REP         3271               09/15/16
260   246311700  SERVICE_REP         3271               09/15/16
261   246449714  SERVICE_REP         3271               09/15/16
262   246779224  SERVICE_REP         3271               09/15/16
263   247009341  SERVICE_REP         3271               09/15/16
264   247378776  SERVICE_REP         3271               09/15/16
265   247874157  SERVICE_REP         3271               09/15/16
266   247920054  SERVICE_REP         3271               09/15/16
267   247938256  SERVICE_REP         3271               09/15/16
183


## Export the finalized output to CSV for Luis to upload into adCentral
- Set the index to false since we don't need that in the final output

In [16]:
# Write the upload file without the DataFrame index column.
output_path = 'SCS EMEA B 915 FINAL.csv'
final_merge.to_csv(output_path, index=False)