In [1]:
import pandas as pd
import numpy as np

import sys, os
import aiohttp
import asyncio
import requests
import json


# API Request for IHC Webservice Computation

## Functions

In [2]:
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values into plain Python objects.

    ``json.dumps`` cannot serialise NumPy scalars or arrays by itself. Pass
    this class as ``cls=NpEncoder`` to convert them on the fly:

    * ``np.bool_``    -> ``bool``
    * ``np.integer``  -> ``int``
    * ``np.floating`` -> ``float``
    * ``np.ndarray``  -> (nested) ``list``

    Every other unsupported object is delegated to the base class, which
    raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serialisable replacement for ``obj``."""
        # np.bool_ is not a subclass of np.integer, so it needs its own branch
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

## This function chunks the given list or array in desired chunk sizes
def chunkList(initialList, chunkSize):
    """
    Split a list (or any sliceable sequence such as a numpy array) into
    consecutive sub lists holding at most chunkSize items each. Only the
    last sub list can be shorter than chunkSize.

    initialList = sequence to split (must support len() and slicing),
    chunkSize = maximum length of every sub list, has to be >= 1

    Returns a list of slices of initialList; an empty input gives [].
    Raises ValueError if chunkSize is smaller than 1.

    Example:
    lst = [3, 4, 9, 7, 1, 1, 2, 3]
    print(chunkList(lst, 3))
    returns
    [[3, 4, 9], [7, 1, 1], [2, 3]]
    """
    # A negative step would make range() empty and silently drop every item,
    # a zero step raises an unrelated looking error, so reject both up front.
    if chunkSize < 1:
        raise ValueError("chunkSize must be a positive integer, got {!r}".format(chunkSize))

    return [initialList[start:start + chunkSize]
            for start in range(0, len(initialList), chunkSize)]




     At most 100 customer journeys can be sent per request, and the total number of sessions in a single request is limited to 3000. That is why, if we want to evaluate large test sets of customer journeys, we have to split our dataset and send the parts separately.
     Here I use asynchronous functions that send the API requests concurrently in order to speed up the process. The first function, "get_tasks", gathers all of the requests in one task list. The second function, "get_results", sends the requests at the same time and returns the results as a pandas DataFrame.
     

In [3]:
## function which gather api requests
def get_tasks(session, cj, api_key, conv_type, redist = False, chunk_size = 100):
    """
    Builds one POST request per chunk of conversions and returns them as a
    list. The requests are only created here, they are not awaited; the
    caller is expected to await them (e.g. with asyncio.gather).

    session = aiohttp client session,
    api_key = our api key (sent in the 'x-api-key' header),
    cj = customer journey set,which we want to evaluate
         (DataFrame with a conversion_id column),
    conv_type = conversion type name in our IHC Webservice,
    redist = boolean for redistribution parameter; any truthy value adds the
             redistribution settings to every request body,
    chunk_size = maximum number of conversions per request (default 100)

    Returns the list of objects produced by session.post, one per chunk.

    NOTE: only the number of conversions per request is capped here, the
    number of sessions per request is not checked.
    """
    # api url for requests
    api_url = "https://api.ihc-attribution.com/v1/compute_ihc?conv_type_id={conv_type_id}".format(conv_type_id = conv_type)

    # the headers are the same for every request, so they are built only once
    headers = {
        'Content-Type': 'application/json',
        'x-api-key': api_key
    }

    ## redistribution parameter (does not depend on the chunk either)
    redistribution_parameter = None
    if redist:
        redistribution_parameter = {
            'initializer' : {
                'direction' : 'any_session',
                'receive_threshold' : 0,
                'redistribution_channel_labels' : ["Direct"],
            },
            'holder' : {
                'direction' : 'any_session',
                'receive_threshold' : 0.01,
                'redistribution_channel_labels' : ["Direct"],
            },
            'closer' : {
                'direction' : 'any_session',
                'receive_threshold' : 0.01,
                'redistribution_channel_labels' : ["Direct"],
            }
        }

    # task list
    tasks = []

    ## looping through in the chunked conversions
    for chunk in chunkList(cj.conversion_id.unique(), chunk_size):
        ## masking the test data set: keep the sessions of this chunk only
        df_select = cj[cj.conversion_id.isin(chunk)]
        # round trip through to_json so the records are plain Python objects
        cj_chunk = json.loads(df_select.to_json(orient="records"))

        body = {'customer_journeys': cj_chunk}
        if redistribution_parameter is not None:
            body['redistribution_parameter'] = redistribution_parameter

        ## appending the api request to the task list
        tasks.append(session.post(api_url,
                                  data=json.dumps(body, cls=NpEncoder),
                                  headers=headers))

    return tasks


In [4]:
## asynchronous function which sends all requests simultaneously
async def get_results(cj, api_key, conv_type, redist = False):
    """
    Sends the requests built by get_tasks concurrently and stacks the
    'value' part of every response into one pandas DataFrame.

    api_key = our api key,
    cj = customer journey set,which we want to evaluate,
    conv_type = conversion type name in our IHC Webservice,
    redist = boolean for redistribution parameter (default False)

    Returns a DataFrame with the rows of all usable responses; the index of
    each response frame is kept as it is. If no response was usable an empty
    DataFrame is returned. Responses without a usable 'value' entry are
    printed and skipped.
    """
    # frames are collected in a list and concatenated once at the end;
    # DataFrame.append was removed in pandas 2.0
    frames = []

    ## asynchronous part
    async with aiohttp.ClientSession() as session:
        ## creating task list
        tasks = get_tasks(session, cj, api_key, conv_type, redist)
        responses = await asyncio.gather(*tasks)

        for response in responses:
            x = await response.json()

            try:
                frames.append(pd.DataFrame(x['value']))
            except (KeyError, TypeError, ValueError):
                # best effort: show the payload that has no usable 'value'
                # entry and continue with the remaining responses
                print(x)

    if not frames:
        # pd.concat raises on an empty list
        return pd.DataFrame()

    return pd.concat(frames)

### Getting The Results

In [9]:
## Defining Function Parameters
# The api key is read from the IHC_API_KEY environment variable when it is set,
# so the real key does not have to be saved inside the notebook. Without the
# variable the placeholder below is used and has to be replaced by hand.
api_key = os.environ.get('IHC_API_KEY', 'your_api')
# test set of customer journeys
df_test = pd.read_csv("datasets/cj_nl_new_test.csv")
# conversion type name as defined in the IHC Webservice
conv_type = "conv_type_name"

     The IHC Webservice API accepts an optional argument called the redistribution parameter. I will not cover this parameter in this notebook; if you want to learn more about it, please take a look at the notebook "IHC 4 - Test Set Evaluation".
     Remember that the test set we created in the previous notebook contains 3000 conversions. The function "get_tasks" splits the conversions into groups of 100 (the maximum allowed per API request). Therefore, in order to compute the whole test set, we need 30 API requests in total.

In [8]:
## Evaluating
df_results = await get_results(cj = df_test, 
                                  api_key = api_key, 
                                  conv_type = conv_type, 
                                  redist = False)
df_results.head()

Unnamed: 0,conversion_id,session_id,initializer,holder,closer,ihc
0,b5455601a5,6f248ba6a7b27d85a599,1.0,0.0,0.0,0.331
1,b5455601a5,c37d81c07c8656907024,0.0,0.4322,0.0,0.1634
2,b5455601a5,aa3ddaf73f8c6064bce9,0.0,0.3807,0.2776,0.2247
3,b5455601a5,d98f8e5b45ee011db2d7,0.0,0.1871,0.7224,0.2809
4,12c7fb69ef,ee500aae2f2c0347c2b6,0.0,0.0,0.0,0.0


     Since the results returned by the API contain only the IHC values, we have to merge them with the original test set for further analysis. 

In [10]:
# merging results and initial cj set: the inner join keeps only the rows whose
# (conversion_id, session_id) pair is present in both frames
df_merged = df_results.merge(
    df_test,
    how='inner',
    on=['conversion_id', 'session_id'],
)
df_merged.head()

Unnamed: 0,conversion_id,session_id,initializer,holder,closer,ihc,timestamp,channel_label,holder_engagement,closer_engagement,conversion
0,b5455601a5,6f248ba6a7b27d85a599,1.0,0.0,0.0,0.331,2021-04-04 23:34:51,Shopping - Non Brand,0,0,0
1,b5455601a5,c37d81c07c8656907024,0.0,0.4322,0.0,0.1634,2021-04-04 23:36:18,SEO,1,0,0
2,b5455601a5,aa3ddaf73f8c6064bce9,0.0,0.3807,0.2776,0.2247,2021-04-04 23:39:17,Shopping - Non Brand,1,1,0
3,b5455601a5,d98f8e5b45ee011db2d7,0.0,0.1871,0.7224,0.2809,2021-04-05 09:32:30,Direct,1,1,1
4,12c7fb69ef,ee500aae2f2c0347c2b6,0.0,0.0,0.0,0.0,2020-12-26 12:54:39,Social Paid,0,0,0


## Channel Contribution Table

     This table is a very gentle start to analyzing the IHC Attribution results. If you want to learn more about IHC evaluation at channel level, you can check the "IHC 4 - Test Set Evaluation" notebook.

In [11]:
# columns that hold the attribution values
ihc_cols = ["initializer", "holder", "closer", "ihc"]

# Sum every attribution column per channel, turn each column into percentages
# of its own total, order the channels by their ihc share and render the
# table as a styled heat map.
(
    df_merged
    .groupby("channel_label")[ihc_cols]
    .sum()
    .apply(lambda col: 100 * col / col.sum())
    .sort_values("ihc", ascending=False)
    .round(3)
    .style
    .background_gradient("Reds")
    .format(formatter="% {:.2f}")
)

Unnamed: 0_level_0,initializer,holder,closer,ihc
channel_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Shopping - Non Brand,% 25.86,% 23.89,% 18.84,% 23.07
Direct,% 17.16,% 19.78,% 23.85,% 20.10
SEO,% 13.56,% 16.10,% 17.68,% 15.72
SEA - Brand,% 13.25,% 10.00,% 8.38,% 10.60
Email,% 6.74,% 8.25,% 9.83,% 8.21
Social Paid,% 9.28,% 5.54,% 5.21,% 6.68
SEA - Non Brand,% 6.27,% 5.27,% 4.44,% 5.36
Referral,% 1.20,% 6.19,% 7.27,% 4.85
Affiliate,% 2.04,% 2.06,% 1.87,% 2.00
Social Remarketing,% 2.08,% 1.37,% 1.42,% 1.62
