In [None]:
import numpy as np
import pandas as pd

# Load the raw interaction log, order it by customer (descending id) and then by
# timestamp (ascending), and number each customer's rows 1, 2, 3, ... in that order.
# NOTE(review): 'timestamp' is sorted exactly as read from the CSV; if it is a
# string column, confirm that its format sorts chronologically.
campaign_data = (
    pd.read_csv("cashback_activation_data.csv")
    .sort_values(by=['customer_id', 'timestamp'], ascending=[False, True])
    .assign(
        visit_order=lambda events: events.groupby('customer_id').cumcount() + 1
    )
)

In [None]:
# Step 1: Collect every customer's channel interactions into one list per customer
journeys = (
    campaign_data
    .groupby('customer_id')['channel']
    .aggregate(lambda channels: channels.tolist())
    .reset_index()
)

# Step 2: Attach the activation flag (1 or 0) taken from each customer's last row
activation_results = campaign_data.drop_duplicates('customer_id', keep='last')[['customer_id', 'activation']]
journeys = journeys.merge(activation_results, how='left', on='customer_id')

# Step 3: Wrap each journey in "Start" plus a terminal state: "Null" when the
# activation flag equals 0, "Activation" otherwise
journeys['path'] = pd.Series(
    [
        ["Start"] + touchpoints + (["Null"] if activated == 0 else ["Activation"])
        for touchpoints, activated in zip(journeys['channel'], journeys['activation'])
    ],
    index=journeys.index,
    dtype=object,
)

journeys = journeys[['customer_id', 'path']]

# Overall activation rate: share of journeys whose final state is "Activation"
total_activations = (
    journeys['path']
    .apply(lambda path: path[-1])
    .str.match('Activation')
    .sum()
)
activation_rate = total_activations / len(journeys)

In [None]:
# Function to create intermediate path strings
def transition_states(paths):
    """Count the observed transitions between consecutive states in all paths.

    Parameters
    ----------
    paths : iterable of list of str
        One state sequence per customer, e.g.
        ``['Start', 'Email', 'Activation']``. State names must not contain
        ``'>'`` because it is used as the separator in the result keys.

    Returns
    -------
    dict of str to int
        Maps ``'origin>destination'`` to the number of times that step was
        observed. Every ordered pair of states that appears anywhere in
        ``paths`` is present, with 0 for pairs that were never observed.
        Steps that leave ``'Activation'`` or ``'Null'`` are never counted.

    Notes
    -----
    States are compared by equality. Substring matching would count a step
    twice whenever one channel name is contained in another (for example
    ``'Email'`` and ``'Email Promo'``). A path that ends on a non-terminal
    state contributes no step for that last state.
    """
    absorbing_states = ('Activation', 'Null')

    # Materialise once so that one-shot iterables can be traversed twice.
    paths = list(paths)

    # Sorted so that the key order of the result is reproducible between runs.
    unique_channels = sorted({state for user_path in paths for state in user_path})
    transition_counts = {
        origin + '>' + destination: 0
        for origin in unique_channels
        for destination in unique_channels
    }

    # Single pass over every consecutive (origin, destination) pair.
    for user_path in paths:
        for origin, destination in zip(user_path, user_path[1:]):
            if origin not in absorbing_states:
                transition_counts[origin + '>' + destination] += 1

    return transition_counts

# Function to calculate transition probabilities
def transition_prob(trans_dict, paths):
    """Turn transition counts into per-origin transition probabilities.

    Parameters
    ----------
    trans_dict : dict of str to int
        Maps ``'origin>destination'`` to an observed count.
    paths : iterable of list of str
        The state sequences the counts were derived from. Only used to
        decide which origins are considered: every state that appears in
        a path, except ``'Activation'`` and ``'Null'``.

    Returns
    -------
    dict of str to float
        Maps ``'origin>destination'`` to ``count / total count leaving
        origin``. Transitions with a count of 0 are omitted.

    Notes
    -----
    The origin of a key is the text before the first ``'>'`` and is compared
    by equality. A substring test would let ``'Email>'`` also match
    ``'Promo Email>...'``, which inflates the denominator and produces
    wrong probabilities.
    """
    absorbing_states = ('Activation', 'Null')
    origins = {
        state
        for user_path in paths
        for state in user_path
        if state not in absorbing_states
    }

    # First pass: total outgoing count per origin, remembering the observed keys.
    outgoing_totals = {}
    observed = []
    for key, count in trans_dict.items():
        origin = key.partition('>')[0]
        if count > 0 and origin in origins:
            outgoing_totals[origin] = outgoing_totals.get(origin, 0) + count
            observed.append((key, origin, count))

    # Second pass: normalise each observed count by its origin's total.
    return {
        key: float(count) / float(outgoing_totals[origin])
        for key, origin, count in observed
    }

# Function to create transition matrix
def transition_matrix(list_of_paths, transition_probabilities):
    """Build the square state-transition matrix as a DataFrame.

    Parameters
    ----------
    list_of_paths : iterable of list of str
        State sequences; every state that appears becomes one row and one
        column of the matrix.
    transition_probabilities : dict of str to float
        Maps ``'origin>destination'`` to a probability. Each key must contain
        exactly one ``'>'``.

    Returns
    -------
    pandas.DataFrame
        Rows are origins and columns are destinations. ``'Activation'`` and
        ``'Null'`` get a self-transition of 1.0, every other cell starts at
        0.0, and the supplied probabilities are then written on top.

    Notes
    -----
    Cells are written with ``.at[row, col]``. A chained
    ``.loc[row][col] = value`` assigns into a temporary object and does not
    update the frame when pandas Copy-on-Write is active, which would leave
    the absorbing states without their 1.0 self-loop.
    """
    absorbing_states = ('Activation', 'Null')

    # dict.fromkeys keeps first-appearance order, so the layout is reproducible.
    states = list(dict.fromkeys(
        state for path in list_of_paths for state in path
    ))

    # Allocate the full zero matrix once instead of growing it row by row.
    trans_matrix = pd.DataFrame(0.0, index=states, columns=states)

    for state in states:
        if state in absorbing_states:
            trans_matrix.at[state, state] = 1.0

    for key, value in transition_probabilities.items():
        origin, destination = key.split('>')
        trans_matrix.at[origin, destination] = value

    return trans_matrix

# Run the pipeline on the journey paths: transition counts, then per-origin
# probabilities, then the transition matrix.
trans_states = transition_states(paths=journeys['path'])
trans_prob = transition_prob(trans_dict=trans_states, paths=journeys['path'])
trans_matrix = transition_matrix(
    list_of_paths=journeys['path'],
    transition_probabilities=trans_prob,
)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from markovchain import MarkovChain

# Build a MarkovChain from the matrix values and its row labels, then call draw().
state_labels = trans_matrix.index
mc = MarkovChain(trans_matrix.values, state_labels)
mc.draw()

In [None]:
# Reference for the MarkovChain drawing helper imported above (presumably from this repository):
# https://github.com/NaysanSaran/markov-chain

In [None]:
def removal_effects(df, conversion_rate):
    """Compute the removal effect of every channel in a transition matrix.

    For each channel, the channel's row and column are dropped from the
    matrix and the probability of reaching ``'Activation'`` from ``'Start'``
    is recomputed. The removal effect is the relative drop of that
    probability against ``conversion_rate``.

    Parameters
    ----------
    df : pandas.DataFrame
        Square transition matrix (rows are origins, columns are destinations)
        that contains the states ``'Start'``, ``'Null'`` and ``'Activation'``.
        It is not modified.
    conversion_rate : float
        Baseline activation rate the reduced rate is compared with.

    Returns
    -------
    dict
        Maps each channel (every column except ``'Start'``, ``'Null'`` and
        ``'Activation'``) to ``1 - removal_cvr / conversion_rate``.

    Notes
    -----
    The probability that used to flow into the removed channel is added to
    the ``'Null'`` column, so every row of the reduced matrix still sums
    to 1. The returned values depend only on the transient block ``Q`` and
    the ``'Activation'`` column of ``R``; the ``'Null'`` bookkeeping keeps
    the reduced matrix a valid stochastic matrix. All writes use a single
    indexing call because chained ``.loc[row][col] = value`` assignments do
    not update the frame when pandas Copy-on-Write is active.
    """
    absorbing_states = ['Null', 'Activation']
    channels = [
        channel for channel in df.columns
        if channel not in ('Start', 'Null', 'Activation')
    ]

    removal_effects_dict = {}
    for channel in channels:
        # drop() returns a new frame, so the caller's matrix stays untouched.
        removal_df = df.drop(index=channel, columns=channel)

        # Redirect the probability mass that led to the removed channel to 'Null'.
        lost_mass = 1.0 - removal_df.sum(axis=1)
        removal_df['Null'] = removal_df['Null'] + lost_mass
        removal_df.loc['Null', 'Null'] = 1.0

        # R: transient -> absorbing transitions; Q: transient -> transient.
        R = removal_df.loc[:, absorbing_states].drop(index=absorbing_states)
        Q = removal_df.drop(index=absorbing_states, columns=absorbing_states)

        # Fundamental matrix N = (I - Q)^-1; N @ R holds absorption probabilities.
        N = np.linalg.inv(np.identity(len(Q.columns)) - Q.to_numpy(dtype=float))
        absorption = pd.DataFrame(
            np.dot(N, R.to_numpy(dtype=float)),
            index=R.index,
            columns=R.columns,
        )

        # Select by label rather than by the positional column number.
        removal_cvr = absorption.at['Start', 'Activation']
        removal_effects_dict[channel] = 1 - removal_cvr / conversion_rate

    return removal_effects_dict


# Removal effect of every channel, measured against the observed activation rate
removal_effects_dict = removal_effects(
    df=trans_matrix,
    conversion_rate=activation_rate,
)

def removal_effect_pct(removal_effects, total_activations):
    """Distribute ``total_activations`` across channels by removal effect.

    Each channel receives ``total_activations`` multiplied by its share of
    the sum of all removal effects.

    Parameters
    ----------
    removal_effects : dict
        Maps channel name to its removal effect.
    total_activations : number
        Number of activations to distribute.

    Returns
    -------
    dict
        Maps channel name to the activations attributed to it.
    """
    effect_total = np.sum(list(removal_effects.values()))

    attributed = {}
    for channel, effect in removal_effects.items():
        attributed[channel] = (effect / effect_total) * total_activations
    return attributed


# Split the observed activations across channels in proportion to removal effect
attributions = removal_effect_pct(
    removal_effects=removal_effects_dict,
    total_activations=total_activations,
)

In [None]:
# Paths that ended in an activation; first- and last-touch credit is counted on these.
activated_journey_paths = journeys.loc[
    journeys['path'].apply(lambda path: path[-1]) == 'Activation',
    'path'
]
# path[1] is the first channel after "Start"; path[-2] is the last channel
# before the terminal state.
first_touch_counts = activated_journey_paths.apply(lambda path: path[1]).value_counts().to_dict()
last_touch_counts = activated_journey_paths.apply(lambda path: path[-2]).value_counts().to_dict()

# Journey attribution: the activations already allocated in `attributions`
df_multi = pd.DataFrame({
    'Channel': list(attributions.keys()),
    'Attribution style': 'Journey',
    'Activations': list(attributions.values()),
})

# First-touch attribution: all credit goes to the first channel of the path
df_first = pd.DataFrame({
    'Channel': list(attributions.keys()),
    'Attribution style': 'First touchpoint',
}).assign(Activations=lambda frame: frame['Channel'].map(first_touch_counts))

# Last-touch attribution: all credit goes to the last channel of the path
df_last = pd.DataFrame({
    'Channel': list(attributions.keys()),
    'Attribution style': 'Last touchpoint',
}).assign(Activations=lambda frame: frame['Channel'].map(last_touch_counts))

df_plot = pd.concat([df_multi, df_first, df_last])

sns.set_style("darkgrid", {"axes.facecolor": ".9"})

color_codes = ['#2653de', '#6989EA', '#98AEF0']

# Grouped bars: one group per channel, one bar per attribution style
sns.catplot(
    kind="bar",
    data=df_plot,
    x="Channel",
    y="Activations",
    hue="Attribution style",
    palette=sns.color_palette(color_codes),
    height=6,
    aspect=12/8,
)

In [None]:
# Last channel before the terminal state, for every journey
last_touch_channel = journeys['path'].apply(lambda path: path[-2])
journey_activated = journeys['path'].apply(lambda path: path[-1]) == 'Activation'

# Per-channel lookups used to enrich the journey-attribution table
coverage_by_channel = campaign_data.groupby('channel')['customer_id'].nunique().to_dict()
last_touch_counts_all = last_touch_channel.value_counts().to_dict()
last_touch_counts_activated = last_touch_channel[journey_activated].value_counts().to_dict()

df_scatter = df_multi.copy()

# Coverage: distinct customers who interacted with the channel
df_scatter['Coverage'] = df_scatter['Channel'].map(coverage_by_channel)
# Total Clicks: journeys in which the channel was the last touchpoint
df_scatter['Total Clicks'] = df_scatter['Channel'].map(last_touch_counts_all)
# Activation Clicks: the same count, restricted to journeys ending in "Activation"
df_scatter['Activation Clicks'] = df_scatter['Channel'].map(last_touch_counts_activated)

# Derived ratios
df_scatter['Attribution'] = df_scatter['Activations'] / df_scatter['Activations'].sum()
df_scatter['Activation Rate'] = df_scatter['Activations'] / df_scatter['Coverage']
df_scatter['Click Activation Rate'] = df_scatter['Activation Clicks'] / df_scatter['Total Clicks']


In [None]:

plt.figure(figsize=(10, 5))
sns.scatterplot(
    data=df_scatter,
    x='Click Activation Rate',
    y='Activation Rate',
    s=200,
    color='#2653de',
)

# Write each channel name next to its point, shifted 0.001 to the right on the x axis
for row_label in range(df_scatter.shape[0]):
    plt.text(
        df_scatter.loc[row_label, 'Click Activation Rate'] + 0.001,
        df_scatter.loc[row_label, 'Activation Rate'],
        df_scatter.loc[row_label, 'Channel'],
        horizontalalignment='left',
        size='medium',
        color='black',
        weight='semibold',
    )

In [None]:
def expected_steps(df):
    """Return the row sums of the fundamental matrix for each transient state.

    The ``'Null'`` and ``'Activation'`` rows and columns are removed from
    ``df`` to obtain ``Q``. The fundamental matrix ``N = (I - Q)^-1`` is then
    computed and summed along each row.

    Parameters
    ----------
    df : pandas.DataFrame
        Square transition matrix containing the states ``'Null'`` and
        ``'Activation'``.

    Returns
    -------
    dict
        Maps every remaining state except ``'Start'`` to the sum of its row
        of ``N``.
    """
    absorbing_states = ['Null', 'Activation']
    transient = df.drop(index=absorbing_states, columns=absorbing_states)

    identity = np.identity(transient.shape[1])
    fundamental = np.linalg.inv(identity - transient.to_numpy())
    row_totals = np.sum(fundamental, axis=1)

    steps_by_state = {}
    for position, state in enumerate(transient.index):
        if state != 'Start':
            steps_by_state[state] = row_totals[position]
    return steps_by_state

# Per-channel output of expected_steps, tabulated and sorted from largest to smallest
steps = expected_steps(trans_matrix)
df_steps = (
    pd.DataFrame({
        'Channel': list(steps.keys()),
        'Next Touchpoints': list(steps.values()),
    })
    .sort_values(by='Next Touchpoints', ascending=False)
)

sns.barplot(x='Channel', y='Next Touchpoints', data=df_steps, color='#2653de')