In [1]:
import pandas as pd
import numpy as np

def build_transition_matrix(sequence, labels):
    """Return row-normalised transition frequencies between consecutive steps.

    Args:
        sequence: Iterable of touchpoint labels in observed order.
        labels: Labels that fix the row and column order of the result.

    Returns:
        A ``(len(labels), len(labels))`` float array. Entry ``[i, j]`` is the
        share of steps leaving ``labels[i]`` that go directly to ``labels[j]``.
        The row of a label that is never followed by another step is all zero.
    """
    steps = list(sequence)
    size = len(labels)
    if len(steps) < 2:
        # No consecutive pair exists, so there is nothing to count.
        return np.zeros((size, size))

    current = pd.Series(steps)
    table = pd.crosstab(current.shift(), current, normalize='index')
    # crosstab sorts its labels and leaves out any label that never occurs as
    # a source (rows) or destination (columns). Reindexing pins both axes to
    # `labels`, so positions in the array match positions in `labels` and the
    # result is always square.
    table = table.reindex(index=labels, columns=labels, fill_value=0)
    return table.fillna(0).to_numpy(dtype=float)


# Sample customer journey data: one row per touchpoint event.
data = pd.DataFrame({
    'customer_id': [1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4],
    'touchpoint': ['A', 'B', 'A', 'C', 'B', 'C', 'D', 'A', 'B', 'C', 'D'],
    'conversion': [0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0]
})

# Distinct touchpoints in order of first appearance. This single list defines
# the positions used by the matrix, the attribution vector and the report.
touchpoints = data['touchpoint'].unique().tolist()
touchpoint_position = {label: position for position, label in enumerate(touchpoints)}

# Create a transition matrix aligned with `touchpoints`.
# NOTE(review): the whole `touchpoint` column is treated as one sequence, so
# the step from one customer's last row to the next customer's first row is
# counted as a transition; `customer_id` is never read. Confirm whether
# transitions should be computed per customer instead.
transition_matrix = build_transition_matrix(data['touchpoint'], touchpoints)

# Initialize the attribution vector, one slot per touchpoint.
attribution_vector = np.zeros(len(touchpoints))

# Seed the touchpoint of the last row with a score of 1.
# NOTE(review): the `conversion` column is never read; the seed is simply the
# last row's touchpoint. Confirm that this is the intended conversion point.
conversion_point = data['touchpoint'].iloc[-1]
conversion_index = touchpoint_position[conversion_point]
attribution_vector[conversion_index] = 1

# Propagate the attribution scores backward: walk the consecutive row pairs
# from the end of the data to the start, adding to each row's touchpoint the
# following touchpoint's current score weighted by the transition frequency.
visited = [touchpoint_position[label] for label in data['touchpoint']]
row_pairs = zip(reversed(visited[:-1]), reversed(visited[1:]))
for touchpoint_index, next_touchpoint_index in row_pairs:
    attribution_vector[touchpoint_index] += (
        attribution_vector[next_touchpoint_index]
        * transition_matrix[touchpoint_index, next_touchpoint_index]
    )

# Print the attribution scores
for touchpoint, score in zip(touchpoints, attribution_vector):
    print(f"Attribution for Touchpoint {touchpoint}: {score}")


Attribution for Touchpoint A: 2.161459127165574
Attribution for Touchpoint B: 1.788142051516537
Attribution for Touchpoint C: 2.01920438957476
Attribution for Touchpoint D: 1.2962962962962963
