In [None]:
from scipy.stats import binom
import matplotlib.pyplot as plt
from pprint import pprint
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import random
import statistics

In [None]:
# Read the raw attribution log from disk and preview the resulting frame
csv_path = 'attribution_data.csv'
df = pd.read_csv(csv_path)
df

In [None]:
# Print a structural summary of the loaded frame (columns, dtypes, non-null counts)
df.info()

In [None]:
# First Touch
# Rows flagged as conversions; each one is a conversion event to be credited.
temp = df[df['conversion'] == 1]

# One row per cookie holding the first recorded touchpoint of that cookie.
# The index must stay unique (one entry per cookie): re-indexing this frame
# by the row-level `df['cookie']` column would duplicate every cookie once per
# touchpoint, and the lookup below would then weight each converting cookie
# by the length of its path instead of counting it once.
# NOTE(review): `first()` takes the first row in file order, so this assumes
# the CSV is already in chronological order within each cookie - confirm.
first_touch = df.groupby('cookie').first()

# Credit every conversion event to the first channel of the converting cookie.
cookie_index = list(temp['cookie'])
first_touch_inter = first_touch.loc[cookie_index]

# Share of conversions per first-touch channel, as a percentage.
first_touch_final = pd.DataFrame(round(first_touch_inter['channel'].value_counts(normalize=True) * 100, 2))
first_touch_final.columns = ['Weightage(%)']
first_touch_final

In [None]:
# Wide canvas with an explicit Axes handle for the first-touch chart
fig, ax = plt.subplots(figsize=(18, 6))

# One bar per channel, drawn by Seaborn onto the Axes created above
sns.barplot(
    data=first_touch_final,
    x=first_touch_final.index,
    y='Weightage(%)',
    color='skyblue',
    ax=ax,
)

# Annotate each bar with its value
bars = ax.containers[0]
ax.bar_label(bars, fontsize=10)

# Render the figure
plt.show()

In [None]:
# Last Touch
# Percentage share of conversion rows per channel (the channel recorded on the
# converting row itself), rounded to two decimals.
last_touch_share = temp['channel'].value_counts(normalize=True) * 100
last_touch_final = round(last_touch_share, 2).to_frame('Weightage(%)')
last_touch_final

In [None]:
# Wide canvas with an explicit Axes handle for the last-touch chart
fig, ax = plt.subplots(figsize=(18, 6))

# One bar per channel, drawn by Seaborn onto the Axes created above
sns.barplot(
    data=last_touch_final,
    x=last_touch_final.index,
    y='Weightage(%)',
    color='skyblue',
    ax=ax,
)

# Annotate each bar with its value
bars = ax.containers[0]
ax.bar_label(bars, fontsize=10)

# Render the figure
plt.show()

In [None]:
from collections import Counter

def transition_states(list_of_paths):
    """Count how often each consecutive pair of states occurs across all paths.

    Args:
        list_of_paths: iterable of indexable sequences of state names.

    Returns:
        A plain dict mapping "origin>destination" to the number of times that
        step was observed, in order of first occurrence.
    """
    step_counts = Counter()
    for path in list_of_paths:
        # Walk the path pairwise: (path[0], path[1]), (path[1], path[2]), ...
        for pos in range(1, len(path)):
            step_counts[f"{path[pos - 1]}>{path[pos]}"] += 1
    return dict(step_counts)

In [None]:
from collections import defaultdict

def transition_prob(trans_dict, list_of_paths):
    """Turn transition counts into per-origin transition probabilities.

    Args:
        trans_dict: mapping of "origin>destination" keys to observed counts.
        list_of_paths: iterable of paths (sequences of state names); only
            origins that appear in these paths are considered.

    Returns:
        A ``defaultdict`` mapping each "origin>destination" key with a positive
        count to ``count / total positive count leaving that origin``.
        Transitions leaving the absorbing states 'Conversion' and 'Null' are
        excluded. The ``defaultdict`` type is kept for backward compatibility.
    """
    # Every state seen in the paths, minus the absorbing ones.
    origins = set(x for element in list_of_paths for x in element) - {'Conversion', 'Null'}

    def origin_of(transition):
        # The origin is everything before the first '>'. Comparing the whole
        # origin (rather than searching for "state>" as a substring) keeps
        # channels such as 'Search' and 'Paid Search' from being mixed up.
        return transition.split('>', 1)[0]

    # First pass: total outgoing count per origin.
    outgoing_totals = defaultdict(int)
    for transition, count in trans_dict.items():
        if count > 0 and origin_of(transition) in origins:
            outgoing_totals[origin_of(transition)] += count

    # Second pass: normalise each count by its origin's total.
    trans_prob = defaultdict(dict)
    for transition, count in trans_dict.items():
        origin = origin_of(transition)
        if count > 0 and origin in origins:
            trans_prob[transition] = float(count) / float(outgoing_totals[origin])

    return trans_prob


In [None]:
def transition_matrix(list_of_paths, transition_probabilities):
    """Build the square state-transition matrix as a DataFrame.

    Args:
        list_of_paths: iterable of paths (sequences of state names); the set of
            states found here defines the rows and columns.
        transition_probabilities: mapping of "origin>destination" keys to
            probabilities.

    Returns:
        A float DataFrame indexed by origin (rows) and destination (columns).
        'Conversion' and 'Null', when present, are absorbing: their diagonal
        entry is 1.0. All entries not covered by ``transition_probabilities``
        are 0.0.
    """
    # Unique states in order of first appearance, so the layout is
    # reproducible from run to run (set iteration order is not).
    states = list(dict.fromkeys(x for element in list_of_paths for x in element))

    # Allocate the full zero matrix once instead of growing it cell by cell.
    trans_matrix = pd.DataFrame(0.0, index=states, columns=states)

    # Absorbing states loop onto themselves. A single `.at[row, col]` write is
    # used because chained indexing (`.loc[row][col] = ...`) may assign to a
    # temporary copy and leave the matrix unchanged.
    for absorbing_state in ('Conversion', 'Null'):
        if absorbing_state in trans_matrix.index:
            trans_matrix.at[absorbing_state, absorbing_state] = 1.0

    # Fill the transition matrix with transition probabilities
    for key, value in transition_probabilities.items():
        origin, destination = key.split('>')
        trans_matrix.at[origin, destination] = value

    return trans_matrix

In [None]:
def removal_effects(dt, conversion_rate):
    """Compute the removal effect of every channel in a transition matrix.

    For each channel the matrix is rebuilt without that channel, the
    probability of being absorbed in 'Conversion' when starting from 'Start'
    is recomputed, and the relative drop versus ``conversion_rate`` is stored.

    Args:
        dt: square transition-matrix DataFrame whose labels include 'Start',
            'Null' and 'Conversion' on both axes. It is not modified.
        conversion_rate: baseline conversion rate of the full model.

    Returns:
        dict mapping channel name to ``1 - removal_cvr / conversion_rate``.
    """
    absorbing = ['Null', 'Conversion']
    removal_effects_dict = {}
    channels = [channel for channel in dt.columns if channel not in ['Start', 'Null', 'Conversion']]

    for channel in channels:
        # drop() returns a new frame, so the caller's matrix is left untouched.
        removal_dt = dt.drop(channel, axis=1).drop(channel, axis=0).astype(float)

        # Probability that used to flow into the removed channel now leaks to
        # 'Null'. It is added to the existing 'Null' probability so every row
        # still sums to 1, and written with direct column / .loc assignment
        # because chained indexing may write to a temporary copy.
        lost_mass = 1.0 - removal_dt.sum(axis=1)
        removal_dt['Null'] = removal_dt['Null'] + lost_mass
        removal_dt.loc['Null', 'Null'] = 1.0

        # Select both blocks with the same explicit label order so rows and
        # columns stay aligned even if dt's column order differs from its index.
        transient = [state for state in removal_dt.index if state not in absorbing]
        removal_to_conv = removal_dt.loc[transient, absorbing]
        removal_to_non_conv = removal_dt.loc[transient, transient]

        # Fundamental matrix N = (I - Q)^-1 of the absorbing chain
        removal_inv_diff = np.linalg.inv(np.identity(len(transient)) - np.asarray(removal_to_non_conv))

        # Absorption probabilities B = N . R, one column per absorbing state
        removal_dot_prod = np.dot(removal_inv_diff, np.asarray(removal_to_conv))
        absorption = pd.DataFrame(removal_dot_prod, index=transient, columns=absorbing)

        # Conversion rate of the chain without this channel
        removal_cvr = absorption.loc['Start', 'Conversion']

        # Share of the baseline conversion rate lost by removing the channel
        removal_effects_dict[channel] = 1 - removal_cvr / conversion_rate

    return removal_effects_dict


In [None]:
def markov_chain_allocations(removal_effects, total_conversions):
    """Split the total conversions across channels by relative removal effect.

    Args:
        removal_effects: dict mapping channel name to its removal effect.
        total_conversions: number of conversions to distribute.

    Returns:
        dict mapping each channel to its share of ``total_conversions``,
        proportional to the channel's removal effect.
    """
    # Normalising constant: the sum of all removal effects
    effect_total = np.sum(list(removal_effects.values()))

    allocations = {}
    for channel, effect in removal_effects.items():
        share = effect / effect_total
        allocations[channel] = share * total_conversions

    return allocations

In [None]:
def markov_model(df, conv_col, channel_col, user_id, time_col=None):
    """Run Markov-chain attribution and return each channel's share in percent.

    Args:
        df: touchpoint-level DataFrame, one row per interaction. Not modified.
        conv_col: column holding the conversion flag; a user's path ends in
            'Null' when the flag on their last row equals 0, otherwise in
            'Conversion'.
        channel_col: column holding the channel of each interaction.
        user_id: column identifying the user.
        time_col: optional column used to order each user's interactions.
            When omitted, the incoming row order within each user is kept.

    Returns:
        DataFrame indexed by channel (sorted) with a single 'Weightage(%)'
        column, rounded to two decimals.

    Side effects:
        Prints the removal effect of every channel, followed by the names of
        the least and most impactful channels.
    """
    # A stable sort is required here: the default quicksort may reorder rows
    # that share the same user id, which would scramble both the path order
    # and the "last interaction" lookup below.
    sort_keys = [user_id] if time_col is None else [user_id, time_col]
    df = df.sort_values(sort_keys, kind='stable')

    # One path per user. `unique()` keeps only the first occurrence of each
    # channel, so repeated visits to a channel are collapsed.
    df_paths = df.groupby(user_id)[channel_col].aggregate(lambda x: x.unique().tolist()).reset_index()
    df_last_interaction = df.drop_duplicates(user_id, keep='last')[[user_id, conv_col]]
    df_paths = pd.merge(df_paths, df_last_interaction, how='left', on=user_id)

    # Wrap every path in 'Start' and the absorbing state it ended in.
    list_of_paths = [
        ['Start'] + channels + (['Null'] if converted == 0 else ['Conversion'])
        for channels, converted in zip(df_paths[channel_col], df_paths[conv_col])
    ]

    total_conversions = sum(path.count('Conversion') for path in list_of_paths)
    base_conversion_rate = total_conversions / len(list_of_paths)

    trans_states = transition_states(list_of_paths)
    trans_prob = transition_prob(trans_states, list_of_paths)
    trans_matrix = transition_matrix(list_of_paths, trans_prob)
    removal_effects_dict = removal_effects(trans_matrix, base_conversion_rate)

    print("Removal Effects by Channel:")
    for k, v in sorted(removal_effects_dict.items(), key=lambda x: x[1]):
        print(f"{k}: {v:.4f}")

    least_impact_channel = min(removal_effects_dict, key=removal_effects_dict.get)
    most_impact_channel = max(removal_effects_dict, key=removal_effects_dict.get)

    print(least_impact_channel)
    print(most_impact_channel)

    # Distribute the conversions, then express each channel as a percentage.
    attributions = markov_chain_allocations(removal_effects_dict, total_conversions)
    res_markov = pd.DataFrame(attributions.values(), index=attributions.keys())
    res_markov.columns = ['weightage']
    total_weight = res_markov['weightage'].sum()
    res_markov['Weightage(%)'] = round(res_markov['weightage'] / total_weight * 100, 2)
    res_markov = res_markov.drop(['weightage'], axis=1).sort_index()
    return res_markov

In [None]:
# Fit the Markov attribution model and rank channels from highest to lowest share
markov = (
    markov_model(df, 'conversion', 'channel', 'cookie')
    .sort_values(by='Weightage(%)', ascending=False)
)

In [None]:
# Wide canvas with an explicit Axes handle for the Markov attribution chart
fig, ax = plt.subplots(figsize=(18, 6))

# One bar per channel, drawn by Seaborn onto the Axes created above
sns.barplot(
    data=markov,
    x=markov.index,
    y='Weightage(%)',
    color='skyblue',
    ax=ax,
)

# Annotate each bar with its value
bars = ax.containers[0]
ax.bar_label(bars, fontsize=10)

# Render the figure
plt.show()