# Analysis

Look into the data structure and visualise the attribution data.

In [None]:
cd ..

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from ChannelAttribution import heuristic_models, markov_model
from src.utility import clean_data, summarise_paths

In [None]:
# Read the raw attribution CSV and hand it straight to the project's
# `clean_data` helper; `df` is the cleaned frame used by the cells below.
df = clean_data(pd.read_csv("data/attribution data.csv"))

In [None]:
# Pass the cleaned data through the project helper `summarise_paths`; the cells
# below read the `path`, `conversion` and `conversion_value` columns of the result.
df_agg = summarise_paths(df)

### Plot the number of conversions over time

In [None]:
# Daily conversion totals: keep only the converting rows, then sum them per date.
conversions = (
    df.loc[df["conversion"] == 1]
    .groupby("date", as_index=False)
    .agg(conversions=("conversion", "sum"))
)

# Line chart of conversions over time, one marker per date.
fig, ax = plt.subplots(figsize=(8, 3))
ax.plot(conversions["date"], conversions["conversions"], marker="o", markersize=5)
ax.grid(True, alpha=0.3)
plt.show()

### Apply the channel attribution modelling package

In [None]:
# Peek at the first row of the summarised data to check its columns.
df_agg.head(1)

In [None]:
# Create an attribution dataframe with one row per distinct path: the number of
# rows sharing that path, plus the summed conversions and conversion value.
# NOTE(review): the model cells visible below are given `df_agg`, not `df_attr` —
# confirm which one is intended.
df_attr = df_agg.groupby(["path"], as_index=False).agg(
    volume=("path", "count"),
    conversion=("conversion", "sum"),
    conversion_value=("conversion_value", "sum"),
)

In [None]:
%%time
# Run ChannelAttribution's `heuristic_models` on `df_agg`, telling it by name which
# columns hold the path, the conversions and the conversion value.
# `%%time` has to stay on the first line of the cell; it reports the cell's run time.
# `H` is merged with the Markov output on `channel_name` further down.
H = heuristic_models(
    Data=df_agg,
    var_path="path",
    var_conv="conversion",
    var_value="conversion_value",
)

In [None]:
%%time
# Run ChannelAttribution's `markov_model` on the same `df_agg` input and column
# names as the heuristic models, so the two outputs can be compared per channel.
# `%%time` has to stay on the first line of the cell; it reports the cell's run time.
# `M` is merged with the heuristic output on `channel_name` further down.
M = markov_model(
    Data=df_agg,
    var_path="path",
    var_conv="conversion",
    var_value="conversion_value",
)

In [None]:
# Join the heuristic and Markov outputs, keeping only channels present in both.
results = H.merge(M, on="channel_name", how="inner")

In [None]:
# Index the combined table by channel so the channel names act as row labels.
results = results.set_index("channel_name")

In [None]:
fig, ax = plt.subplots(figsize=(10, 5))

# `results` is the merged table (indexed by channel, one column per model output),
# so `ax.bar(results.index, results)` would pass a 2-D table where one height per
# bar is expected. Let pandas draw a group of bars per channel, one bar per column.
# NOTE(review): conversion and value columns share one axis here — consider
# plotting them separately if their scales differ.
results.plot.bar(ax=ax)
plt.show()

In [None]:
# Show only the columns whose label contains "value".
results.filter(like="value")