#### Hypothesis: Copilot training positively impacts both account-level engagement and user-level conversion rates.

In [62]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os


In [63]:
# Load the merged dataset from the data directory (relative path).
merged_df = pd.read_csv(filepath_or_buffer="../data/merged_df.csv")

In [64]:
# Cleaning Conversion Rate: strip the literal '%' sign and store the column as float.
# The dtype guard keeps this cell idempotent: once the column is numeric (e.g. the
# cell is re-run), the `.str` accessor would raise AttributeError, so the strip is
# only applied while the column is still text.
conversion_col = "Conversion Rate Last 30 Days"
cleaned_series = merged_df[conversion_col]
if not pd.api.types.is_numeric_dtype(cleaned_series):
    cleaned_series = cleaned_series.str.replace('%', '', regex=False)
merged_df[conversion_col] = cleaned_series.astype(float)

In [None]:
# Preview the first rows only instead of rendering the entire frame.
merged_df.head()

In [None]:
# User-level box plot: conversion-rate distribution for each training status.
fig_box, ax_box = plt.subplots(figsize=(8, 6))
sns.boxplot(
    x="Attended Any Copilot Training",
    y="Conversion Rate Last 30 Days",
    data=merged_df,
    ax=ax_box,
)
ax_box.set(
    title="Distribution of Conversion Rates by Training Status (Box Plot)",
    xlabel="Attended Any Copilot Training",
    ylabel="Conversion Rate (%)",
)
fig_box.tight_layout()

# Write the figure to ../figs, creating the directory if it does not exist yet.
save_directory = os.path.join("..", "figs")
filename = "conversion_rate_training_box_plot.png"
save_path = os.path.join(save_directory, filename)
os.makedirs(save_directory, exist_ok=True)
fig_box.savefig(save_path)

print(f"Figure successfully saved to: {save_path}")

In [None]:
# Same box plot as above, with each training-status group split by Segment via `hue`.
fig_box, ax_box = plt.subplots(figsize=(8, 6))
sns.boxplot(
    x="Attended Any Copilot Training",
    y="Conversion Rate Last 30 Days",
    hue="Segment",
    data=merged_df,
    ax=ax_box,
)
ax_box.set(
    title="Distribution of Conversion Rates by Training Status (Box Plot)",
    xlabel="Attended Any Copilot Training",
    ylabel="Conversion Rate (%)",
)
fig_box.tight_layout()

# Write the figure to ../figs, creating the directory if it does not exist yet.
save_directory = os.path.join("..", "figs")
filename = "conversion_rate_training_box_by_segment_plot.png"
save_path = os.path.join(save_directory, filename)
os.makedirs(save_directory, exist_ok=True)
fig_box.savefig(save_path)

Training status does not appear to make much of a difference in conversion rate from this angle.

#### Try looking at Conversion Rate with Account Views:

In [68]:
# Coerce "Viewed Account Updates" to numeric; entries that cannot be parsed become NaN.
views_col = "Viewed Account Updates Last 30 Days"
numeric_series = pd.to_numeric(merged_df[views_col], errors="coerce")

# Impute the NaNs (missing or unparseable originals) with the mean of the parsed values.
column_mean = numeric_series.mean()
merged_df[views_col] = numeric_series.fillna(value=column_mean)

In [None]:
# Scatter plot: conversion rate against account-update views, colored by training status.
fig, ax = plt.subplots(figsize=(10, 6))

sns.scatterplot(
    data=merged_df,
    x="Viewed Account Updates Last 30 Days",
    y="Conversion Rate Last 30 Days",
    hue="Attended Any Copilot Training",  # color-encode training status
    s=100,  # marker size
    ax=ax,
)

ax.set_title("Conversion Rate vs. Account Views, Colored by Training Status")
ax.set_xlabel("Viewed Account Updates Last 30 Days")
ax.set_ylabel("Conversion Rate Last 30 Days (%)")

ax.legend(title="Attended Training")
fig.tight_layout()

# Write the figure to ../figs, creating the directory if it does not exist yet.
save_directory = os.path.join("..", "figs")
os.makedirs(save_directory, exist_ok=True)
filename = "conversion_rate_account_views_training_scatter_plot.png"
save_path = os.path.join(save_directory, filename)

# Save this cell's figure (`fig`). Saving `fig_box` here would write the box plot
# from an earlier cell under the scatter-plot filename.
fig.savefig(save_path)

print(f"Figure successfully saved to: {save_path}")

In [None]:
### Simple stats: mean conversion rate per (training status, Segment) group.
# The string alias "mean" is used instead of np.mean, which recent pandas versions
# deprecate as an `agg` argument (FutureWarning); the result is the same.
merged_df.groupby(["Attended Any Copilot Training", "Segment"]).agg(
    {"Conversion Rate Last 30 Days": "mean"}
)

In [None]:
# Mean conversion rate per (training status, Sub Persona) group.
# The string alias "mean" is used instead of np.mean, which recent pandas versions
# deprecate as an `agg` argument (FutureWarning); the result is the same.
merged_df.groupby(["Attended Any Copilot Training", "Sub Persona"]).agg(
    {"Conversion Rate Last 30 Days": "mean"}
)

### Roll this up to the account level:

In [72]:
### Assumption: everyone on a team is either trained or not trained, but we will check:

In [73]:
# Boolean flag per row: True where the training-status column equals "Trained".
merged_df["trained"] = merged_df["Attended Any Copilot Training"].eq("Trained")

In [None]:
# Preview the first rows only (including the new `trained` column) instead of
# rendering the entire frame.
merged_df.head()

In [None]:
# Roll up to one row per account: mean conversion rate and the mean of the boolean
# `trained` flag (i.e. the fraction of the account's rows that are trained).
# The string alias "mean" is used instead of np.mean, which recent pandas versions
# deprecate as an `agg` argument (FutureWarning); the result is the same.
account_trained_df = (
    merged_df.groupby("Account ID")
    .agg({"Conversion Rate Last 30 Days": "mean", "trained": "mean"})
    .reset_index()
)

In [76]:
# Collapse the per-account trained fraction into a flag:
# True when at least one row in the account was trained (fraction > 0).
account_trained_df["trained"] = account_trained_df["trained"].gt(0)

In [None]:
# Mean of the account-level conversion rates, split by whether anyone in the account was trained.
# The string alias "mean" is used instead of np.mean, which recent pandas versions
# deprecate as an `agg` argument (FutureWarning); the result is the same.
account_trained_df.groupby("trained").agg({"Conversion Rate Last 30 Days": "mean"})

In [None]:
# Account-level box plot: distribution of per-account mean conversion rates, split by
# whether anyone in the account attended training.
fig_box, ax_box = plt.subplots(figsize=(8, 6))

sns.boxplot(
    # Plot the account-level roll-up. `merged_df` also has a `trained` column, but it
    # is per user, which would contradict the axis label and filename below.
    data=account_trained_df,
    x="trained",
    y="Conversion Rate Last 30 Days",
    ax=ax_box,
)

ax_box.set_title("Distribution of Conversion Rates by Training Status (Box Plot)")
ax_box.set_xlabel("Someone in Account Attended Any Copilot Training")
ax_box.set_ylabel("Conversion Rate (%)")
fig_box.tight_layout()

# Write the figure to ../figs, creating the directory if it does not exist yet.
save_directory = os.path.join("..", "figs")
os.makedirs(save_directory, exist_ok=True)
filename = "someone_in_account_attended_training_account_conversion_rates.png"
save_path = os.path.join(save_directory, filename)

fig_box.savefig(save_path)