In [2]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.stats.proportion import proportions_ztest, proportion_confint
from logic import load_contacts, row_to_prospect, plot_booking_rate, total_touches_analysis
from search import ProspectSearcher

emails_per_person = 4.419
sequences_seen = 1.202

df = load_contacts("../cleaned-data-main-seq-25.csv")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Check significance between number of sequences enrolled and meeting booked rate
# Results - 7 sequences were statistically significant in a good way
def amount_by_sequence(df):
    amt_of_sequences = (
        df.groupby("Number of Sequences Enrolled")["Meeting Booked"]
            .agg(["count", "sum"])
            .reset_index()
        )
    amt_of_sequences["Rate"] = amt_of_sequences["sum"] / amt_of_sequences["count"] * 100
    return amt_of_sequences
amt_of_sequences = amount_by_sequence(df)

def overall_booking_rate(df):
    total_success = (df["Meeting Booked"] != 0).sum()
    total_contacts = len(df)
    overall_rate = total_success / total_contacts * 100
    return f"Overall booking rate: {overall_rate}.3%"
overall_booking_rate(df)

'Overall booking rate: 0.572309046885318.3%'

In [10]:
results = []
for _, row in amt_of_sequences.iterrows():
    n = row["count"]
    x = row["sum"]
    if n < 15:
        continue
    stat, pval = proportions_ztest([x, total_success], [n, total_contacts])
    results.append({
        "Sequences": row["Number of Sequences Enrolled"],
        "Rate": row["Rate"] * 100,
        "Sample Size": n,
        "p-value": pval,
        "Significant": pval < 0.05
    })
sig_df = pd.DataFrame(results)
display(sig_df)

Unnamed: 0,Sequences,Rate,Sample Size,p-value,Significant
0,1.0,34.650035,1443.0,0.29783,False
1,2.0,41.493776,723.0,0.595358,False
2,3.0,114.503817,524.0,0.1171,False
3,4.0,40.160643,498.0,0.626554,False
4,5.0,88.300221,453.0,0.414383,False
5,6.0,34.722222,288.0,0.619273,False
6,7.0,45.248869,221.0,0.816761,False
7,8.0,198.675497,151.0,0.029095,True
8,9.0,0.0,86.0,0.48172,False
9,10.0,0.0,65.0,0.540771,False


In [None]:
# Check significance based on number of sales activities
# Results - between 30 - 70 touches were statiscally effective

amt_of_activities = (
    df.groupby("Number of Sales Activities")["Meeting Booked"]
        .agg(["count", "sum"])
        .reset_index()
    )
amt_of_activities["Rate"] = amt_of_activities["sum"] / amt_of_activities["count"] * 100

total_success = (df["Meeting Booked"] != 0).sum()
total_contacts = len(df)
overall_rate = total_success / total_contacts * 100
print(f"Overall booking rate: {overall_rate}.3%")

In [None]:
results = []
for _, row in amt_of_activities.iterrows():
    n = row["count"]
    x = row["sum"]
    if n < 15:
        continue
    stat, pval = proportions_ztest([x, total_success], [n, total_contacts])
    results.append({
        "Sales Activities": row["Number of Sales Activities"],
        "Rate": row["Rate"] * 100,
        "Sample Size": n,
        "p-value": pval,
        "Significant": pval < 0.05
    })
sig_df = pd.DataFrame(results)
display(sig_df[sig_df["Significant"] != False])

In [None]:
# Check significance based solely on role category (job level)
# Result - not statistically significant

df["role_category"] = df.apply(lambda row: row_to_prospect(row).get_role_category(), axis=1)
summary = (
    df.groupby('role_category')["Meeting Booked"]
    .agg(["count", 'sum'])
    .reset_index()
)

summary["Rate"] = summary["sum"] / summary["count"] * 100

summary = summary[summary["count"] >= 20]

total_success = (df["Meeting Booked"] != 0).sum()
total_contacts = len(df)
overall_rate = total_success / total_contacts

results = []
for _, row in summary.iterrows():
    n = row["count"]
    x = row["sum"]

    stat, pval = proportions_ztest([x, total_success], [n, total_contacts])
    results.append({
        "Role Category": row["role_category"],        
        "Rate": row["Rate"] * 100,
        "Sample Size": n,
        "p-value": pval,
        "Significant": pval < 0.05
    })
sig_df = pd.DataFrame(results)
display(sig_df)

In [None]:
# Check based on functional group
# Results - workplace and capital/campus planning good, no discernable functional group is bad

df["func_group"] = df.apply(lambda row: row_to_prospect(row).get_functional_group(), axis=1)
summary = (
    df.groupby(['role_category', "func_group"])["Meeting Booked"]
    .agg(["count", 'sum'])
    .reset_index()
)

summary["Rate"] = summary["sum"] / summary["count"] * 100

summary = summary[summary["count"] >= 20]

total_success = (df["Meeting Booked"] != 0).sum()
total_contacts = len(df)
overall_rate = total_success / total_contacts

results = []
for _, row in summary.iterrows():
    n = row["count"]
    x = row["sum"]

    stat, pval = proportions_ztest([x, total_success], [n, total_contacts])
    results.append({
        "Functional Group": row["func_group"],        
        "Rate": row["Rate"] * 100,
        "Sample Size": n,
        "p-value": pval,
        "Significant": pval < 0.05
    })
sig_df = pd.DataFrame(results)
display(sig_df[sig_df["Significant"] == True])

In [None]:
# Check significance based on title and function (together)
# Result:
    # Director of Campus/Capital Planning and Director of Workplace
    # Manager of Other was statistically worse off

df["role_category"] = df.apply(lambda row: row_to_prospect(row).get_role_category(), axis=1)
df["func_group"] = df.apply(lambda row: row_to_prospect(row).get_functional_group(), axis=1)
summary = (
    df.groupby(['role_category', "func_group"])["Meeting Booked"]
    .agg(["count", 'sum'])
    .reset_index()
)

summary["Rate"] = summary["sum"] / summary["count"] * 100

summary = summary[summary["count"] >= 20]

total_success = (df["Meeting Booked"] != 0).sum()
total_contacts = len(df)
overall_rate = total_success / total_contacts

results = []
for _, row in summary.iterrows():
    n = row["count"]
    x = row["sum"]

    stat, pval = proportions_ztest([x, total_success], [n, total_contacts])
    results.append({
        "Role Category": row["role_category"],
        "Functional Group": row["func_group"],        
        "Rate": row["Rate"] * 100,
        "Sample Size": n,
        "p-value": pval,
        "Significant": pval < 0.05
    })
sig_df = pd.DataFrame(results)
display(sig_df)

In [None]:
# Check significance from sequences
# Result - Campus/Capital Planning was significantly more effective
summary = (
    df.groupby("Last Sequence Enrolled")["Meeting Booked"]
    .agg(["count", 'sum'])
    .reset_index()
)

summary["Rate"] = summary["sum"] / summary["count"] * 100

summary = summary[summary["count"] >= 20]

total_success = (df["Meeting Booked"] != 0).sum()
total_contacts = len(df)
overall_rate = total_success / total_contacts

results = []
for _, row in summary.iterrows():
    n = row["count"]
    x = row["sum"]

    stat, pval = proportions_ztest([x, total_success], [n, total_contacts])
    results.append({
        "Last Sequence Enrolled": row["Last Sequence Enrolled"],        
        "Rate": row["Rate"] * 100,
        "Sample Size": n,
        "p-value": pval,
        "Significant": pval < 0.05
    })
sig_df = pd.DataFrame(results)
display(sig_df)