In [24]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.stats.proportion import proportions_ztest, proportion_confint
from analyzer.logic import load_contacts, row_to_prospect, plot_booking_rate, total_touches_analysis
from analyzer.search import ProspectSearcher

emails_per_person = 4.419
sequences_seen = 1.202

df = load_contacts("cleaned-data-main-seq-25.csv")
df.head()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Unnamed: 0,Record ID,First,Last,Name,Currently In Sequence,Job Title,Company,State,Number of Sequences Enrolled,Number of Sales Activities,Create Date,Last Sequence Enrolled,Last Sequence Ended Date,Last Contacted,Last Engagement Date,Meeting Booked
0,28166310000.0,Michael,Reese,Michael Reese,False,"Head Senior VP, Corporate Real Estate & Facili...",Customers Bank,Pennsylvania,3,37,6/7/2024 14:09,269668391,3/7/2025 14:23,4/2/2025 11:15,4/16/2025 16:35,1
1,4373301.0,Kris,Weir,Kris Weir,False,"Space Planning Director, Operations",Questrom School of Business,Massachusetts,8,37,3/7/2022 9:28,279559810,7/9/2025 12:33,10/20/2025 15:00,10/16/2025 12:57,1
2,3323551.0,Scott,Albert,Scott Albert,False,Vice President of Facilities,Slippery Rock University,Pennsylvania,14,67,10/18/2021 9:14,279109668,7/30/2025 14:25,10/15/2025 10:20,10/15/2025 12:04,1
3,144612000000.0,Amy,Ciba,Amy Ciba,False,"Senior Director, Global Workplaces",Meltwater,Stockholms län,1,9,8/6/2025 15:11,269580465,8/6/2025 16:46,9/4/2025 12:54,8/17/2025 14:52,1
4,3000951.0,Jessica,Davenport,Jessica Davenport,False,Director Campus Planning & Design,Salt Lake Community College,Utah,8,74,7/19/2021 17:23,279559810,7/9/2025 10:06,9/17/2025 15:39,7/15/2025 10:35,1


In [25]:
amt_of_sequences = (
    df.groupby("Number of Sequences Enrolled")["Meeting Booked"]
        .agg(["count", "sum"])
        .reset_index()
    )
amt_of_sequences["Rate"] = amt_of_sequences["sum"] / amt_of_sequences["count"] * 100

total_success = (df["Meeting Booked"] != 0).sum()
total_contacts = len(df)
overall_rate = total_success / total_contacts * 100
print(f"Overall booking rate: {overall_rate}.3%")

Overall booking rate: 0.572309046885318.3%


In [26]:
results = []
for _, row in amt_of_sequences.iterrows():
    n = row["count"]
    x = row["sum"]
    if n < 15:
        continue
    stat, pval = proportions_ztest([x, total_success], [n, total_contacts])
    results.append({
        "Sequences": row["Number of Sequences Enrolled"],
        "Rate": row["Rate"] * 100,
        "Sample Size": n,
        "p-value": pval,
        "Significant": pval < 0.05
    })
sig_df = pd.DataFrame(results)
display(sig_df)

Unnamed: 0,Sequences,Rate,Sample Size,p-value,Significant
0,1.0,34.650035,1443.0,0.29783,False
1,2.0,41.493776,723.0,0.595358,False
2,3.0,114.503817,524.0,0.1171,False
3,4.0,40.160643,498.0,0.626554,False
4,5.0,88.300221,453.0,0.414383,False
5,6.0,34.722222,288.0,0.619273,False
6,7.0,45.248869,221.0,0.816761,False
7,8.0,198.675497,151.0,0.029095,True
8,9.0,0.0,86.0,0.48172,False
9,10.0,0.0,65.0,0.540771,False


In [27]:
amt_of_activities = (
    df.groupby("Number of Sales Activities")["Meeting Booked"]
        .agg(["count", "sum"])
        .reset_index()
    )
amt_of_activities["Rate"] = amt_of_activities["sum"] / amt_of_activities["count"] * 100

total_success = (df["Meeting Booked"] != 0).sum()
total_contacts = len(df)
overall_rate = total_success / total_contacts * 100
print(f"Overall booking rate: {overall_rate}.3%")

Overall booking rate: 0.572309046885318.3%


In [28]:
results = []
for _, row in amt_of_activities.iterrows():
    n = row["count"]
    x = row["sum"]
    if n < 15:
        continue
    stat, pval = proportions_ztest([x, total_success], [n, total_contacts])
    results.append({
        "Sales Activities": row["Number of Sales Activities"],
        "Rate": row["Rate"] * 100,
        "Sample Size": n,
        "p-value": pval,
        "Significant": pval < 0.05
    })
sig_df = pd.DataFrame(results)
display(sig_df[sig_df["Significant"] != False])

Unnamed: 0,Sales Activities,Rate,Sample Size,p-value,Significant
32,33.0,333.333333,60.0,0.006285,True
36,37.0,363.636364,55.0,0.003692,True
39,40.0,465.116279,43.0,0.000632,True
49,50.0,344.827586,29.0,0.043913,True
63,64.0,588.235294,17.0,0.004394,True
64,65.0,555.555556,18.0,0.005949,True
66,67.0,588.235294,17.0,0.004394,True
68,69.0,476.190476,21.0,0.012496,True


In [44]:
df["role_category"] = df.apply(lambda row: row_to_prospect(row).get_role_category(), axis=1)
df["func_group"] = df.apply(lambda row: row_to_prospect(row).get_functional_group(), axis=1)
summary = (
    df.groupby(['role_category', "func_group"])["Meeting Booked"]
    .agg(["count", 'sum'])
    .reset_index()
)

summary["Rate"] = summary["sum"] / summary["count"] * 100

summary = summary[summary["count"] >= 20]

total_success = (df["Meeting Booked"] != 0).sum()
total_contacts = len(df)
overall_rate = total_success / total_contacts

results = []
for _, row in summary.iterrows():
    n = row["count"]
    x = row["sum"]

    stat, pval = proportions_ztest([x, total_success], [n, total_contacts])
    results.append({
        "Role Category": row["role_category"],
        "Functional Group": row["func_group"],        
        "Rate": row["Rate"] * 100,
        "Sample Size": n,
        "p-value": pval,
        "Significant": pval < 0.05
    })
sig_df = pd.DataFrame(results)
display(sig_df[sig_df["Significant"] == True])

Unnamed: 0,Role Category,Functional Group,Rate,Sample Size,p-value,Significant
0,Director,Campus/Capital,263.157895,76,0.021809,True
5,Director,Workplace,203.252033,246,0.00541,True
11,Manager,Other,416.666667,24,0.02197,True
