In [None]:
python
import pandas as pd

import numpy as np

from scipy import stats



# KPI summary table: one row per KPI category, one column per measurement.
# Columns ending in "_O" and the "Remove_*" columns are only populated for
# some categories; the remaining entries are NaN.
data = {
    'Category': [
        'Cohort Satisfaction', 'Handling Customer Demand', 'Time To Execute',
        'Customer Satisfaction', 'Billed Revenue', 'ACR', 'Customer Adds', 'NPSA'
    ],
    'Baseline_Pre': [4, 7, 2, 2, 20_000_000, 100, 4, 5],
    'Constant_Post': [5, 5, 3, 5, 30_000_000, 30_000, 3, 5],
    'Pilot_Post': [7, 2, 7, 7, 40_000_000, 1, 7, 7],
    'Delta_Constant_vs_Baseline': [1, -2, 1, 3, 10_000_000, 29_900, -1, 0],
    'Delta_Pilot_vs_Baseline': [3, -5, 5, 5, 20_000_000, -99, 3, 2],
    'Percent_Dif_Constant_vs_Pilot': [50, -43, 200, 100, 50, -29_999, 100, 40],
    'Remove_Constant_Outliers_Post': [5, np.nan, np.nan, np.nan, 30_000_000, 2, np.nan, np.nan],
    'Delta_Constant_vs_Baseline_O': [1, np.nan, np.nan, np.nan, 10_000_000, -98, np.nan, np.nan],
    'Remove_Pilot_Outliers_Post': [9, np.nan, np.nan, np.nan, 20_000_000, 1, np.nan, np.nan],
    'Delta_Pilot_vs_Baseline_O': [4, np.nan, np.nan, np.nan, 0, -99, np.nan, np.nan],
    'Percent_Dif_Removing_Outliers': [100, np.nan, np.nan, np.nan, -50, -1, np.nan, np.nan],
}

# Convert the data into a DataFrame
df = pd.DataFrame(data)

# The three measurement columns that are rescaled below.
value_cols = ['Baseline_Pre', 'Constant_Post', 'Pilot_Post']

# These columns are built from Python ints, so pandas infers int64. The
# transformations below write non-integer results back through .loc; doing
# that on an int64 column relies on silent upcasting, which pandas has
# deprecated. Cast to float once, explicitly, before modifying anything.
df = df.astype({col: float for col in value_cols})

# The values are intended for paired t-tests (Baseline vs Constant and
# Baseline vs Pilot). Treating this summary data as normally distributed,
# paired observations is a simplification and may not be appropriate for a
# real statistical analysis.

# Express revenue in millions so it is on a scale closer to the other KPIs.
# Only the three measurement columns are rescaled; the delta columns for
# this row remain in the original units.
revenue_rows = df['Category'] == 'Billed Revenue'
df.loc[revenue_rows, value_cols] /= 1_000_000

# ACR spans several orders of magnitude, so compress it with log(1 + x).
# log1p is used because log(0) is undefined.
acr_rows = df['Category'] == 'ACR'
df.loc[acr_rows, value_cols] = np.log1p(df.loc[acr_rows, value_cols])



# Perform t-tests for paired samples

results = {}

for i, category in enumerate(df['Category']):

    # Skipping categorical data for now

    if category in ['Cohort Satisfaction', 'Handling Customer Demand', 'Customer Satisfaction', 'Customer Adds', 'NPSA']:

        continue



    baseline_pre = df.loc[i, 'Baseline_Pre']

    constant_post = df.loc[i, 'Constant_Post']

    pilot_post = df.loc[i, 'Pilot_Post']

    

    # We will create dummy paired samples with the same size for the t-test

    # In a real scenario, we would have the raw data for each matched pair

    sample_size = 30  # assuming a sample size of 30 for illustration

    np.random.seed(0)  # For reproducibility

    baseline_data = np.random.normal(baseline_pre, 1, sample_size)

    constant_data = np.random.normal(constant_post, 1, sample_size)

    pilot_data = np.random.normal(pilot_post, 1, sample_size)

    

    # Perform the t-test for constant vs. baseline

    t_stat_constant, p_val_constant = stats.ttest_rel(baseline_data, constant_data)

    

    # Perform the t-test for pilot vs. baseline

    t_stat_pilot, p_val_pilot = stats.ttest_rel(baseline_data, pilot_data)

    

    # Store the results

    results[category] = {

        't_stat_constant': t_stat_constant,

        'p
