In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import mannwhitneyu


In [None]:
data = pd.read_csv("./ab_test.csv")

In [None]:
data.isnull().mean()

In [None]:
numeric_data = data.select_dtypes(include=[float, int])

# Add the 'ab_test_cohort' column back to the numeric data
numeric_data['ab_test_cohort'] = data['ab_test_cohort']

# Calculate the mean and standard deviation for each metric
average_and_std_metrics = numeric_data.groupby('ab_test_cohort').agg(['mean', 'std'])
average_and_std_metrics

In [None]:
# Plotting
variables = ['total_revenue', 'iap_revenue', 'ads_revenue', 'sub_revenue', 'meaningful_discussions', 'sessions']
for variable in variables:
    plt.figure()
    sns.histplot(data=data, x=variable, hue='ab_test_cohort', element='step', stat='density', common_norm=False)
    plt.title(f'Distribution of {variable}')
    plt.show()

### Because there is no normal distribution we opt for a non paramtric test for all the metrics

In [None]:
def non_param_test(data: pd.DataFrame, column: str):
    control_sessions = data[data['ab_test_cohort'] == 'control'][column]
    test_sessions = data[data['ab_test_cohort'] == 'test'][column]

    # Conducting the Mann-Whitney U Test
    stat, p_value = mannwhitneyu(control_sessions, test_sessions, alternative='two-sided')
    delta = test_sessions.mean() - control_sessions.mean()
    print(f'Mann-Whitney U statistic: {stat}')
    print(f'change: {delta}')
    print(f'P-value: {p_value}')

    return [column, control_sessions.mean(), test_sessions.mean(), delta, delta/control_sessions.mean(), p_value]

In [33]:
columns = ['total_revenue', 'iap_revenue', 'ads_revenue', 'sub_revenue', 'meaningful_discussions', 'sessions']
final_table = []
for column in columns:
    print(f'\n{column}')
    final_table.append(non_param_test(data, column))

pd.DataFrame(final_table, columns=['Metric', 'Control', 'Test', 'Relative Change', 'absolute change', 'P-value'])


total_revenue
Mann-Whitney U statistic: 1952298585.0
change: -0.003182171690395044
P-value: 0.008163835293663428

iap_revenue
Mann-Whitney U statistic: 1935389043.5
change: -0.0038231883464387995
P-value: 0.2897036492035806

ads_revenue
Mann-Whitney U statistic: 1953234304.5
change: -0.0009828343130910133
P-value: 0.005102095708061895

sub_revenue
Mann-Whitney U statistic: 1936189099.5
change: 0.0017355751291245336
P-value: 0.9527956096631967

meaningful_discussions
Mann-Whitney U statistic: 1901015087.0
change: 0.04696797588128132
P-value: 2.1925116647319975e-12

sessions
Mann-Whitney U statistic: 1921981266.0
change: 0.6085534697122092
P-value: 0.02526004654664607


Unnamed: 0,Metric,Control,Test,Relative Change,absolute change,P-value
0,total_revenue,0.080094,0.076912,-0.003182,-0.039731,0.008163835
1,iap_revenue,0.040018,0.036195,-0.003823,-0.095537,0.2897036
2,ads_revenue,0.017381,0.016398,-0.000983,-0.056546,0.005102096
3,sub_revenue,0.022695,0.02443,0.001736,0.076475,0.9527956
4,meaningful_discussions,0.825523,0.872491,0.046968,0.056895,2.192512e-12
5,sessions,29.708419,30.316973,0.608553,0.020484,0.02526005
