# rosalie dev

Notebook purpose:

- Develop `rosalie` package and templates for working with it

In [18]:
import logging
import os

import numpy as np
import pandas as pd
from scipy.stats import ttest_ind, mannwhitneyu

import rosalie as ro
import fabutils as fu


# Let pandas display up to 500 rows before truncating output.
pd.set_option('display.max_rows', 500)

# Request 'retina' figure output from the inline plotting backend.
%config InlineBackend.figure_format ='retina'
# Load the line_profiler and autoreload IPython extensions.
%load_ext line_profiler
%load_ext autoreload
# autoreload mode 2, so edits to imported modules (e.g. the rosalie package
# under development) are picked up without restarting the kernel.
%autoreload 2

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
# Generate the dataset used by the cells below with fabutils' `dgp_cuped`
# data-generating process, using a fixed seed, then preview the first rows.
# NOTE(review): `n` is presumably the number of rows generated -- confirm
# against the fabutils implementation.
df = fu.dgp.dgp_cuped().make_data(n=10_000, seed=2312)
df.head()

Unnamed: 0,id,y,x,d
0,0,15.411265,16.416167,0
1,1,23.234019,17.460931,1
2,2,30.272267,22.942863,1
3,3,21.426262,17.170929,1
4,4,20.06835,17.922234,1


In [66]:
def welch_t_test(df, metric, d="d"):
    """Compute the p-value of Welch's (unequal-variance) t-test for a metric.

    The rows of ``df`` are split on the ``assignments`` column: rows labelled
    ``'control'`` form one sample and rows labelled ``'treatment'`` the other.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing an ``assignments`` column and the ``metric`` column.
    metric : str
        Name of the column whose values are compared between the two groups.
    d : str, default "d"
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    float
        The ``pvalue`` reported by ``scipy.stats.ttest_ind`` called with
        ``equal_var=False``.
    """
    control_rows = df.query("assignments == 'control'")
    treatment_rows = df.query("assignments == 'treatment'")
    test_outcome = ttest_ind(
        control_rows[metric],
        treatment_rows[metric],
        equal_var=False,
    )
    return test_outcome.pvalue

def mww(df, metric, d="d"):
    """Return the two-sided p-value of the Mann-Whitney-Wilcoxon U-test.

    The rows of ``df`` are split on the ``assignments`` column: rows labelled
    ``'control'`` form one sample and rows labelled ``'treatment'`` the other.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing an ``assignments`` column and the ``metric`` column.
    metric : str
        Name of the column whose values are compared between the two groups.
    d : str, default "d"
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    float
        The ``pvalue`` reported by ``scipy.stats.mannwhitneyu`` for the
        two-sided alternative.
    """
    control_sample = df.query("assignments == 'control'")[metric]
    variant_sample = df.query("assignments == 'treatment'")[metric]
    # Pin the alternative explicitly: older SciPy releases defaulted to a
    # deprecated mode that reports half the two-sided p-value, which would make
    # the result depend on the installed SciPy version.
    return mannwhitneyu(
        control_sample, variant_sample, alternative="two-sided"
    ).pvalue


In [68]:

# Build a rosalie Simulator over `df` for metric 'y', using the two p-value
# functions defined in this notebook (welch_t_test, mww) as evaluators, then
# run it and display the plot of the result.
# NOTE(review): the name `eval` shadows Python's built-in eval() for the rest
# of the kernel session; consider renaming it (e.g. `simulator`) once it is
# confirmed that no other cell depends on this name.
# NOTE(review): sample_min/sample_max/num_steps presumably define the grid of
# sample sizes, mdes the effect sizes, and num_runs the repetitions per
# setting -- confirm against the rosalie Simulator documentation.
eval = ro.Simulator(
    df=df,
    metrics=['y'],
    evaluators=[welch_t_test, mww],
    sample_min=100,
    sample_max=10_000,
    num_steps=4,
    mdes=[0.02],
    num_runs=40,
    verbose=True,
)
result = eval.run()
result.plot().display()

INFO - Initializing Simulator with specified evaluators: ['welch_t_test', 'mww']
100%|██████████| 4/4 [00:03<00:00,  1.05it/s]
