In [15]:
import warnings
# Install a blanket "ignore" filter: no message, category or module is given, so every
# warning raised after this point is suppressed.
# NOTE(review): this also hides numerical RuntimeWarnings (divide-by-zero, invalid sqrt) and
# library deprecation notices — consider narrowing the filter to the specific category needed.
warnings.filterwarnings('ignore')

In [18]:
from scipy.stats import norm
import numpy as np

class BinomialSatherwaiteSampleSize:
    """Sample size per group for detecting a change in a conversion rate (unpooled variances).

    With ``p1 = baseline_conversion`` and ``p2 = baseline_conversion + delta`` the size is

        n = (z_alpha * sqrt(2 * p1 * (1 - p1))
             + z_beta * sqrt(p1 * (1 - p1) + p2 * (1 - p2))) ** 2 / delta ** 2

    where ``z_beta = norm.ppf(1 - beta)`` and ``z_alpha`` is ``norm.ppf(1 - alpha / 2)``
    for a two-tailed test or ``norm.ppf(1 - alpha)`` for a one-tailed test.

    NOTE(review): the class name is presumably a misspelling of "Satterthwaite"; it is
    kept unchanged so existing callers keep working.

    Args:
        baseline_conversion: Conversion rate of the control group, in [0, 1].
        delta: Absolute change in conversion rate to detect (non-zero, may be negative).
        alpha: Significance level used for the critical value.
        beta: Value passed as ``1 - beta`` to the normal quantile (1 - power).
        tailed: Either ``"two_tailed"`` or ``"one_tailed"``.
    """

    # Slack for float round-off when baseline + delta lands exactly on a boundary.
    _RATE_TOLERANCE = 1e-9

    def __init__(self, baseline_conversion, delta, alpha=0.05, beta=0.2, tailed="two_tailed"):
        self.baseline_conversion = baseline_conversion
        self.delta = delta
        self.alpha = alpha
        self.beta = beta
        self.treatment_conversion = self.baseline_conversion + self.delta
        self.tailed = tailed

    def get_sample_size(self):
        """Return the required sample size as an ``int``, rounded up.

        Raises:
            ValueError: If ``delta`` is zero, a conversion rate falls outside [0, 1],
                ``tailed`` is not a recognised option, or the result is not finite
                (for example when ``alpha`` or ``beta`` is 0 or 1).
        """
        if self.delta == 0:
            raise ValueError("delta must be non-zero to compute a sample size")

        tol = self._RATE_TOLERANCE
        for label, rate in (("baseline_conversion", self.baseline_conversion),
                            ("baseline_conversion + delta", self.treatment_conversion)):
            # A rate outside [0, 1] gives a negative "variance"; the formula can still
            # produce a number in that case, so reject it explicitly.
            if not (-tol <= rate <= 1 + tol):
                raise ValueError(f"{label} must lie in [0, 1], got {rate}")

        control_variance = self.baseline_conversion * (1 - self.baseline_conversion)
        treatment_variance = self.treatment_conversion * (1 - self.treatment_conversion)

        if self.tailed == "two_tailed":
            control_zscore = norm.ppf(1 - self.alpha / 2)
        elif self.tailed == "one_tailed":
            control_zscore = norm.ppf(1 - self.alpha)
        else:
            # ValueError subclasses Exception, so callers catching Exception still work.
            raise ValueError("Wrong input for tailed, select one of [one_tailed,two_tailed]")

        treatment_zscore = norm.ppf(1 - self.beta)

        numerator = np.power(
            control_zscore * np.sqrt(2 * control_variance)
            + treatment_zscore * np.sqrt(control_variance + treatment_variance),
            2,
        )
        required = numerator / np.power(self.delta, 2)

        if not np.isfinite(required):
            raise ValueError(
                "sample size is undefined for these inputs; "
                "alpha and beta must lie strictly between 0 and 1"
            )

        # Round up: rounding to nearest could return fewer samples than the formula requires.
        return int(np.ceil(required))
  
  
class BinomialPooledSampleSize:
    """Sample size per group for detecting a change in a conversion rate (pooled variance).

    With ``p1 = baseline_conversion``, ``p2 = baseline_conversion + delta`` and
    ``p = (p1 + p2) / 2`` the size is

        n = 2 * p * (1 - p) * (z_alpha + z_beta) ** 2 / delta ** 2

    where ``z_beta = norm.ppf(1 - beta)`` and ``z_alpha`` is ``norm.ppf(1 - alpha / 2)``
    for a two-tailed test or ``norm.ppf(1 - alpha)`` for a one-tailed test.

    Args:
        baseline_conversion: Conversion rate of the control group, in [0, 1].
        delta: Absolute change in conversion rate to detect (non-zero, may be negative).
        alpha: Significance level used for the critical value.
        beta: Value passed as ``1 - beta`` to the normal quantile (1 - power).
        tailed: Either ``"two_tailed"`` or ``"one_tailed"``.
    """

    # Slack for float round-off when baseline + delta lands exactly on a boundary.
    _RATE_TOLERANCE = 1e-9

    def __init__(self, baseline_conversion, delta, alpha=0.05, beta=0.2, tailed="two_tailed"):
        self.baseline_conversion = baseline_conversion
        self.delta = delta
        self.alpha = alpha
        self.beta = beta
        self.treatment_conversion = self.baseline_conversion + self.delta
        self.tailed = tailed

    def get_sample_size(self):
        """Return the required sample size as an ``int``, rounded up.

        Raises:
            ValueError: If ``delta`` is zero, a conversion rate falls outside [0, 1],
                ``tailed`` is not a recognised option, or the result is not finite
                (for example when ``alpha`` or ``beta`` is 0 or 1).
        """
        if self.delta == 0:
            raise ValueError("delta must be non-zero to compute a sample size")

        tol = self._RATE_TOLERANCE
        for label, rate in (("baseline_conversion", self.baseline_conversion),
                            ("baseline_conversion + delta", self.treatment_conversion)):
            # Rates outside [0, 1] would otherwise yield a meaningless pooled variance.
            if not (-tol <= rate <= 1 + tol):
                raise ValueError(f"{label} must lie in [0, 1], got {rate}")

        pooled_conversion = (self.baseline_conversion + self.treatment_conversion) / 2
        pooled_variance = pooled_conversion * (1 - pooled_conversion)

        if self.tailed == "two_tailed":
            control_zscore = norm.ppf(1 - self.alpha / 2)
        elif self.tailed == "one_tailed":
            control_zscore = norm.ppf(1 - self.alpha)
        else:
            # ValueError subclasses Exception, so callers catching Exception still work.
            raise ValueError("Wrong input for tailed, select one of [one_tailed,two_tailed]")

        treatment_zscore = norm.ppf(1 - self.beta)

        required = (
            2 * pooled_variance * np.power(control_zscore + treatment_zscore, 2)
            / np.power(self.delta, 2)
        )

        if not np.isfinite(required):
            raise ValueError(
                "sample size is undefined for these inputs; "
                "alpha and beta must lie strictly between 0 and 1"
            )

        # Round up: rounding to nearest could return fewer samples than the formula requires.
        return int(np.ceil(required))

class ContinuousSampleSize:
    """Sample size per group for detecting a shift in the mean of a continuous metric.

    The size is

        n = 2 * baseline_std ** 2 * (z_alpha + z_beta) ** 2 / delta ** 2

    where ``z_beta = norm.ppf(1 - beta)`` and ``z_alpha`` is ``norm.ppf(1 - alpha / 2)``
    for a two-tailed test or ``norm.ppf(1 - alpha)`` for a one-tailed test.

    Args:
        baseline_mean: Mean of the control group. Stored (together with the derived
            ``treatment_mean``) but not used by the sample size formula.
        baseline_std: Standard deviation used for both groups.
        delta: Absolute change in the mean to detect (non-zero, may be negative).
        alpha: Significance level used for the critical value.
        beta: Value passed as ``1 - beta`` to the normal quantile (1 - power).
        tailed: Either ``"two_tailed"`` or ``"one_tailed"``.
    """

    def __init__(self, baseline_mean, baseline_std, delta, alpha=0.05, beta=0.2, tailed="two_tailed"):
        self.baseline_mean = baseline_mean
        self.baseline_std = baseline_std
        self.delta = delta
        self.alpha = alpha
        self.beta = beta
        self.treatment_mean = self.baseline_mean + self.delta
        self.tailed = tailed

    def get_sample_size(self):
        """Return the required sample size as an ``int``, rounded up.

        Raises:
            ValueError: If ``delta`` is zero, ``tailed`` is not a recognised option, or
                the result is not finite (for example when ``alpha`` or ``beta`` is 0 or 1).
        """
        if self.delta == 0:
            raise ValueError("delta must be non-zero to compute a sample size")

        if self.tailed == "two_tailed":
            control_zscore = norm.ppf(1 - self.alpha / 2)
        elif self.tailed == "one_tailed":
            control_zscore = norm.ppf(1 - self.alpha)
        else:
            # ValueError subclasses Exception, so callers catching Exception still work.
            raise ValueError("Wrong input for tailed, select one of [one_tailed,two_tailed]")

        treatment_zscore = norm.ppf(1 - self.beta)

        required = (
            2 * self.baseline_std * self.baseline_std
            * np.power(control_zscore + treatment_zscore, 2)
            / np.power(self.delta, 2)
        )

        if not np.isfinite(required):
            raise ValueError(
                "sample size is undefined for these inputs; "
                "alpha and beta must lie strictly between 0 and 1"
            )

        # Round up: rounding to nearest could return fewer samples than the formula requires.
        return int(np.ceil(required))


In [17]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from ipywidgets import interact, widgets

# Slider controls for the CLT demo: number of simulated samples, size of each sample,
# and the bounds of the uniform distribution being sampled.
N_simulation = widgets.IntSlider(
    value=100,
    min=1,
    max=1000,
    step=5,
    description='N_simulation',
    style={'description_width': 'initial'},
)
sample_size = widgets.IntSlider(
    value=100,
    min=1,
    max=1000,
    step=5,
    description='sample_size',
    style={'description_width': 'initial'},
)
low = widgets.IntSlider(
    value=0,
    min=0,
    max=100,
    step=1,
    description='low',
    style={'description_width': 'initial'},
)
high = widgets.IntSlider(
    value=1,
    min=1,
    max=100,
    step=1,
    description='high',
    style={'description_width': 'initial'},
)

# Interactive Central Limit Theorem demo; the callback re-runs whenever a slider moves.
@interact(N_simulation=N_simulation, sample_size=sample_size, low=low, high=high)
def CLT_simulation(N_simulation, sample_size, low, high):
    """Plot the estimated density of sample means drawn from Uniform(low, high).

    Args:
        N_simulation: Number of independent samples to draw (one mean per sample).
        sample_size: Number of observations in each sample.
        low: Lower bound of the uniform distribution.
        high: Upper bound of the uniform distribution; must exceed ``low``.
    """
    # The sliders allow low >= high, for which uniform sampling is not meaningful.
    if high <= low:
        print(f"'high' ({high}) must be greater than 'low' ({low}) to draw uniform samples")
        return
    # A density estimate needs more than one point.
    if N_simulation < 2:
        print("At least 2 simulations are needed to estimate a density")
        return

    # One row per simulated sample; the row means are the sample means.
    draws = np.random.uniform(low=low, high=high, size=(N_simulation, sample_size))
    sample_means = draws.mean(axis=1)

    fig, ax = plt.subplots()
    # kdeplot is the replacement for the deprecated distplot(..., hist=False).
    sns.kdeplot(sample_means, ax=ax)
    ax.set_title('CLT')
    ax.set_xlabel('Sample mean')
    ax.set_ylabel('Density')

    # Show the figure from inside the callback so each slider change renders its own plot.
    plt.show()


interactive(children=(IntSlider(value=100, description='N_simulation', max=1000, min=1, step=5, style=SliderSt…

In [19]:
# Controls for the conversion-rate sample size calculator.
# alpha and beta stop at 0.99: at 1.0 the normal quantile norm.ppf(0) is -inf and the
# sample size cannot be computed. baseline_conversion and delta stop at 0.99 so that a
# valid treatment rate (baseline + delta <= 1) is always reachable.
alpha = widgets.FloatSlider(min=0.01, max=0.99, step=0.01, value=0.05, description='alpha', style={'description_width': 'initial'})
beta = widgets.FloatSlider(min=0.01, max=0.99, step=0.01, value=0.2, description='beta', style={'description_width': 'initial'})
baseline_conversion = widgets.FloatSlider(min=0.01, max=0.99, step=0.01, value=0.3, description='baseline_conversion', style={'description_width': 'initial'})
delta = widgets.FloatSlider(min=0.01, max=0.99, step=0.01, value=0.02, description='absolute delta', style={'description_width': 'initial'})

# index=1 selects "two_tailed" as the default option.
tail_dropdown = widgets.Dropdown(
    options=["one_tailed", "two_tailed"],
    index=1,
    disabled=False,
    description='experiment_type',
    style={'description_width': 'initial'},
)


# Bind each widget to the callback argument of the same name; the callback re-runs
# whenever any control changes.
@interact(alpha=alpha, beta=beta, baseline_conversion=baseline_conversion,
          delta=delta, tail_dropdown=tail_dropdown)
def proportion_sample_size_calculator(alpha, beta, baseline_conversion,
                                      delta, tail_dropdown):
    """Print the per-variation sample size for a conversion-rate experiment.

    Args:
        alpha: Significance level.
        beta: 1 - power.
        baseline_conversion: Conversion rate of the control group.
        delta: Absolute change in conversion rate to detect.
        tail_dropdown: ``"one_tailed"`` or ``"two_tailed"``.
    """
    # The sliders can be combined into a treatment rate above 100%, for which no
    # sample size is meaningful. The small slack absorbs float round-off at exactly 1.
    treatment_conversion = baseline_conversion + delta
    if treatment_conversion > 1 + 1e-9:
        print(f"baseline_conversion + absolute_delta = {treatment_conversion:.2f} exceeds 1; "
              "lower one of them to get a valid treatment conversion rate")
        return

    ss_calculator = BinomialSatherwaiteSampleSize(baseline_conversion, delta, alpha=alpha, beta=beta, tailed=tail_dropdown)
    try:
        sample_size = ss_calculator.get_sample_size()
    except (ValueError, OverflowError) as error:
        # Degenerate settings (e.g. alpha or beta of 1.0) make the result NaN or infinite;
        # report that instead of showing a traceback in the widget output.
        print(f"Cannot compute a sample size for these settings: {error}")
        return

    print(f'''At alpha:{alpha}, beta: {beta}, baseline_conversion: {baseline_conversion}, absolute_delta:{delta}, experiment_type:{tail_dropdown}
          You will need {sample_size} samples per variation''')
    

interactive(children=(FloatSlider(value=0.05, description='alpha', max=1.0, min=0.01, step=0.01, style=SliderS…

In [21]:
# Controls for the continuous-metric sample size calculator.
# alpha and beta stop at 0.99: at 1.0 the normal quantile norm.ppf(0) is -inf and the
# sample size cannot be computed.
alpha = widgets.FloatSlider(min=0.01, max=0.99, step=0.01, value=0.05, description='alpha', style={'description_width': 'initial'})
beta = widgets.FloatSlider(min=0.01, max=0.99, step=0.01, value=0.2, description='beta', style={'description_width': 'initial'})
baseline_mean = widgets.IntSlider(min=1, max=100, step=1, value=50, description='baseline_mean', style={'description_width': 'initial'})
baseline_std = widgets.IntSlider(min=1, max=1000, step=1, value=5, description='baseline_std', style={'description_width': 'initial'})
delta = widgets.FloatSlider(min=1, max=100, step=1, value=5, description='absolute delta', style={'description_width': 'initial'})

# index=1 selects "two_tailed" as the default option.
tail_dropdown = widgets.Dropdown(
    options=["one_tailed", "two_tailed"],
    index=1,
    disabled=False,
    description='hypothesis_type',
    style={'description_width': 'initial'},
)


# Bind each widget to the callback argument of the same name; the callback re-runs
# whenever any control changes.
# NOTE(review): this function reuses the name of the conversion-rate callback defined in
# an earlier cell and therefore shadows it, although it computes the sample size for a
# continuous metric. The name is kept for compatibility; consider renaming it.
@interact(alpha=alpha, beta=beta, baseline_mean=baseline_mean, baseline_std=baseline_std,
          delta=delta, tail_dropdown=tail_dropdown)
def proportion_sample_size_calculator(alpha, beta, baseline_mean, baseline_std,
                                      delta, tail_dropdown):
    """Print the per-variation sample size for an experiment on a continuous metric.

    Args:
        alpha: Significance level.
        beta: 1 - power.
        baseline_mean: Mean of the control group.
        baseline_std: Standard deviation of the metric.
        delta: Absolute change in the mean to detect.
        tail_dropdown: ``"one_tailed"`` or ``"two_tailed"``.
    """
    ss_calculator = ContinuousSampleSize(baseline_mean, baseline_std, delta, alpha=alpha, beta=beta, tailed=tail_dropdown)
    try:
        sample_size = ss_calculator.get_sample_size()
    except (ValueError, OverflowError) as error:
        # Degenerate settings (e.g. alpha or beta of 1.0) make the result NaN or infinite;
        # report that instead of showing a traceback in the widget output.
        print(f"Cannot compute a sample size for these settings: {error}")
        return

    print(f'''At alpha:{alpha}, beta: {beta}, baseline_mean: {baseline_mean}, baseline_std: {baseline_std} , absolute_delta:{delta}, hypothesis_type:{tail_dropdown}
          You will need {sample_size} samples per variation''')
    

interactive(children=(FloatSlider(value=0.05, description='alpha', max=1.0, min=0.01, step=0.01, style=SliderS…

In [20]:
# Controls for the control-vs-treatment simulation: significance level, then the mean,
# standard deviation and sample size of each simulated group.
alpha = widgets.FloatSlider(
    value=0.05,
    min=0.01,
    max=1,
    step=0.01,
    description='Significance level',
    style={'description_width': 'initial'},
)

control_mean = widgets.IntSlider(
    value=50,
    min=1,
    max=100,
    step=1,
    description='control_mean',
)
treatment_mean = widgets.IntSlider(
    value=65,
    min=1,
    max=100,
    step=1,
    description='treatment_mean',
    style={'description_width': 'initial'},
)

control_std = widgets.IntSlider(
    value=5,
    min=1,
    max=100,
    step=1,
    description='control_std',
    style={'description_width': 'initial'},
)
treatment_std = widgets.IntSlider(
    value=5,
    min=1,
    max=100,
    step=1,
    description='treatment_std',
    style={'description_width': 'initial'},
)

control_sample_size = widgets.IntSlider(
    value=1000,
    min=50,
    max=100000,
    step=50,
    description='control_sample_size',
    style={'description_width': 'initial'},
)
treatment_sample_size = widgets.IntSlider(
    value=1000,
    min=50,
    max=100000,
    step=50,
    description='treatment_sample_size',
    style={'description_width': 'initial'},
)

# Bind each widget to the callback argument of the same name; the plot is redrawn
# whenever any control changes.
@interact(alpha=alpha, control_mean=control_mean, treatment_mean=treatment_mean, control_std=control_std,
          treatment_std=treatment_std, control_sample_size=control_sample_size,
          treatment_sample_size=treatment_sample_size)
def plot_random_scatterplot(alpha, control_mean, treatment_mean, control_std, treatment_std,
                            control_sample_size, treatment_sample_size):
    """Plot density estimates of simulated control and treatment data around a critical value.

    Normal samples are drawn for both groups and their kernel density estimates are drawn
    on one axes. A vertical line marks ``mean(control) + norm.ppf(1 - alpha) * std(control)``.
    The control curve is shaded to the right of that line and the treatment curve to the
    left of it.

    NOTE(review): despite its name this function draws density curves, not a scatter plot.

    Args:
        alpha: Significance level used to place the critical value.
        control_mean: Mean of the simulated control data.
        treatment_mean: Mean of the simulated treatment data.
        control_std: Standard deviation of the simulated control data.
        treatment_std: Standard deviation of the simulated treatment data.
        control_sample_size: Number of simulated control observations.
        treatment_sample_size: Number of simulated treatment observations.
    """
    control_data = np.random.normal(loc=control_mean, scale=control_std, size=control_sample_size)
    treatment_data = np.random.normal(loc=treatment_mean, scale=treatment_std, size=treatment_sample_size)

    # Fresh axes per call, so the line indices below never pick up curves from an earlier plot.
    fig, ax = plt.subplots()
    # kdeplot is the replacement for the deprecated distplot(..., hist=False).
    sns.kdeplot(control_data, ax=ax)
    sns.kdeplot(treatment_data, ax=ax)

    # Relies on each kdeplot call adding exactly one line to the axes, in call order
    # (the same assumption the previous lines[0] / lines[1] indexing made).
    kde_x1, kde_y1 = ax.lines[0].get_data()
    kde_x2, kde_y2 = ax.lines[1].get_data()

    # One-sided critical value derived from the simulated control data.
    critical_value = np.mean(control_data) + norm.ppf(1 - alpha) * np.std(control_data)
    ax.axvline(x=critical_value, color='#EF9A9A')

    # Control density beyond the critical value.
    ax.fill_between(kde_x1, kde_y1, where=(kde_x1 > critical_value),
                    interpolate=True, color='#EF9A9A')
    # Treatment density below the critical value.
    ax.fill_between(kde_x2, kde_y2, where=(kde_x2 < critical_value),
                    interpolate=True, color='#0F9A9A')

    ax.set_title('Control vs Treatment sampled mean distribution')
    ax.set_xlabel('data')
    ax.set_ylabel('density')

    # Show the figure from inside the callback so each slider change renders its own plot.
    plt.show()

interactive(children=(FloatSlider(value=0.05, description='Significance level', max=1.0, min=0.01, step=0.01, …