# Sample Size Calculator

How to use: https://tubitv.atlassian.net/wiki/spaces/DST/pages/2045116757/TDR+Sample+Size+Calculator

## Specify your settings

In [1]:
import tubi_data_runtime as tdr
import math
import pandas as pd
import numpy as np
from datetime import date

import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual, SelectMultiple, Button, Output, Dropdown, Accordion, Tab, HBox, VBox, Layout
from IPython.display import clear_output, display as ipy_display
from traitlets import traitlets

import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning
warnings.simplefilter('ignore', ConvergenceWarning)

from ssc_utils.filter_generator import filter_generator
from ssc_utils.raw_user_data import raw_user_data
from ssc_utils.metric_switcher import metric_switcher
from ssc_utils.metric_summary import metric_summary
from ssc_utils.cuped import cuped
import ssc_utils.calculator as c

# load choices
# Option lists computed once and reused by the widget definitions below:
# `event_name_choices` feeds both event selectors, `filter_metrics_choices`
# feeds the metric filter's field dropdown.
event_name_choices = filter_generator().event_name_choices()
filter_metrics_choices = filter_generator().filter_metrics_choices()

In [2]:
# --- Experiment parameters ---------------------------------------------------
# Each tuple is a (min, max, step) slider range; the keyword name matches the
# parameter of the wrapped `c.*` function.
EFFECT_SIZE_RELATIVE = interactive(c.effect, effect=(0.0, 1.0, 0.0001))
NUMBER_VARIATIONS = interactive(c.treatments, treatments=(0, 8, 1))
ALLOCATION = interactive(c.allocation, allocation=(0.0, 1.0, 0.01))
POWER = interactive(c.power, power=(0.0, 1.0, 0.01))
ALPHA = interactive(c.alpha, alpha=(0.0, 1.0, 0.01))

# --- Primary metric ----------------------------------------------------------
primary_metric = interactive(
    metric_switcher().choose_metric,
    metric=metric_switcher().possible_metrics(),
)

# --- User-level filters ------------------------------------------------------
# Every condition widget has three children in this order: field, condition,
# value. `concat_3child_filters` relies on that order.
attribute_filter = interactive(
    filter_generator().make_sql_condition_string,
    field=filter_generator().filter_attributes_choices(),
    condition=filter_generator().condition_choices(),
    value='',
    filter_type=fixed('attribute'),
)

metric_filter = interactive(
    filter_generator().make_sql_condition_string,
    field=filter_metrics_choices,
    condition=filter_generator().condition_choices(),
    value='',
    filter_type=fixed('metric'),
)


# --- Event filters -----------------------------------------------------------
def _event_selector():
    """Build a multi-select over `event_name_choices`, preselecting 'no event filter'."""
    return SelectMultiple(
        options=event_name_choices,
        value=('no event filter',),
        description='event',
        disabled=False,
    )


primary_event = _event_selector()

primary_event_sub_cond = interactive(
    filter_generator().make_sql_condition_string,
    field=filter_generator().event_sub_cond_field_choices(),
    condition=filter_generator().condition_choices(),
    value='',
    filter_type=fixed('event'),
)

pre_event = _event_selector()

pre_event_sub_cond = interactive(
    filter_generator().make_sql_condition_string,
    field=filter_generator().event_sub_cond_field_choices(),
    condition=filter_generator().condition_choices(),
    value='',
    filter_type=fixed('event'),
)

# Interval between the pre event and the primary event; 'NULL' means "not set".
time_interval = interactive(filter_generator().interval, interval='NULL')

In [3]:
# Tab 1: the experiment-parameter sliders plus the metric picker.
parameters = VBox([EFFECT_SIZE_RELATIVE, NUMBER_VARIATIONS, ALLOCATION, POWER, ALPHA, primary_metric])

# Tab 2: attribute / metric filters, one accordion section each.
filter_accordion_titles = ['attribute', 'metric']
filter_accordion = Accordion([attribute_filter, metric_filter])
for section, section_title in enumerate(filter_accordion_titles):
    filter_accordion.set_title(section, section_title)

# Tab 3: event filters. The pre-event section also carries the time interval.
primary_event_set = VBox([primary_event, primary_event_sub_cond])
pre_event_set = VBox([pre_event, pre_event_sub_cond, time_interval])

event_filter_accordion_titles = ['primary event', 'pre event']
event_filter_accordion = Accordion([primary_event_set, pre_event_set])
for section, section_title in enumerate(event_filter_accordion_titles):
    event_filter_accordion.set_title(section, section_title)

list_widgets = [parameters, filter_accordion, event_filter_accordion]
titles = ['parameters', 'filters', 'event filters']

tab = Tab(children=list_widgets)
for page, page_title in enumerate(titles):
    tab.set_title(page, page_title)

# Last expression of the cell: renders the tabbed settings panel.
tab

Tab(children=(VBox(children=(interactive(children=(FloatSlider(value=0.01, description='effect', max=1.0, step…

In [20]:
def concat_3child_filters(filt):
    """Render a three-widget filter as one space-separated string.

    Reads the ``value`` of the first three entries of ``filt.children`` and
    joins them with single spaces. Returns ``None`` when the first child's
    value is the 'no filters' sentinel.
    """
    if filt.children[0].value == 'no filters':
        return None
    return ' '.join(filt.children[position].value for position in range(3))

    
# Output area and button for the "Show all filters" debug view. They are only
# created here; the troubleshooting cell further down renders them and attaches
# the click handler. (A stray argument-less ipy_display() call that rendered
# nothing was removed.)
filter_output = Output()
show_filter_button = Button(description="Show all filters", layout=Layout(width='200px'))

def _event_filter_summary(label, event_select, sub_condition):
    """Return the print() arguments describing one event filter, or None if unset.

    An empty selection (every option deselected) is treated the same as the
    'no event filter' sentinel instead of raising IndexError, and all selected
    event names are listed rather than only the first one.
    """
    chosen = event_select.value
    if not chosen or chosen[0] == 'no event filter':
        return None
    return (label, ', '.join(map(str, chosen)), ';', concat_3child_filters(sub_condition))


def print_filters_on_button_clicked(b):
    """Click handler: print every current widget selection into `filter_output`.

    Parameters
    ----------
    b : the clicked button (unused; required by the `on_click` callback signature).
    """
    filter_output.clear_output(wait=True)
    with filter_output:
        print('parameters selected:')
        print('\t effect size', EFFECT_SIZE_RELATIVE.result)
        print('\t number of variations', NUMBER_VARIATIONS.result)
        print('\t allocation', ALLOCATION.result)
        print('\t power', POWER.result)
        print('\t alpha', ALPHA.result)

        print()
        print('metric selected:', primary_metric.result)

        print()
        print('filters selected:')
        print('\t', concat_3child_filters(attribute_filter))
        print('\t', concat_3child_filters(metric_filter))

        print()
        print('event filters selected:')
        primary_summary = _event_filter_summary('primary event:', primary_event, primary_event_sub_cond)
        if primary_summary is not None:
            print('\t', *primary_summary)
        else:
            print('\t', None)

        pre_summary = _event_filter_summary('pre event:', pre_event, pre_event_sub_cond)
        if pre_summary is not None:
            print('\t', *pre_summary)
            # The interval is only meaningful when a pre event is chosen.
            if time_interval.children[0].value != 'NULL':
                print('time interval between events:', time_interval.children[0].value)

In [21]:
apply_output = Output()
apply_button = Button(description="Apply filters", layout=Layout(width='200px'))

def apply_on_button_clicked(b):
    """Click handler: build the full query from the current widgets into FINAL_SQL.

    The SQL is assembled by concatenating, in order, the filter CTE, the raw
    user data CTE, the per-user metric CTE, the metric summary CTE and the
    CUPED CTE. The result is stored in the module-level ``FINAL_SQL`` that the
    "Calculate sample size" and "Print SQL" handlers read.

    All work runs inside ``apply_output`` so the user sees a confirmation, and
    so that an error raised while building the SQL is shown next to the button
    instead of being lost in the callback machinery.

    Parameters
    ----------
    b : the clicked button (unused; required by the `on_click` callback signature).
    """
    global FINAL_SQL
    apply_output.clear_output(wait=True)
    with apply_output:
        # Note the mapping: event1 is the *pre* event, event2 the *primary* event.
        filters_sql = filter_generator().generate_filter_cte(
            attribute_condition_interact=attribute_filter,
            metric_condition_interact=metric_filter,
            event1_condition_interact=pre_event,
            event1_sub_condition_interact=pre_event_sub_cond,
            event2_condition_interact=primary_event,
            event2_sub_condition_interact=primary_event_sub_cond,
            event_time_interval_interact=time_interval,
        )
        raw_user_sql = raw_user_data().generate_raw_user_data_cte(prev_cte_sql=filters_sql)
        user_sql = metric_switcher().generate_user_data_cte(primary_metric.result)
        summary_sql = metric_summary().generate_metric_summary_cte()
        cuped_sql = cuped().generate_cuped_cte(event2_condition_interact=primary_event)

        FINAL_SQL = filters_sql + raw_user_sql + user_sql + summary_sql + cuped_sql
        print("Filters applied. You can now calculate the sample size.")

ipy_display(apply_button, apply_output)
apply_button.on_click(apply_on_button_clicked)

## Results

In [22]:
output = Output()
run_button = Button(description="Calculate sample size", layout=Layout(width='200px'))

def run_on_button_clicked(b):
    """Click handler: run FINAL_SQL on Redshift and display the required sample sizes.

    If "Apply filters" has not been clicked yet (so ``FINAL_SQL`` does not
    exist), a hint is printed instead of failing with a NameError.

    Parameters
    ----------
    b : the clicked button (unused; required by the `on_click` callback signature).
    """
    output.clear_output(wait=True)
    with output:
        # FINAL_SQL is only created by the "Apply filters" handler.
        final_sql = globals().get('FINAL_SQL')
        if not final_sql:
            print("No SQL to run yet: click 'Apply filters' first.")
            return

        print("Running...estimated time: ~5 min")
        raw_df = tdr.query_redshift(final_sql).to_df()

        # NOTE(review): the interactive widgets themselves (not their .result)
        # are passed here -- confirm that is what calculate_sample_required expects.
        final_df = c.calculate_sample_required(df=raw_df,
                                               effect_size_relative=EFFECT_SIZE_RELATIVE,
                                               number_variations=NUMBER_VARIATIONS,
                                               allocation=ALLOCATION,
                                               power=POWER,
                                               alpha=ALPHA)
        # Replace the "Running..." message with the result table.
        clear_output(wait=True)
        # NOTE(review): Styler.hide_index / set_precision may be deprecated or
        # removed in newer pandas -- confirm against the runtime's pandas version.
        ipy_display(final_df.sort_values('platform').style.hide_index().set_precision(3))

In [23]:
# output SQL for debugging purposes; can copy and manually run this elsewhere

sql_output = Output()
print_button = Button(description="Print SQL (for debugging)", layout=Layout(width='200px'))

def print_sql_on_button_clicked(b):
    """Click handler: print the generated SQL into `sql_output`.

    ``FINAL_SQL`` is only created by the "Apply filters" handler; when it does
    not exist yet a hint is printed instead of raising NameError.

    Parameters
    ----------
    b : the clicked button (unused; required by the `on_click` callback signature).
    """
    sql_output.clear_output(wait=True)
    with sql_output:
        print(globals().get('FINAL_SQL', "No SQL generated yet: click 'Apply filters' first."))

In [24]:
# Attach the click handler, then render the button directly above its output area.
run_button.on_click(run_on_button_clicked)
ipy_display(run_button, output)

Button(description='Calculate sample size', layout=Layout(width='200px'), style=ButtonStyle())

Output()

## Problems? Show filters and/or SQL to debug

In [9]:
# Attach the debug handlers, then render each button directly above its output area.
show_filter_button.on_click(print_filters_on_button_clicked)
print_button.on_click(print_sql_on_button_clicked)
ipy_display(show_filter_button, filter_output, print_button, sql_output)

Button(description='Show all filters', layout=Layout(width='200px'), style=ButtonStyle())

Output()

Button(description='Print SQL (for debugging)', layout=Layout(width='200px'), style=ButtonStyle())

Output()