# Sample Size Calculator (WIP)

#### Steps:
 1) Run the first 3 cells.  
 2) Choose your conditions in the tabbed widget displayed by the third cell (if applicable).  
 3) Run the last 2 cells and wait for the results.

In [1]:
import tubi_data_runtime as tdr
import math
import pandas as pd
import numpy as np
from datetime import date
# from statsmodels.stats.power import tt_ind_solve_power
from ipywidgets import interact, interactive, fixed, interact_manual, Button, HBox, VBox
import ipywidgets as widgets

from ssc_utils.filter_generator import filter_generator
from ssc_utils.raw_user_data import raw_user_data
from ssc_utils.metric_switcher import metric_switcher
from ssc_utils.metric_summary import metric_summary
from ssc_utils.cuped import cuped
import ssc_utils.calculator as c

# Load the option lists that the widget definitions reuse:
#   * event_name_choices     -> options of both event multi-selects
#   * filter_metrics_choices -> `field` options of the metric filter
# Each list comes from its own filter_generator() instance, built left to right.
event_name_choices, filter_metrics_choices = (
    filter_generator().event_name_choices(),
    filter_generator().filter_metrics_choices(),
)

## Interactive calculator (end user starts here)

In [4]:
# --- Statistical parameters ---------------------------------------------------
# A numeric (min, max, step) tuple is the ipywidgets shorthand for a slider; each
# slider drives the matching helper function in ssc_utils.calculator.
EFFECT_SIZE_RELATIVE = interactive(c.effect, effect=(0.0, 1.0, 0.01))
NUMBER_VARIATIONS = interactive(c.variations, variations=(0, 8, 1))
ALLOCATION = interactive(c.allocation, allocation=(0.0, 1.0, 0.01))
POWER = interactive(c.power, power=(0.0, 1.0, 0.01))
ALPHA = interactive(c.alpha, alpha=(0.0, 1.0, 0.01))

# --- Primary metric -----------------------------------------------------------
primary_metric = interactive(
    metric_switcher().choose_metric,
    metric=metric_switcher().possible_metrics(),
)

# --- Attribute / metric filters -----------------------------------------------
# All condition widgets below wrap make_sql_condition_string. `value=''` yields a
# free-text input, and `filter_type` is pinned with fixed() so it is passed to the
# function as-is instead of getting a control of its own.
attribute_filter = interactive(
    filter_generator().make_sql_condition_string,
    field=filter_generator().filter_attributes_choices(),
    condition=filter_generator().metric_conditions_choices(),
    value='',
    filter_type=fixed('attribute'),
)

metric_filter = interactive(
    filter_generator().make_sql_condition_string,
    field=filter_metrics_choices,
    condition=filter_generator().metric_conditions_choices(),
    value='',
    filter_type=fixed('metric'),
)

# --- Primary event: event picker plus an optional sub-condition ---------------
primary_event = widgets.SelectMultiple(
    options=event_name_choices,
    value=('no event filter',),
    description='Events',
)

primary_event_sub_cond = interactive(
    filter_generator().make_sql_condition_string,
    field=filter_generator().event_sub_cond_field_choices(),
    condition=filter_generator().metric_conditions_choices(),
    value='',
    filter_type=fixed('event'),
)

# --- Pre event: same controls as the primary event, plus a time interval ------
pre_event = widgets.SelectMultiple(
    options=event_name_choices,
    value=('no event filter',),
    description='Events',
    disabled=False,
)

pre_event_sub_cond = interactive(
    filter_generator().make_sql_condition_string,
    field=filter_generator().event_sub_cond_field_choices(),
    condition=filter_generator().metric_conditions_choices(),
    value='',
    filter_type=fixed('event'),
)

# Text input for the interval; it starts out as the literal string 'NULL'.
time_interval = interactive(filter_generator().interval, interval='NULL')

In [5]:
# Tab 1: statistical parameters and the primary metric, stacked vertically.
parameters = VBox(
    children=[
        EFFECT_SIZE_RELATIVE,
        NUMBER_VARIATIONS,
        ALLOCATION,
        POWER,
        ALPHA,
        primary_metric,
    ]
)

# Tab 2: attribute and metric filters, one accordion section each.
filter_accordion = widgets.Accordion(children=[attribute_filter, metric_filter])
filter_accordion_titles = ['attribute', 'metric']
for idx, title in enumerate(filter_accordion_titles):
    filter_accordion.set_title(idx, title)

# Tab 3: event filters. Each section pairs an event picker with its sub-condition;
# the pre-event section additionally carries the time interval input.
primary_event_set = VBox(children=[primary_event, primary_event_sub_cond])
pre_event_set = VBox(children=[pre_event, pre_event_sub_cond, time_interval])

event_filter_accordion = widgets.Accordion(children=[primary_event_set, pre_event_set])
event_filter_accordion_titles = ['primary event', 'pre event']
for idx, title in enumerate(event_filter_accordion_titles):
    event_filter_accordion.set_title(idx, title)

# Assemble the three panels into a single tabbed widget.
list_widgets = [
    parameters,
    filter_accordion,
    event_filter_accordion,
]

tab = widgets.Tab(children=list_widgets)

titles = ['parameters', 'filters', 'event filters']
for idx, title in enumerate(titles):
    tab.set_title(idx, title)

# Leave `tab` as the last expression so the notebook renders it as the cell output.
tab

Tab(children=(VBox(children=(interactive(children=(FloatSlider(value=0.01, description='effect', max=1.0, step…

## After the user specifies the settings, run everything below

In [4]:
# Build the SQL fragments in the order they are concatenated below.
filters_sql = filter_generator().generate_filter_cte(
    attribute_condition_interact=attribute_filter,
    metric_condition_interact=metric_filter,
    # Note the wiring: "event1" receives the pre-event widgets and
    # "event2" receives the primary-event widgets.
    event1_condition_interact=pre_event,
    event1_sub_condition_interact=pre_event_sub_cond,
    event2_condition_interact=primary_event,
    event2_sub_condition_interact=primary_event_sub_cond,
    event_time_interval_interact=time_interval,
)
raw_user_sql = raw_user_data().generate_raw_user_data_cte(prev_cte_sql=filters_sql)
# Unlike the filter widgets, the metric widget is unwrapped here via `.result`.
user_sql = metric_switcher().generate_user_data_cte(primary_metric.result)
summary_sql = metric_summary().generate_metric_summary_cte()
cuped_sql = cuped().generate_cuped_cte(event2_condition_interact=primary_event)

FINAL_SQL = (
    filters_sql
    + raw_user_sql
    + user_sql
    + summary_sql
    + cuped_sql
)

# Print the assembled query so it can be copied and run manually when debugging.
print(FINAL_SQL)

WITH raw_user_data AS (
              SELECT 
                  a.device_id,
                  device_first_seen_ts,
                  ds,
                  platform_type,
                  platform,
                  GETDATE() AS last_exposure_ds,
                  DATEADD('week', -2, DATE_TRUNC('week', last_exposure_ds)) AS first_exposure_ds,
                  -- Metrics
                  sum(tvt_sec) as tvt_sec,
                  sum(user_signup_count) as user_signup_count,
                  sum(device_registration_count) as device_registration_count,
                  sum(signup_or_registration_activity_count) as signup_or_registration_activity_count,
                  sum(visit_total_count) as visit_total_count
              FROM tubidw.device_metric_daily as a
        
              WHERE DATE_TRUNC('week',ds) >= dateadd('week', -4, DATE_TRUNC('week',GETDATE()))
                AND DATE_TRUNC('week',ds) < DATE_TRUNC('week', GETDATE())
              GROUP BY 1,2,3,4,5,6,7
        

## Results

In [None]:
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning

# Suppress statsmodels ConvergenceWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=ConvergenceWarning)

# Run the assembled query on Redshift and load the result into a DataFrame.
raw_df = tdr.query_redshift(FINAL_SQL).to_df()

# NOTE(review): the interactive widgets themselves are passed here, not their
# `.result` values -- confirm that calculate_sample_required unwraps them.
final_df = c.calculate_sample_required(
    df=raw_df,
    effect_size_relative=EFFECT_SIZE_RELATIVE,
    number_variations=NUMBER_VARIATIONS,
    allocation=ALLOCATION,
    power=POWER,
    alpha=ALPHA,
)

final_df.sort_values('platform').style\
    .hide_index()\
    .set_precision(3)\