# Randomized Controlled Experiments

In [None]:
from datascience import *
from cs104 import *
import numpy as np
%matplotlib inline

## 1. Warm-up Permutation Test

In [None]:
# Load the pre-lab survey and drop respondents whose 'Left or Right Handed'
# answer is 'Ambidextrous', leaving only the remaining handedness categories.
# `read_table` is called on the class (as in the BTA cell below) rather than
# on a throwaway empty `Table()` instance.
survey = (
    Table.read_table('data/prelab01-survey-fall2024.csv')
    .where('Left or Right Handed', are.not_equal_to('Ambidextrous'))
)
survey

In [None]:
# Count the rows in each 'Left or Right Handed' category.
survey.group('Left or Right Handed')

In [None]:
# Observed test statistic for the survey: the absolute difference of means of
# 'Height (in inches)' across the 'Left or Right Handed' groups
# (as computed by the course helper `abs_difference_of_means`).
observed = abs_difference_of_means(
    survey,
    'Left or Right Handed',
    'Height (in inches)',
)
observed

Is the observed difference in mean height between the two handedness groups statistically significant?

In [None]:
# Simulate the test statistic under the null hypothesis with the course helper.
# NOTE(review): the final argument (5000) is presumably the number of
# permutations to run -- confirm against the cs104 documentation.
results = simulate_permutation_statistic(
    survey,
    'Left or Right Handed',
    'Height (in inches)',
    5000,
)

In [None]:
# Histogram of the simulated statistics, with the observed value marked.
# `left_end=observed` presumably highlights the region from the observed
# statistic rightward -- confirm against the library's `hist` documentation.
plot = Table().with_columns('abs_difference_of_means', results).hist(left_end=observed)
# Title typo fixed: 'distibution' -> 'distribution'.
plot.set_title('Null hypothesis empirical distribution')
plot.dot(observed)

In [None]:
# Empirical p-value of the observed statistic relative to the simulated ones
# (computed by the course helper `empirical_pvalue`).
p_value = empirical_pvalue(results, observed)
p_value

## 2. Randomized Controlled Experiment with BTA

In [None]:
# Load the BTA trial data and preview 10 randomly sampled rows.
rct = Table.read_table('data/bta.csv')
rct.sample(10)

In [None]:
# Count the rows for each distinct value of the 'Group' column.
rct.group('Group')

In [None]:
# Cross-tabulate the trial: one row per 'Group' value, one column per
# 'Result' value, with each cell holding a row count.
rct.pivot('Result', 'Group')

In [None]:
# Average the remaining columns within each 'Group' value.
# NOTE(review): assumes 'Result' is coded 0/1, so its mean is a proportion -- confirm.
rct.group('Group', np.mean)

### Permutation Testing

In [None]:
# Observed test statistic for the trial: absolute difference of the mean
# 'Result' between the values of 'Group' (via the course helper).
observed_statistic = abs_difference_of_means(rct, 'Group', 'Result')
observed_statistic

In [None]:
# Inspect the Python type of the observed statistic.
type(observed_statistic)

In [None]:
# Simulate the test statistic under the null hypothesis for the trial data.
# NOTE(review): the final argument (2000) is presumably the number of
# permutations to run -- confirm against the cs104 documentation.
results = simulate_permutation_statistic(
    rct,
    'Group',
    'Result',
    2000,
)

In [None]:
# Histogram of the simulated statistics in bins of width 1/16 starting at 0,
# with the observed statistic marked by a dot.
plot = Table().with_columns('Abs Difference in Relief Proportions', results).hist(bins=np.arange(0,0.9,1/16))
# Title typo fixed: 'distibution' -> 'distribution'.
plot.set_title('Null hypothesis empirical distribution')
plot.dot(observed_statistic)

In [None]:
# Empirical p-value of the observed trial statistic relative to the simulated
# ones (computed by the course helper `empirical_pvalue`).
p_value = empirical_pvalue(results, observed_statistic)
p_value

## 3. Sample Size, Effect Size, and P-values

What's the relationship between effect size, sample size, and p-value?

In [None]:

def _count_effective(prop_effective, group_size):
    """Return floor(prop_effective * group_size) as an int, ignoring float round-off."""
    # A product such as 0.58 * 50 evaluates to 28.999999999999996, so a bare
    # floor would silently drop one success; rounding to 9 decimals first
    # removes that noise while leaving genuine fractions (e.g. 7.5) untouched.
    return int(np.floor(np.round(prop_effective * group_size, 9)))


def back_pain_exploration(observed_sample_size, treatment_prop_effective, control_prop_effective):
    """Don't worry about this code -- it's just to visualize size/effect/p-values.

    Builds a synthetic two-group experiment, runs a permutation test on it, and
    plots the simulated statistics with the observed statistic marked.

    Parameters
    ----------
    observed_sample_size : int
        Total number of participants. Half (rounded down) are labelled
        "Treatment" and the rest "Control". Must be at least 2.
    treatment_prop_effective : float
        Target proportion of the treatment group with Result == 1, in [0, 1].
    control_prop_effective : float
        Target proportion of the control group with Result == 1, in [0, 1].

    Raises
    ------
    ValueError
        If the sample is too small to fill both groups, or a proportion lies
        outside [0, 1] (which would otherwise yield mismatched column lengths).
    """
    if observed_sample_size < 2:
        raise ValueError("observed_sample_size must be at least 2 so that both groups are non-empty")
    for label, prop in (("treatment_prop_effective", treatment_prop_effective),
                        ("control_prop_effective", control_prop_effective)):
        if not 0 <= prop <= 1:
            raise ValueError(label + " must be between 0 and 1, got " + str(prop))

    # Split sample into half treated and half control
    # (the control group receives the extra participant when the size is odd).
    num_treated = int(observed_sample_size // 2)
    num_control = observed_sample_size - num_treated

    # Make table of data: 1 = effective, 0 = not effective.
    treat1 = _count_effective(treatment_prop_effective, num_treated)
    control1 = _count_effective(control_prop_effective, num_control)
    group = ["Treatment"]*num_treated + ["Control"]*num_control
    result = (
        [1]*treat1 + [0]*(num_treated - treat1)
        + [1]*control1 + [0]*(num_control - control1)
    )
    rct_fake = Table().with_columns("Group", group, "Result", result)

    # Calculate and plot results
    observed_statistic = abs_difference_of_means(rct_fake, 'Group', 'Result')
    results = simulate_permutation_statistic(rct_fake, 'Group', 'Result', 1000)
    pvalue = empirical_pvalue(results, observed_statistic)
    # The statistic is an absolute difference of two proportions, so it lies in
    # [0, 1]; the bins span that whole range so a large observed value is not
    # left beyond the last bin edge.
    bins = np.arange(0, 1 + 1/16, 1/16)
    plot = Table().with_columns('Statistic: Abs difference in relief proportions', results).hist(bins=bins, left_end=observed_statistic)
    plot.set_title(
        'Sample size=' + str(observed_sample_size)
        + '\nObserved statistic=' + str(np.round(observed_statistic,2))
        + '\np-value='+str(pvalue)
    )
    plot.dot(observed_statistic)

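What we had before: a sample of 31 in which the treatment was effective for 60% of the treatment group and 12.5% of the control group.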

In [None]:
# Sample size 31, with 60% effective in treatment and 12.5% effective in control.
back_pain_exploration(31, 0.6, 0.125)    

What if the effect size were slightly smaller? What if the sample size were bigger?

In [None]:
# Three scenarios drawn in one figure, in order:
# (sample size, treatment proportion effective, control proportion effective).
scenarios = [
    (31, 0.6, 0.125),
    (31, 0.5, 0.15),
    (51, 0.5, 0.15),
]
with Figure(1, 3, figsize=(5, 4)):
    for sample_size, treatment_prop, control_prop in scenarios:
        back_pain_exploration(sample_size, treatment_prop, control_prop)

Let's look at all these relationships at once! 

In [None]:

# One slider per parameter of `back_pain_exploration`.
# NOTE(review): Slider arguments are presumably (min, max, step) -- confirm.
sample_size_slider = Slider(10, 100, 1)
treatment_slider = Slider(0.05, 0.95, 0.01)
control_slider = Slider(0.05, 0.95, 0.01)

interact(
    back_pain_exploration,
    observed_sample_size=sample_size_slider,
    treatment_prop_effective=treatment_slider,
    control_prop_effective=control_slider,
)