In [1]:
import pandas as pd
import supporting_functions as sf

observations = 2500000
num_retailers = 10000
retailer_loc = 100          
retailer_scale = 10
noise_loc = 0.0
noise_scale = 10
absolute_treatment_effect = 0.25

file_name = sf.create_sim_data(absolute_treatment_effect, observations, num_retailers, retailer_loc,             
                          retailer_scale, noise_loc, noise_scale)
data = pd.read_csv(file_name)

In [2]:
treatment_variable = 'Treated'
covariates = ['Mean_Retailer_Amount']
dependent_variable = 'Initial_Order_Amount'
absolute_mde          = 0.25
rejection_region      = 0.05
desired_power         = 0.8
search_orders         = 1
points_per_iteration  = 4
sims                  = 300
precison              = 0.025
model                 = 'isotonic'

############################################################

iterations   = 5
n_vector     = []
power_vector = []
sims_vector  = []
time_vector  = []
model_vector = []

for i in range(0, iterations):
    n, n_power, sims_used, time_taken = sf.derive_sample_size_recommendation(data, 
                                           treatment_variable, covariates, 
                                           dependent_variable, absolute_mde, 
                                           rejection_region, desired_power, 
                                           search_orders, points_per_iteration, 
                                           sims, precison, model) 
    n_vector.append(n)
    power_vector.append(n_power)
    sims_vector.append(sims_used)
    time_vector.append(time_taken) 
    model_vector.append(model)
    
model = 'exponential_cdf'

for i in range(0, iterations):
    n, n_power, sims_used, time_taken = sf.derive_sample_size_recommendation(data, 
                                           treatment_variable, covariates, 
                                           dependent_variable, absolute_mde, 
                                           rejection_region, desired_power, 
                                           search_orders, points_per_iteration, 
                                           sims, precison, model) 
    n_vector.append(n)
    power_vector.append(n_power)
    sims_vector.append(sims_used)
    time_vector.append(time_taken) 
    model_vector.append(model)

Determining direction of search.
Starting sample size recommendation is 19,784.
Estimating the effective power of n = 19,784 using 300 simulations.
The effective power of sample size n = 19,784 is 41.2%.
Starting power is 41.2%.
Sample size of n = 19,784 is under-powered.
Proposed sample size of 19,784 fails to meet desired power of 80%.
The lower-bound for the proposed sample size search is 19,784.
The upper-bound for the proposed sample size search is 197,840.
5 candidate sample sizes were recommended within the range 19,784 to 197,840.
Proceeding with interpolation via isotonic regression.
Estimating the effective power of n = 55,395 using 300 simulations.
The effective power of sample size n = 55,395 is 85.71%.
Estimating the effective power of n = 91,006 using 300 simulations.
The effective power of sample size n = 91,006 is 98.34%.
Estimating the effective power of n = 126,617 using 300 simulations.
The effective power of sample size n = 126,617 is 99.34%.
Estimating the effectiv

The effective power of sample size n = 126,617 is 51.16%.
Estimating the effective power of n = 162,228 using 300 simulations.


KeyboardInterrupt: 

In [4]:
labels = []
for i in range(0, iterations*2):
    labels.append(file_name)    
data = [labels, n_vector, power_vector, sims_vector, time_vector, model_vector]
df = pd.DataFrame.from_records(data).T
df.columns = ['File', 'Recommended n', 'Est. Power', 'Simulations Used', 'Seconds', 'Model']
df

Unnamed: 0,File,Recommended n,Est. Power,Simulations Used,Seconds,Model
0,./data/sim_data_2019_12_23_083149.csv,45317,0.787375,3600,374,isotonic
1,./data/sim_data_2019_12_23_083149.csv,145847,0.787375,2700,578,isotonic
2,./data/sim_data_2019_12_23_083149.csv,55395,0.817276,600,81,isotonic
3,./data/sim_data_2019_12_23_083149.csv,126617,0.79402,1200,254,isotonic
4,./data/sim_data_2019_12_23_083149.csv,,,,,
5,./data/sim_data_2019_12_23_083149.csv,,,,,
6,./data/sim_data_2019_12_23_083149.csv,,,,,
7,./data/sim_data_2019_12_23_083149.csv,,,,,
8,./data/sim_data_2019_12_23_083149.csv,,,,,
9,./data/sim_data_2019_12_23_083149.csv,,,,,


In [None]:
df.to_csv('results.csv')