In [1]:
def infer_power(sample_sizes_to_eval, sub_iterations, meta_iterations,
                p_value_threshold, desired_power, data, file_out,
                verification_iterations=500, max_consecutive_failures=10,
                random_state=None):
    """Infer the sample size needed for a target power and verify it by resampling.

    Each meta-iteration does three things:

    1. For every size in ``sample_sizes_to_eval``, draw ``sub_iterations``
       samples (without replacement) from ``data``, regress ``Order_Amt`` on a
       constant plus ``Treated`` with cluster-robust standard errors grouped by
       ``Customer_ID``, and record the share of draws whose ``Treated``
       p-value is ``<= p_value_threshold`` (the empirical power).
    2. Fit ``power = a * log(b * n) + c`` to those points and invert it to get
       the sample size predicted to reach ``desired_power`` (capped at
       ``len(data)``).
    3. Re-estimate the power at that recommended size with
       ``verification_iterations`` fresh draws.

    One row per successful meta-iteration is written to ``file_out`` as CSV.

    Parameters
    ----------
    sample_sizes_to_eval : sequence of int
        Sample sizes at which the power curve is measured. At least three are
        required because the fitted curve has three parameters.
    sub_iterations : int
        Number of resamples per sample size.
    meta_iterations : int
        Number of successful infer-and-verify rounds to collect.
    p_value_threshold : float
        Significance level used to count a draw as a rejection.
    desired_power : float
        Target power the recommended sample size should achieve.
    data : pandas.DataFrame
        Must contain the columns ``Treated``, ``Order_Amt`` and ``Customer_ID``.
    file_out : str or path-like
        Destination of the CSV summary.
    verification_iterations : int, default 500
        Number of resamples used to verify the recommended sample size.
    max_consecutive_failures : int or None, default 10
        Stop after this many failed rounds in a row. ``None`` retries forever.
    random_state : int, numpy RandomState/Generator or None, default None
        Source of randomness for ``data.sample``. An int seeds one generator
        that is shared by every draw, so the whole run is reproducible.
        ``None`` keeps pandas' default (unseeded) behaviour.

    Returns
    -------
    pandas.DataFrame
        The summary that was written to ``file_out``.

    Raises
    ------
    ValueError
        If the inputs can never produce a successful round (missing columns,
        fewer than three sample sizes, a size outside ``1..len(data)``, or a
        non-positive iteration count).
    RuntimeError
        If ``max_consecutive_failures`` rounds fail in a row. The rounds
        completed so far are written to ``file_out`` before raising.
    """
    import time

    import numpy as np
    import pandas as pd
    import statsmodels.api as sm
    from scipy.optimize import curve_fit

    sample_sizes = list(sample_sizes_to_eval)
    max_n = len(data)

    # Fail fast on inputs that would make *every* round fail; inside the retry
    # loop below such errors would otherwise just be retried indefinitely.
    missing_columns = [col for col in ('Treated', 'Order_Amt', 'Customer_ID')
                       if col not in data.columns]
    if missing_columns:
        raise ValueError("data is missing required column(s): {}".format(missing_columns))
    if len(sample_sizes) < 3:
        raise ValueError("sample_sizes_to_eval needs at least 3 sizes to fit "
                         "the 3-parameter power curve.")
    out_of_range = [k for k in sample_sizes if k < 1 or k > max_n]
    if out_of_range:
        raise ValueError("sample sizes must lie between 1 and len(data)={}; "
                         "got {}".format(max_n, out_of_range))
    if sub_iterations < 1 or verification_iterations < 1:
        raise ValueError("sub_iterations and verification_iterations must be >= 1.")

    # A single shared generator: passing the bare int to every ``sample`` call
    # would return the identical sample each time.
    if isinstance(random_state, (int, np.integer)):
        rng = np.random.RandomState(random_state)
    else:
        rng = random_state

    def log_curve(x, a, b, c):
        """Logarithmic power curve fitted to (sample size, power) points."""
        return a * np.log(b * x) + c

    def rejection_rate(n_rows, repetitions, report_progress=False):
        """Share of ``repetitions`` resamples of ``n_rows`` rows that reject H0."""
        progress_step = max(1, repetitions // 10)
        rejections = 0
        for rep in range(repetitions):
            if report_progress and rep % progress_step == 0:
                print("Verification {} complete.".format(round((rep / repetitions), 2)))
            sample = data.sample(n_rows, replace=False, random_state=rng)
            design = sm.add_constant(sample.Treated)
            fitted = sm.OLS(sample.Order_Amt, design).fit(method='pinv').get_robustcov_results(
                'cluster', groups=sample.Customer_ID,
                use_correction=True, df_correction=True)
            # Index 1 is the Treated coefficient (the constant is placed first).
            if fitted.pvalues[1] <= p_value_threshold:
                rejections += 1
        return rejections / repetitions

    start = time.time()
    actual_powers = []
    completed = 0
    consecutive_failures = 0
    gave_up = False

    while completed < meta_iterations:
        try:
            observed_power = []
            for k in sample_sizes:
                power_at_k = rejection_rate(k, sub_iterations)
                print(power_at_k)
                observed_power.append(power_at_k)

            popt, _ = curve_fit(log_curve, np.asarray(sample_sizes),
                                np.asarray(observed_power))

            # Invert the fitted curve: n = exp((power - c) / a) / b.
            recommended_n = int(np.exp((desired_power - popt[2]) / popt[0]) / popt[1])
            if recommended_n > max_n:
                recommended_n = max_n
            if recommended_n < 1:
                raise ValueError("fitted curve gave a non-positive sample size "
                                 "({})".format(recommended_n))

            print("The recommended sample size for a statistical power level of " +
                  "{} is {}.".format(desired_power, recommended_n))

            actual_power = rejection_rate(recommended_n, verification_iterations,
                                          report_progress=True)
        except Exception as exc:
            # ``Exception`` (not a bare except) so Ctrl-C still interrupts a long run.
            consecutive_failures += 1
            print("Power curve inference failed.")
            print("Reason: {!r}".format(exc))
            if (max_consecutive_failures is not None
                    and consecutive_failures >= max_consecutive_failures):
                gave_up = True
                break
            continue

        consecutive_failures = 0
        actual_powers.append(actual_power)
        completed += 1
        print("{} iterations sucessfully completed.".format(completed))
        print(str(time.time() - start) + ' time elapsed.')

    # Explicit columns and dtypes so the frame is well-formed even with zero rows.
    df_out = pd.DataFrame({
        'Desired Power': pd.Series([desired_power] * len(actual_powers), dtype=float),
        'Actual Power': pd.Series(actual_powers, dtype=float),
    })
    df_out['Error'] = (df_out['Desired Power'] - df_out['Actual Power'])
    df_out['Absolute Error'] = df_out['Error'].abs()
    df_out['Iterations'] = sub_iterations
    df_out['Total Iterations'] = (len(sample_sizes) * sub_iterations) + verification_iterations
    df_out.to_csv(file_out)

    if gave_up:
        raise RuntimeError(
            "Gave up after {} consecutive failed rounds; {} completed round(s) "
            "were written to {}.".format(consecutive_failures, completed, file_out))

    return df_out

In [2]:
import pandas as pd
# Load the experiment data once; every run below resamples from the same frame.
data_file_to_read = pd.read_csv('data_c_1000000_mde_01.csv')

# Each entry: (candidate sample sizes, output CSV, completion message).
# The runs differ only in how many points are used to fit the power curve.
power_runs = [
    ([8000, 10000, 12000], 'df7.csv', "DF 7 created."),
    ([6000, 8000, 10000, 12000, 14000], 'df8.csv', "DF 8 created."),
    ([4000, 6000, 8000, 10000, 12000, 14000, 16000], 'df9.csv', "DF 9 created."),
]
for candidate_sizes, csv_name, done_message in power_runs:
    # 300 resamples per size, 30 verified rounds, alpha = 0.05, target power = 0.8
    infer_power(candidate_sizes, 300, 30, 0.05, 0.8, data_file_to_read, csv_name)
    print(done_message)

0.37666666666666665
0.5133333333333333
0.5233333333333333
The recommended sample size for a statistical power level of 0.8 is 23940.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
1 iterations sucessfully completed.
1313.7272734642029 time elapsed.
0.42
0.46
0.5633333333333334
The recommended sample size for a statistical power level of 0.8 is 24722.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
2 iterations sucessfully completed.
2455.6818811893463 time elapsed.
0.45
0.49
0.5733333333333334
The recommended sample size for a statistical power level of 0.8 is 26457.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
3 iterations sucessfully completed.
3815.754499197006 time elapsed.
0.44333333333333336
0.4766666666666667
0.5666666666666667
The recommended sample size for a statistical power level of 0.8 is 27356.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
4 iterations sucessfully completed.
5596.408175468445 time elapsed.
0.4666666666666667
0.5266666666666666
0.5866666666666667
The recommended sample size for a statistical power level of 0.8 is 24919.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
5 iterations sucessfully completed.
7703.756777763367 time elapsed.
0.44333333333333336
0.49
0.53
The recommended sample size for a statistical power level of 0.8 is 42557.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
6 iterations sucessfully completed.
9770.628280162811 time elapsed.
0.41333333333333333
0.44666666666666666
0.6033333333333334
The recommended sample size for a statistical power level of 0.8 is 19538.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
7 iterations sucessfully completed.
11614.418525218964 time elapsed.
0.41
0.47333333333333333
0.5966666666666667
The recommended sample size for a statistical power level of 0.8 is 19387.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
8 iterations sucessfully completed.
13576.2004134655 time elapsed.
0.46
0.5133333333333333
0.5733333333333334
The recommended sample size for a statistical power level of 0.8 is 27441.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
9 iterations sucessfully completed.
15659.93603014946 time elapsed.
0.37666666666666665
0.49666666666666665
0.5766666666666667
The recommended sample size for a statistical power level of 0.8 is 18705.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
10 iterations sucessfully completed.
17625.581275701523 time elapsed.
0.38333333333333336
0.48333333333333334
0.5633333333333334
The recommended sample size for a statistical power level of 0.8 is 20431.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
11 iterations sucessfully completed.
19666.447730779648 time elapsed.
0.43
0.5033333333333333
0.5533333333333333
The recommended sample size for a statistical power level of 0.8 is 26759.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
12 iterations sucessfully completed.
21703.621162891388 time elapsed.
0.39
0.5
0.5933333333333334
The recommended sample size for a statistical power level of 0.8 is 18149.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
13 iterations sucessfully completed.
23683.162271261215 time elapsed.
0.3566666666666667
0.4766666666666667
0.56
The recommended sample size for a statistical power level of 0.8 is 19227.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
14 iterations sucessfully completed.
25584.137220859528 time elapsed.
0.41333333333333333
0.5
0.5933333333333334
The recommended sample size for a statistical power level of 0.8 is 19353.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
15 iterations sucessfully completed.
27492.31850385666 time elapsed.
0.4266666666666667
0.52
0.6533333333333333
The recommended sample size for a statistical power level of 0.8 is 15965.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
16 iterations sucessfully completed.
29394.928953886032 time elapsed.
0.4
0.5266666666666666
0.5533333333333333
The recommended sample size for a statistical power level of 0.8 is 21872.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
17 iterations sucessfully completed.
31384.98542523384 time elapsed.
0.4533333333333333
0.5033333333333333
0.5433333333333333
The recommended sample size for a statistical power level of 0.8 is 38093.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
18 iterations sucessfully completed.
33420.677693367004 time elapsed.
0.4166666666666667
0.5133333333333333
0.5166666666666667
The recommended sample size for a statistical power level of 0.8 is 34554.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
19 iterations sucessfully completed.
35391.19507908821 time elapsed.
0.4633333333333333
0.5033333333333333
0.5933333333333334
The recommended sample size for a statistical power level of 0.8 is 23967.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
20 iterations sucessfully completed.
37298.1448738575 time elapsed.
0.44666666666666666
0.5166666666666667
0.5633333333333334
The recommended sample size for a statistical power level of 0.8 is 27041.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
21 iterations sucessfully completed.
39384.228984594345 time elapsed.
0.43
0.47
0.59
The recommended sample size for a statistical power level of 0.8 is 21615.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
22 iterations sucessfully completed.
41382.06367778778 time elapsed.
0.43
0.5133333333333333
0.5633333333333334
The recommended sample size for a statistical power level of 0.8 is 24288.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
23 iterations sucessfully completed.
43324.35413861275 time elapsed.
0.41333333333333333
0.5166666666666667
0.5566666666666666
The recommended sample size for a statistical power level of 0.8 is 23114.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
24 iterations sucessfully completed.
45117.32261443138 time elapsed.
0.41333333333333333
0.51
0.59
The recommended sample size for a statistical power level of 0.8 is 19442.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
25 iterations sucessfully completed.
46208.26343202591 time elapsed.
0.3433333333333333
0.4633333333333333
0.6066666666666667
The recommended sample size for a statistical power level of 0.8 is 16421.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
26 iterations sucessfully completed.
47173.357015132904 time elapsed.
0.3933333333333333
0.4766666666666667
0.53
The recommended sample size for a statistical power level of 0.8 is 26416.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
27 iterations sucessfully completed.
48149.631136894226 time elapsed.
0.41
0.44333333333333336
0.5466666666666666
The recommended sample size for a statistical power level of 0.8 is 27075.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
28 iterations sucessfully completed.
49119.66785621643 time elapsed.
0.4266666666666667
0.5166666666666667
0.5533333333333333
The recommended sample size for a statistical power level of 0.8 is 25600.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
29 iterations sucessfully completed.
50084.116990327835 time elapsed.
0.3433333333333333
0.5266666666666666
0.5966666666666667
The recommended sample size for a statistical power level of 0.8 is 16138.
Verification 0.0 complete.




Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
30 iterations sucessfully completed.
51051.4775493145 time elapsed.
DF 7 created.
0.3
0.4
0.5033333333333333
0.5166666666666667
0.65
The recommended sample size for a statistical power level of 0.8 is 22344.
Verification 0.0 complete.
Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
1 iterations sucessfully completed.
1312.0819730758667 time elapsed.
0.2833333333333333
0.43333333333333335
0.48
0.56
0.6333333333333333
The recommended sample size for a statistical power level of 0.8 is 21675.
Verification 0.0 complete.
Verification 0.1 complete.
Verification 



Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
16 iterations sucessfully completed.
23386.962555408478 time elapsed.
0.30333333333333334
0.39
0.5366666666666666
0.6266666666666667
0.6666666666666666
The recommended sample size for a statistical power level of 0.8 is 18252.
Verification 0.0 complete.
Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
17 iterations sucessfully completed.
24900.99063014984 time elapsed.
0.30666666666666664
0.4266666666666667
0.49
0.5966666666666667
0.6366666666666667
The recommended sample size for a statistical power level of 0.8 is 20931.
Verification 0.0 complete.
Verific

0.35333333333333333
0.38333333333333336
0.47
0.5866666666666667
0.7066666666666667
0.6866666666666666
The recommended sample size for a statistical power level of 0.8 is 21872.
Verification 0.0 complete.
Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
3 iterations sucessfully completed.
10132.550481796265 time elapsed.
0.23
0.33666666666666667
0.4066666666666667
0.48333333333333334
0.5866666666666667
0.6166666666666667
0.6733333333333333
The recommended sample size for a statistical power level of 0.8 is 24667.
Verification 0.0 complete.
Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
4 iterations suces

Verification 0.8 complete.
Verification 0.9 complete.
18 iterations sucessfully completed.
41344.40796518326 time elapsed.
0.22
0.2833333333333333
0.4166666666666667
0.5166666666666667
0.61
0.6233333333333333
0.6966666666666667
The recommended sample size for a statistical power level of 0.8 is 21948.
Verification 0.0 complete.
Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 0.6 complete.
Verification 0.7 complete.
Verification 0.8 complete.
Verification 0.9 complete.
19 iterations sucessfully completed.
42995.50786757469 time elapsed.
0.22666666666666666
0.33666666666666667
0.39
0.47333333333333333
0.5766666666666667
0.6166666666666667
0.67
The recommended sample size for a statistical power level of 0.8 is 25180.
Verification 0.0 complete.
Verification 0.1 complete.
Verification 0.2 complete.
Verification 0.3 complete.
Verification 0.4 complete.
Verification 0.5 complete.
Verification 