In [472]:
def infer_power(sample_sizes_to_eval, sub_iterations, meta_iterations,
                p_value_threshold, desired_power, data):
    """Estimate by simulation the sample size that reaches a desired power.

    Each of the ``meta_iterations`` rounds does the following:

    1. For every size in ``sample_sizes_to_eval``, draw ``sub_iterations``
       random subsamples (without replacement), regress ``Order_Amt`` on
       ``Treated`` with OLS using standard errors clustered on
       ``Customer_ID``, and record the share of draws whose p-value for
       ``Treated`` is at or below ``p_value_threshold``.
    2. Fit ``power = a * log(b * n) + c`` to the (size, share) pairs, invert
       the fitted curve at ``desired_power`` and print the resulting
       recommended sample size.
    3. Repeat the simulation at the recommended size to measure the power
       that size actually delivers.

    Parameters
    ----------
    sample_sizes_to_eval : sequence of int
        Subsample sizes at which the rejection rate is measured.
    sub_iterations : int
        Number of random subsamples drawn per sample size (must be > 0).
    meta_iterations : int
        Number of times the whole estimate-then-verify round is repeated.
    p_value_threshold : float
        Significance level; a draw counts as a rejection when p <= threshold.
    desired_power : float
        Target power at which the fitted curve is inverted.
    data : str or file-like
        CSV source passed to ``pandas.read_csv``; it must provide the columns
        ``Customer_ID``, ``Retailer_ID``, ``Treated`` and ``Order_Amt``.

    Returns
    -------
    pandas.DataFrame
        One row per meta-iteration with the columns ``'Desired Power'``,
        ``'Actual Power'``, ``'Error'`` (absolute difference of the two) and
        ``'Iterations'`` (the value of ``sub_iterations``).  The frame is
        empty, but has all four columns, when ``meta_iterations`` is 0.

    Raises
    ------
    ValueError
        If ``sub_iterations`` is not positive, or if the fitted curve yields
        a recommended sample size that is not finite, not positive, or larger
        than the number of rows in ``data``.
    """
    # Imports stay local so the function works on its own in a fresh kernel.
    import pandas as pd
    import statsmodels.api as sm
    import numpy as np
    from scipy.optimize import curve_fit

    if sub_iterations <= 0:
        # The rejection rate is a mean over the draws; zero draws used to
        # surface as a bare ZeroDivisionError.
        raise ValueError('sub_iterations must be a positive integer.')

    sample_sizes = list(sample_sizes_to_eval)

    df = pd.read_csv(data, index_col=False)
    df = df[['Customer_ID', 'Retailer_ID', 'Treated', 'Order_Amt']]
    print(str(len(df)) + ' observations read into Python.')

    def _rejection_rate(sample_size):
        """Share of random subsamples of ``sample_size`` rows that reject."""
        rejections = 0
        for _ in range(sub_iterations):
            sample = df.sample(sample_size, replace=False)
            design = sm.add_constant(sample.Treated)
            robust_fit = sm.OLS(sample.Order_Amt, design).fit(
                method='pinv').get_robustcov_results(
                    'cluster', groups=sample.Customer_ID,
                    use_correction=True, df_correction=True)
            # Index 1 is the entry after the constant that add_constant
            # places first, i.e. the one for `Treated`.
            if robust_fit.pvalues[1] <= p_value_threshold:
                rejections += 1
        return rejections / sub_iterations

    def _log_model(x, a, b, c):
        # NOTE(review): a*log(b*x) + c == a*log(x) + (a*log(b) + c), so b and
        # c are not separately identifiable.  The recommended size below only
        # depends on `a` and that combined offset.
        return a * np.log(b * x) + c

    sizes = np.asarray(sample_sizes)
    actual_powers = []

    for _ in range(meta_iterations):
        # Step 1: empirical rejection rate at every candidate sample size.
        powers = np.asarray([_rejection_rate(k) for k in sample_sizes])

        # Step 2: fit the curve and invert it at the desired power.
        popt, _ = curve_fit(_log_model, sizes, powers)
        raw_n = np.exp((desired_power - popt[2]) / popt[0]) / popt[1]
        if not np.isfinite(raw_n):
            raise ValueError(
                'The fitted power curve gives a non-finite recommended '
                'sample size ({}).'.format(raw_n))
        recommended_n = int(raw_n)
        print("The recommended sample size for a statistical power level of " +
              "{} is {}.".format(desired_power, recommended_n))
        if not 0 < recommended_n <= len(df):
            raise ValueError(
                'Recommended sample size {} cannot be drawn without '
                'replacement from {} observations.'.format(
                    recommended_n, len(df)))

        # Step 3: measure the power actually achieved at that size.
        actual_powers.append(_rejection_rate(recommended_n))

    # Building the frame from named columns keeps it valid when there are
    # no rows (meta_iterations == 0).
    df_out = pd.DataFrame({
        'Desired Power': np.full(len(actual_powers), desired_power,
                                 dtype=float),
        'Actual Power': np.asarray(actual_powers, dtype=float),
    })
    df_out['Error'] = (df_out['Desired Power'] - df_out['Actual Power']).abs()
    df_out['Iterations'] = np.full(len(actual_powers), sub_iterations)
    return df_out

In [473]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import math

In [474]:
results_sd_3_n_500 = infer_power([50000, 70000, 90000], 500, 100, 0.05, 0.8, 'data_c_13000_mde_01.csv')
print('results_sd_3_n_500 complete.')

# Summary statistics of the 'Error' column returned by infer_power.
mu         = results_sd_3_n_500['Error'].mean()
sigma      = results_sd_3_n_500['Error'].std()
iterations = results_sd_3_n_500['Iterations'].unique()[0]

# Normal density with the same mean and standard deviation as the errors.
fig, ax = plt.subplots()
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
ax.plot(x, stats.norm.pdf(x, mu, sigma))
ax.axvline(x=0, color='red', alpha=0.4)
ax.set_xlim(-0.1, 1)
ax.set_xlabel('Error')
ax.set_ylabel('Density')

bbox = dict(boxstyle="round", fc="0.9")
# The box is anchored in axes-fraction coordinates: the previous call relied
# on `xdisplay`/`ydisplay`, which are not defined anywhere in this notebook
# and raise NameError on a fresh kernel.
ax.annotate('Mean Error  = {} \n   Error Std. = {} \n   Iterations = {}'.format(
                round(mu, 2), round(sigma, 2), iterations),
            xy=(0.97, 0.95), xycoords='axes fraction',
            ha='right', va='top', multialignment='left',
            bbox=bbox)
fig.savefig('50k_70k_90k_n500.png')
# Close the figure so repeated runs do not accumulate open figures.
plt.close(fig)

139793 observations read into Python.




The recommended sample size for a statistical power level of 0.8 is 8778.




The recommended sample size for a statistical power level of 0.8 is 2912.




The recommended sample size for a statistical power level of 0.8 is 4145.




The recommended sample size for a statistical power level of 0.8 is 1977.




The recommended sample size for a statistical power level of 0.8 is 1922.




The recommended sample size for a statistical power level of 0.8 is 7046.




The recommended sample size for a statistical power level of 0.8 is 2858.




The recommended sample size for a statistical power level of 0.8 is 1832.




The recommended sample size for a statistical power level of 0.8 is 5745.




The recommended sample size for a statistical power level of 0.8 is 6281.




The recommended sample size for a statistical power level of 0.8 is 2373.




The recommended sample size for a statistical power level of 0.8 is 2391.




The recommended sample size for a statistical power level of 0.8 is 5548.




The recommended sample size for a statistical power level of 0.8 is 2876.




The recommended sample size for a statistical power level of 0.8 is 4160.




The recommended sample size for a statistical power level of 0.8 is 10165.




The recommended sample size for a statistical power level of 0.8 is 4145.




The recommended sample size for a statistical power level of 0.8 is 2355.




The recommended sample size for a statistical power level of 0.8 is 438.




The recommended sample size for a statistical power level of 0.8 is 3532.




The recommended sample size for a statistical power level of 0.8 is 145.




The recommended sample size for a statistical power level of 0.8 is 1850.




The recommended sample size for a statistical power level of 0.8 is 5501.




The recommended sample size for a statistical power level of 0.8 is 6281.




The recommended sample size for a statistical power level of 0.8 is 2459.




The recommended sample size for a statistical power level of 0.8 is 8583.




The recommended sample size for a statistical power level of 0.8 is 3516.




The recommended sample size for a statistical power level of 0.8 is 1850.




The recommended sample size for a statistical power level of 0.8 is 4113.




The recommended sample size for a statistical power level of 0.8 is 4145.




The recommended sample size for a statistical power level of 0.8 is 2912.




The recommended sample size for a statistical power level of 0.8 is 6299.




The recommended sample size for a statistical power level of 0.8 is 3465.




The recommended sample size for a statistical power level of 0.8 is 2930.




The recommended sample size for a statistical power level of 0.8 is 6299.




The recommended sample size for a statistical power level of 0.8 is 5536.




The recommended sample size for a statistical power level of 0.8 is 2912.




The recommended sample size for a statistical power level of 0.8 is 1850.




The recommended sample size for a statistical power level of 0.8 is 1886.




The recommended sample size for a statistical power level of 0.8 is 6281.




The recommended sample size for a statistical power level of 0.8 is 4784.




The recommended sample size for a statistical power level of 0.8 is 7801.




The recommended sample size for a statistical power level of 0.8 is 1403.




The recommended sample size for a statistical power level of 0.8 is 7216.




The recommended sample size for a statistical power level of 0.8 is 4303.




The recommended sample size for a statistical power level of 0.8 is 8581.




The recommended sample size for a statistical power level of 0.8 is 4839.




The recommended sample size for a statistical power level of 0.8 is 7026.




The recommended sample size for a statistical power level of 0.8 is 4839.




The recommended sample size for a statistical power level of 0.8 is 7026.




The recommended sample size for a statistical power level of 0.8 is 5490.




The recommended sample size for a statistical power level of 0.8 is 5559.




The recommended sample size for a statistical power level of 0.8 is 2336.




The recommended sample size for a statistical power level of 0.8 is 700.




The recommended sample size for a statistical power level of 0.8 is 7801.




The recommended sample size for a statistical power level of 0.8 is 3516.




The recommended sample size for a statistical power level of 0.8 is 9363.




The recommended sample size for a statistical power level of 0.8 is 7804.




The recommended sample size for a statistical power level of 0.8 is 7797.




The recommended sample size for a statistical power level of 0.8 is 7033.




The recommended sample size for a statistical power level of 0.8 is 3516.




The recommended sample size for a statistical power level of 0.8 is 461.




The recommended sample size for a statistical power level of 0.8 is 6290.




The recommended sample size for a statistical power level of 0.8 is 1438.




The recommended sample size for a statistical power level of 0.8 is 9374.




The recommended sample size for a statistical power level of 0.8 is 4175.




The recommended sample size for a statistical power level of 0.8 is 10980.




The recommended sample size for a statistical power level of 0.8 is 11720.




The recommended sample size for a statistical power level of 0.8 is 4826.




The recommended sample size for a statistical power level of 0.8 is 2894.




The recommended sample size for a statistical power level of 0.8 is 8001.




The recommended sample size for a statistical power level of 0.8 is 7804.




The recommended sample size for a statistical power level of 0.8 is 7808.




The recommended sample size for a statistical power level of 0.8 is 10956.




The recommended sample size for a statistical power level of 0.8 is 673.




The recommended sample size for a statistical power level of 0.8 is 8582.




The recommended sample size for a statistical power level of 0.8 is 1351.




The recommended sample size for a statistical power level of 0.8 is 4853.




The recommended sample size for a statistical power level of 0.8 is 4206.




The recommended sample size for a statistical power level of 0.8 is 5525.




The recommended sample size for a statistical power level of 0.8 is 4980.




The recommended sample size for a statistical power level of 0.8 is 3797.




The recommended sample size for a statistical power level of 0.8 is 3532.




The recommended sample size for a statistical power level of 0.8 is 4812.




The recommended sample size for a statistical power level of 0.8 is 3532.




The recommended sample size for a statistical power level of 0.8 is 3465.




The recommended sample size for a statistical power level of 0.8 is 2336.




The recommended sample size for a statistical power level of 0.8 is 3646.




The recommended sample size for a statistical power level of 0.8 is 7793.




The recommended sample size for a statistical power level of 0.8 is 7026.




The recommended sample size for a statistical power level of 0.8 is 438.




The recommended sample size for a statistical power level of 0.8 is 2930.




The recommended sample size for a statistical power level of 0.8 is 5490.




The recommended sample size for a statistical power level of 0.8 is 6235.




The recommended sample size for a statistical power level of 0.8 is 9369.




The recommended sample size for a statistical power level of 0.8 is 4826.




The recommended sample size for a statistical power level of 0.8 is 1035.




The recommended sample size for a statistical power level of 0.8 is 7989.




The recommended sample size for a statistical power level of 0.8 is 9365.




The recommended sample size for a statistical power level of 0.8 is 8578.
results_sd_3_n_500 complete.


<Figure size 432x288 with 0 Axes>

In [475]:
results_sd_3_n_1000 = infer_power([50000, 70000, 90000], 1000, 100, 0.05, 0.8, 'data_c_13000_mde_01.csv')
print('results_sd_3_n_1000 complete.')

# Summary statistics of the 'Error' column returned by infer_power.
mu         = results_sd_3_n_1000['Error'].mean()
sigma      = results_sd_3_n_1000['Error'].std()
iterations = results_sd_3_n_1000['Iterations'].unique()[0]

# Normal density with the same mean and standard deviation as the errors.
fig, ax = plt.subplots()
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
ax.plot(x, stats.norm.pdf(x, mu, sigma))
ax.axvline(x=0, color='red', alpha=0.4)
ax.set_xlim(-0.1, 1)
ax.set_xlabel('Error')
ax.set_ylabel('Density')

bbox = dict(boxstyle="round", fc="0.9")
# The box is anchored in axes-fraction coordinates: the previous call relied
# on `xdisplay`/`ydisplay`, which are not defined anywhere in this notebook
# and raise NameError on a fresh kernel.
ax.annotate('Mean Error  = {} \n   Error Std. = {} \n   Iterations = {}'.format(
                round(mu, 2), round(sigma, 2), iterations),
            xy=(0.97, 0.95), xycoords='axes fraction',
            ha='right', va='top', multialignment='left',
            bbox=bbox)
fig.savefig('50k_70k_90k_n1000.png')
# Close the figure so repeated runs do not accumulate open figures.
plt.close(fig)

139793 observations read into Python.




The recommended sample size for a statistical power level of 0.8 is 4175.




The recommended sample size for a statistical power level of 0.8 is 5548.




The recommended sample size for a statistical power level of 0.8 is 7808.




The recommended sample size for a statistical power level of 0.8 is 673.




The recommended sample size for a statistical power level of 0.8 is 4129.




The recommended sample size for a statistical power level of 0.8 is 1081.




The recommended sample size for a statistical power level of 0.8 is 971.




The recommended sample size for a statistical power level of 0.8 is 1850.




The recommended sample size for a statistical power level of 0.8 is 4866.




The recommended sample size for a statistical power level of 0.8 is 1403.




The recommended sample size for a statistical power level of 0.8 is 1455.




The recommended sample size for a statistical power level of 0.8 is 4866.




The recommended sample size for a statistical power level of 0.8 is 1438.




The recommended sample size for a statistical power level of 0.8 is 3047.




The recommended sample size for a statistical power level of 0.8 is 3516.




The recommended sample size for a statistical power level of 0.8 is 2336.




The recommended sample size for a statistical power level of 0.8 is 4839.




The recommended sample size for a statistical power level of 0.8 is 3516.




The recommended sample size for a statistical power level of 0.8 is 714.




The recommended sample size for a statistical power level of 0.8 is 5548.




The recommended sample size for a statistical power level of 0.8 is 5548.




The recommended sample size for a statistical power level of 0.8 is 1067.




The recommended sample size for a statistical power level of 0.8 is 1003.




The recommended sample size for a statistical power level of 0.8 is 8581.




The recommended sample size for a statistical power level of 0.8 is 5559.




The recommended sample size for a statistical power level of 0.8 is 2876.




The recommended sample size for a statistical power level of 0.8 is 2409.




The recommended sample size for a statistical power level of 0.8 is 5571.




The recommended sample size for a statistical power level of 0.8 is 9367.




The recommended sample size for a statistical power level of 0.8 is 2912.




The recommended sample size for a statistical power level of 0.8 is 1886.




The recommended sample size for a statistical power level of 0.8 is 1977.




The recommended sample size for a statistical power level of 0.8 is 2355.




The recommended sample size for a statistical power level of 0.8 is 4812.




The recommended sample size for a statistical power level of 0.8 is 7026.




The recommended sample size for a statistical power level of 0.8 is 4853.




The recommended sample size for a statistical power level of 0.8 is 6244.




The recommended sample size for a statistical power level of 0.8 is 1444.




The recommended sample size for a statistical power level of 0.8 is 4853.




The recommended sample size for a statistical power level of 0.8 is 4993.




The recommended sample size for a statistical power level of 0.8 is 700.




The recommended sample size for a statistical power level of 0.8 is 5712.




The recommended sample size for a statistical power level of 0.8 is 7801.




The recommended sample size for a statistical power level of 0.8 is 4839.




The recommended sample size for a statistical power level of 0.8 is 4993.




The recommended sample size for a statistical power level of 0.8 is 1420.




The recommended sample size for a statistical power level of 0.8 is 7804.




The recommended sample size for a statistical power level of 0.8 is 4812.




The recommended sample size for a statistical power level of 0.8 is 1032.




The recommended sample size for a statistical power level of 0.8 is 3549.




The recommended sample size for a statistical power level of 0.8 is 3532.




The recommended sample size for a statistical power level of 0.8 is 4129.




The recommended sample size for a statistical power level of 0.8 is 2947.




The recommended sample size for a statistical power level of 0.8 is 1386.




The recommended sample size for a statistical power level of 0.8 is 5559.




The recommended sample size for a statistical power level of 0.8 is 7033.




The recommended sample size for a statistical power level of 0.8 is 5559.




The recommended sample size for a statistical power level of 0.8 is 2930.




The recommended sample size for a statistical power level of 0.8 is 5548.




The recommended sample size for a statistical power level of 0.8 is 289.




The recommended sample size for a statistical power level of 0.8 is 3499.




The recommended sample size for a statistical power level of 0.8 is 5548.




The recommended sample size for a statistical power level of 0.8 is 4145.




The recommended sample size for a statistical power level of 0.8 is 2355.




The recommended sample size for a statistical power level of 0.8 is 8579.




The recommended sample size for a statistical power level of 0.8 is 7039.




The recommended sample size for a statistical power level of 0.8 is 4160.




The recommended sample size for a statistical power level of 0.8 is 6281.




The recommended sample size for a statistical power level of 0.8 is 1438.




The recommended sample size for a statistical power level of 0.8 is 3516.




The recommended sample size for a statistical power level of 0.8 is 3465.




The recommended sample size for a statistical power level of 0.8 is 3516.




The recommended sample size for a statistical power level of 0.8 is 4826.




The recommended sample size for a statistical power level of 0.8 is 6272.




The recommended sample size for a statistical power level of 0.8 is 6281.




The recommended sample size for a statistical power level of 0.8 is 7801.




The recommended sample size for a statistical power level of 0.8 is 13337.




The recommended sample size for a statistical power level of 0.8 is 4826.




The recommended sample size for a statistical power level of 0.8 is 4839.




The recommended sample size for a statistical power level of 0.8 is 3532.




The recommended sample size for a statistical power level of 0.8 is 9363.




The recommended sample size for a statistical power level of 0.8 is 1019.




The recommended sample size for a statistical power level of 0.8 is 2876.




The recommended sample size for a statistical power level of 0.8 is 2336.




The recommended sample size for a statistical power level of 0.8 is 3012.




The recommended sample size for a statistical power level of 0.8 is 1886.




The recommended sample size for a statistical power level of 0.8 is 2912.




The recommended sample size for a statistical power level of 0.8 is 2409.




The recommended sample size for a statistical power level of 0.8 is 8582.




The recommended sample size for a statistical power level of 0.8 is 8583.




The recommended sample size for a statistical power level of 0.8 is 1003.




The recommended sample size for a statistical power level of 0.8 is 6290.




The recommended sample size for a statistical power level of 0.8 is 5536.




The recommended sample size for a statistical power level of 0.8 is 1850.




The recommended sample size for a statistical power level of 0.8 is 2336.




The recommended sample size for a statistical power level of 0.8 is 6290.




The recommended sample size for a statistical power level of 0.8 is 4145.




The recommended sample size for a statistical power level of 0.8 is 2894.




The recommended sample size for a statistical power level of 0.8 is 1886.




The recommended sample size for a statistical power level of 0.8 is 2373.
results_sd_3_n_1000 complete.


<Figure size 432x288 with 0 Axes>

In [None]:
results_sd_3_n_2000 = infer_power([50000, 70000, 90000], 2000, 100, 0.05, 0.8, 'data_c_13000_mde_01.csv')
print('results_sd_3_n_2000 complete.')

# Summary statistics of the 'Error' column returned by infer_power.
mu         = results_sd_3_n_2000['Error'].mean()
sigma      = results_sd_3_n_2000['Error'].std()
iterations = results_sd_3_n_2000['Iterations'].unique()[0]

# Normal density with the same mean and standard deviation as the errors.
fig, ax = plt.subplots()
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
ax.plot(x, stats.norm.pdf(x, mu, sigma))
ax.axvline(x=0, color='red', alpha=0.4)
ax.set_xlim(-0.1, 1)
ax.set_xlabel('Error')
ax.set_ylabel('Density')

bbox = dict(boxstyle="round", fc="0.9")
# The box is anchored in axes-fraction coordinates: the previous call relied
# on `xdisplay`/`ydisplay`, which are not defined anywhere in this notebook
# and raise NameError on a fresh kernel.
ax.annotate('Mean Error  = {} \n   Error Std. = {} \n   Iterations = {}'.format(
                round(mu, 2), round(sigma, 2), iterations),
            xy=(0.97, 0.95), xycoords='axes fraction',
            ha='right', va='top', multialignment='left',
            bbox=bbox)
fig.savefig('50k_70k_90k_n2000.png')
# Close the figure so repeated runs do not accumulate open figures.
plt.close(fig)

139793 observations read into Python.




The recommended sample size for a statistical power level of 0.8 is 6276.




The recommended sample size for a statistical power level of 0.8 is 6852.




The recommended sample size for a statistical power level of 0.8 is 5911.




The recommended sample size for a statistical power level of 0.8 is 5721.




The recommended sample size for a statistical power level of 0.8 is 4344.




The recommended sample size for a statistical power level of 0.8 is 4707.




The recommended sample size for a statistical power level of 0.8 is 3226.




The recommended sample size for a statistical power level of 0.8 is 4670.




The recommended sample size for a statistical power level of 0.8 is 1881.




The recommended sample size for a statistical power level of 0.8 is 3837.




The recommended sample size for a statistical power level of 0.8 is 4529.




The recommended sample size for a statistical power level of 0.8 is 3813.




The recommended sample size for a statistical power level of 0.8 is 4385.




The recommended sample size for a statistical power level of 0.8 is 6285.




The recommended sample size for a statistical power level of 0.8 is 5817.




The recommended sample size for a statistical power level of 0.8 is 4846.




The recommended sample size for a statistical power level of 0.8 is 2091.




The recommended sample size for a statistical power level of 0.8 is 5551.




The recommended sample size for a statistical power level of 0.8 is 5371.




The recommended sample size for a statistical power level of 0.8 is 3239.




The recommended sample size for a statistical power level of 0.8 is 2373.




The recommended sample size for a statistical power level of 0.8 is 5951.




The recommended sample size for a statistical power level of 0.8 is 4853.




The recommended sample size for a statistical power level of 0.8 is 5365.




The recommended sample size for a statistical power level of 0.8 is 4485.




The recommended sample size for a statistical power level of 0.8 is 3234.




The recommended sample size for a statistical power level of 0.8 is 3702.




The recommended sample size for a statistical power level of 0.8 is 6513.




The recommended sample size for a statistical power level of 0.8 is 4507.




The recommended sample size for a statistical power level of 0.8 is 3511.




The recommended sample size for a statistical power level of 0.8 is 1755.




The recommended sample size for a statistical power level of 0.8 is 3813.




The recommended sample size for a statistical power level of 0.8 is 8384.




The recommended sample size for a statistical power level of 0.8 is 4839.




The recommended sample size for a statistical power level of 0.8 is 6662.




The recommended sample size for a statistical power level of 0.8 is 5618.




The recommended sample size for a statistical power level of 0.8 is 4670.




The recommended sample size for a statistical power level of 0.8 is 6339.




The recommended sample size for a statistical power level of 0.8 is 2654.




The recommended sample size for a statistical power level of 0.8 is 5556.




The recommended sample size for a statistical power level of 0.8 is 4554.




The recommended sample size for a statistical power level of 0.8 is 5545.




The recommended sample size for a statistical power level of 0.8 is 3365.




The recommended sample size for a statistical power level of 0.8 is 5911.




The recommended sample size for a statistical power level of 0.8 is 4152.




The recommended sample size for a statistical power level of 0.8 is 4214.




The recommended sample size for a statistical power level of 0.8 is 4156.




The recommended sample size for a statistical power level of 0.8 is 2640.




The recommended sample size for a statistical power level of 0.8 is 3401.




The recommended sample size for a statistical power level of 0.8 is 4697.




The recommended sample size for a statistical power level of 0.8 is 7604.




The recommended sample size for a statistical power level of 0.8 is 7219.




The recommended sample size for a statistical power level of 0.8 is 3657.




The recommended sample size for a statistical power level of 0.8 is 3833.




The recommended sample size for a statistical power level of 0.8 is 5734.




The recommended sample size for a statistical power level of 0.8 is 3208.




The recommended sample size for a statistical power level of 0.8 is 3735.




The recommended sample size for a statistical power level of 0.8 is 3727.




The recommended sample size for a statistical power level of 0.8 is 7465.




The recommended sample size for a statistical power level of 0.8 is 3528.




The recommended sample size for a statistical power level of 0.8 is 2768.




The recommended sample size for a statistical power level of 0.8 is 3665.




The recommended sample size for a statistical power level of 0.8 is 6103.




The recommended sample size for a statistical power level of 0.8 is 3685.




The recommended sample size for a statistical power level of 0.8 is 4184.




The recommended sample size for a statistical power level of 0.8 is 5049.




The recommended sample size for a statistical power level of 0.8 is 4327.




The recommended sample size for a statistical power level of 0.8 is 5380.




The recommended sample size for a statistical power level of 0.8 is 4843.




The recommended sample size for a statistical power level of 0.8 is 4011.




The recommended sample size for a statistical power level of 0.8 is 728.




The recommended sample size for a statistical power level of 0.8 is 4507.




The recommended sample size for a statistical power level of 0.8 is 3059.




The recommended sample size for a statistical power level of 0.8 is 5904.




The recommended sample size for a statistical power level of 0.8 is 5773.




The recommended sample size for a statistical power level of 0.8 is 2350.




The recommended sample size for a statistical power level of 0.8 is 3257.




The recommended sample size for a statistical power level of 0.8 is 5724.




The recommended sample size for a statistical power level of 0.8 is 1975.




The recommended sample size for a statistical power level of 0.8 is 5904.




The recommended sample size for a statistical power level of 0.8 is 3532.




The recommended sample size for a statistical power level of 0.8 is 3972.




The recommended sample size for a statistical power level of 0.8 is 3516.




The recommended sample size for a statistical power level of 0.8 is 2799.




The recommended sample size for a statistical power level of 0.8 is 5203.




The recommended sample size for a statistical power level of 0.8 is 2100.




The recommended sample size for a statistical power level of 0.8 is 4481.




The recommended sample size for a statistical power level of 0.8 is 7609.




The recommended sample size for a statistical power level of 0.8 is 3681.




The recommended sample size for a statistical power level of 0.8 is 4002.




The recommended sample size for a statistical power level of 0.8 is 4188.




The recommended sample size for a statistical power level of 0.8 is 3127.




The recommended sample size for a statistical power level of 0.8 is 4046.




The recommended sample size for a statistical power level of 0.8 is 5729.




The recommended sample size for a statistical power level of 0.8 is 5033.




The recommended sample size for a statistical power level of 0.8 is 6515.




The recommended sample size for a statistical power level of 0.8 is 3652.




The recommended sample size for a statistical power level of 0.8 is 5770.




The recommended sample size for a statistical power level of 0.8 is 4488.




The recommended sample size for a statistical power level of 0.8 is 4826.
results_sd_3_n_2000 complete.


<Figure size 432x288 with 0 Axes>

In [None]:
results_sd_5_n_500 = infer_power([30000, 50000, 70000, 90000, 110000], 
                                  500, 100, 0.05, 0.8, 'data_c_13000_mde_01.csv')
print('results_sd_5_n_500 complete.')

# Summary statistics of the 'Error' column returned by infer_power.
mu         = results_sd_5_n_500['Error'].mean()
sigma      = results_sd_5_n_500['Error'].std()
iterations = results_sd_5_n_500['Iterations'].unique()[0]

# Normal density with the same mean and standard deviation as the errors.
fig, ax = plt.subplots()
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
ax.plot(x, stats.norm.pdf(x, mu, sigma))
ax.axvline(x=0, color='red', alpha=0.4)
ax.set_xlim(-0.1, 1)
ax.set_xlabel('Error')
ax.set_ylabel('Density')

bbox = dict(boxstyle="round", fc="0.9")
# The box is anchored in axes-fraction coordinates: the previous call relied
# on `xdisplay`/`ydisplay`, which are not defined anywhere in this notebook
# and raise NameError on a fresh kernel.
ax.annotate('Mean Error  = {} \n   Error Std. = {} \n   Iterations = {}'.format(
                round(mu, 2), round(sigma, 2), iterations),
            xy=(0.97, 0.95), xycoords='axes fraction',
            ha='right', va='top', multialignment='left',
            bbox=bbox)
# NOTE(review): '11k' in the filename presumably stands for the 110000
# sample size -- confirm before renaming, other material may reference it.
fig.savefig('30k_50k_70k_90k_11k_n500.png')
# Close the figure so repeated runs do not accumulate open figures.
plt.close(fig)

139793 observations read into Python.
The recommended sample size for a statistical power level of 0.8 is 17433.
The recommended sample size for a statistical power level of 0.8 is 16947.
The recommended sample size for a statistical power level of 0.8 is 21082.
The recommended sample size for a statistical power level of 0.8 is 12743.
The recommended sample size for a statistical power level of 0.8 is 13595.
The recommended sample size for a statistical power level of 0.8 is 18571.
The recommended sample size for a statistical power level of 0.8 is 13014.
The recommended sample size for a statistical power level of 0.8 is 14999.
The recommended sample size for a statistical power level of 0.8 is 13273.
The recommended sample size for a statistical power level of 0.8 is 18047.
The recommended sample size for a statistical power level of 0.8 is 19522.
The recommended sample size for a statistical power level of 0.8 is 18422.
The recommended sample size for a statistical power level of 0

In [None]:
# Power simulation over five candidate sample sizes with 1000 sub-iterations
# per size, followed by a density plot of the returned 'Error' column.
# Assumes np, plt, stats, xdisplay and ydisplay were defined in earlier cells.
results_sd_5_n_1000 = infer_power(
    [30000, 50000, 70000, 90000, 110000],  # sample_sizes_to_eval
    1000,                                  # sub_iterations
    100,                                   # meta_iterations
    0.05,                                  # p_value_threshold
    0.8,                                   # desired_power
    'data_c_13000_mde_01.csv',             # data
)
print('results_sd_5_n_1000 complete.')

# Mean / standard deviation of the 'Error' column; the iteration count is
# taken as the first unique value of the 'Iterations' column.
error_values = results_sd_5_n_1000['Error']
mu, sigma = error_values.mean(), error_values.std()
iterations = results_sd_5_n_1000['Iterations'].unique()[0]

# Normal curve parameterised by that mean/std, evaluated over +/- 3 sigma.
x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, num=100)
plt.plot(x, stats.norm.pdf(x, loc=mu, scale=sigma))
plt.axvline(x=0, color='red', alpha=0.4)  # reference line at zero error
plt.xlim(-0.1, 1)

# Summary box with the rounded statistics.
bbox = {"boxstyle": "round", "fc": "0.9"}
annotation_text = 'Mean Error  = {} \n   Error Std. = {} \n   Iterations = {}'.format(
    round(mu, 2), round(sigma, 2), iterations)
plt.annotate(annotation_text,
             (xdisplay, ydisplay),
             xytext=(257, 200),
             xycoords='figure pixels',
             textcoords='offset points',
             bbox=bbox)

# The figure is written to disk instead of being shown inline, then cleared
# so the next cell starts from an empty figure.
# NOTE(review): the file name says '11k' although the largest sample size
# evaluated is 110000 -- name kept unchanged; confirm whether it is intended.
plt.savefig('30k_50k_70k_90k_11k_n1000.png')
plt.clf()

In [None]:
# Power simulation over five candidate sample sizes with 2000 sub-iterations
# per size, followed by a density plot of the returned 'Error' column.
# Assumes np, plt, stats, xdisplay and ydisplay were defined in earlier cells.
results_sd_5_n_2000 = infer_power(
    [30000, 50000, 70000, 90000, 110000],  # sample_sizes_to_eval
    2000,                                  # sub_iterations
    100,                                   # meta_iterations
    0.05,                                  # p_value_threshold
    0.8,                                   # desired_power
    'data_c_13000_mde_01.csv',             # data
)
# Fixed copy-paste slip: the message previously reported 'results_sd_5_n_1000'
# even though this cell produces the 2000-sub-iteration result.
print('results_sd_5_n_2000 complete.')

# Mean / standard deviation of the 'Error' column; the iteration count is
# taken as the first unique value of the 'Iterations' column.
mu         = results_sd_5_n_2000['Error'].mean()
sigma      = results_sd_5_n_2000['Error'].std()
iterations = results_sd_5_n_2000['Iterations'].unique()[0]

# Normal curve parameterised by that mean/std, evaluated over +/- 3 sigma.
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
plt.plot(x, stats.norm.pdf(x, mu, sigma))
plt.axvline(x=0, color = 'red', alpha=0.4)  # reference line at zero error
plt.xlim(-0.1,1)

# Summary box with the rounded statistics.
bbox = dict(boxstyle="round", fc="0.9")
plt.annotate('Mean Error  = {} \n   Error Std. = {} \n   Iterations = {}'.format(round(mu,2),
                                                                                 round(sigma,2),
                                                                                 iterations),
            (xdisplay, ydisplay), xytext=(257, 200),
            xycoords='figure pixels',
            textcoords='offset points',
            bbox=bbox)

# The figure is written to disk instead of being shown inline, then cleared
# so the next cell starts from an empty figure.
# NOTE(review): the file name says '11k' although the largest sample size
# evaluated is 110000 -- name kept unchanged; confirm whether it is intended.
plt.savefig('30k_50k_70k_90k_11k_n2000.png')
plt.clf()

In [None]:
# Power simulation over seven candidate sample sizes with 500 sub-iterations
# per size, followed by a density plot of the returned 'Error' column.
# Assumes np, plt, stats, xdisplay and ydisplay were defined in earlier cells.
results_sd_7_n_500 = infer_power(
    [10000, 30000, 50000, 70000, 90000, 110000, 130000],  # sample_sizes_to_eval
    500,                                                  # sub_iterations
    100,                                                  # meta_iterations
    0.05,                                                 # p_value_threshold
    0.8,                                                  # desired_power
    'data_c_13000_mde_01.csv',                            # data
)
print('results_sd_7_n_500 complete.')

# Mean / standard deviation of the 'Error' column; the iteration count is
# taken as the first unique value of the 'Iterations' column.
error_values = results_sd_7_n_500['Error']
mu, sigma = error_values.mean(), error_values.std()
iterations = results_sd_7_n_500['Iterations'].unique()[0]

# Normal curve parameterised by that mean/std, evaluated over +/- 3 sigma.
x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, num=100)
plt.plot(x, stats.norm.pdf(x, loc=mu, scale=sigma))
plt.axvline(x=0, color='red', alpha=0.4)  # reference line at zero error
plt.xlim(-0.1, 1)

# Summary box with the rounded statistics.
bbox = {"boxstyle": "round", "fc": "0.9"}
annotation_text = 'Mean Error  = {} \n   Error Std. = {} \n   Iterations = {}'.format(
    round(mu, 2), round(sigma, 2), iterations)
plt.annotate(annotation_text,
             (xdisplay, ydisplay),
             xytext=(257, 200),
             xycoords='figure pixels',
             textcoords='offset points',
             bbox=bbox)

# The figure is written to disk instead of being shown inline, then cleared
# so the next cell starts from an empty figure.
# NOTE(review): the file name says '11k' / '13k' although the sample sizes
# evaluated are 110000 / 130000 -- name kept unchanged; confirm intent.
plt.savefig('10k_30k_50k_70k_90k_11k_13k_n500.png')
plt.clf()

In [None]:
# Power simulation over seven candidate sample sizes with 1000 sub-iterations
# per size, followed by a density plot of the returned 'Error' column.
# Assumes np, plt, stats, xdisplay and ydisplay were defined in earlier cells.
results_sd_7_n_1000 = infer_power(
    [10000, 30000, 50000, 70000, 90000, 110000, 130000],  # sample_sizes_to_eval
    1000,                                                 # sub_iterations
    100,                                                  # meta_iterations
    0.05,                                                 # p_value_threshold
    0.8,                                                  # desired_power
    'data_c_13000_mde_01.csv',                            # data
)
print('results_sd_7_n_1000 complete.')

# Mean / standard deviation of the 'Error' column; the iteration count is
# taken as the first unique value of the 'Iterations' column.
error_values = results_sd_7_n_1000['Error']
mu, sigma = error_values.mean(), error_values.std()
iterations = results_sd_7_n_1000['Iterations'].unique()[0]

# Normal curve parameterised by that mean/std, evaluated over +/- 3 sigma.
x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, num=100)
plt.plot(x, stats.norm.pdf(x, loc=mu, scale=sigma))
plt.axvline(x=0, color='red', alpha=0.4)  # reference line at zero error
plt.xlim(-0.1, 1)

# Summary box with the rounded statistics.
bbox = {"boxstyle": "round", "fc": "0.9"}
annotation_text = 'Mean Error  = {} \n   Error Std. = {} \n   Iterations = {}'.format(
    round(mu, 2), round(sigma, 2), iterations)
plt.annotate(annotation_text,
             (xdisplay, ydisplay),
             xytext=(257, 200),
             xycoords='figure pixels',
             textcoords='offset points',
             bbox=bbox)

# The figure is written to disk instead of being shown inline, then cleared
# so the next cell starts from an empty figure.
# NOTE(review): the file name says '11k' / '13k' although the sample sizes
# evaluated are 110000 / 130000 -- name kept unchanged; confirm intent.
plt.savefig('10k_30k_50k_70k_90k_11k_13k_n1000.png')
plt.clf()

In [None]:
# Power simulation over seven candidate sample sizes with 2000 sub-iterations
# per size, followed by a density plot of the returned 'Error' column.
# Assumes np, plt, stats, xdisplay and ydisplay were defined in earlier cells.
results_sd_7_n_2000 = infer_power(
    [10000, 30000, 50000, 70000, 90000, 110000, 130000],  # sample_sizes_to_eval
    2000,                                                 # sub_iterations
    100,                                                  # meta_iterations
    0.05,                                                 # p_value_threshold
    0.8,                                                  # desired_power
    'data_c_13000_mde_01.csv',                            # data
)
print('results_sd_7_n_2000 complete.')

# Mean / standard deviation of the 'Error' column; the iteration count is
# taken as the first unique value of the 'Iterations' column.
error_values = results_sd_7_n_2000['Error']
mu, sigma = error_values.mean(), error_values.std()
iterations = results_sd_7_n_2000['Iterations'].unique()[0]

# Normal curve parameterised by that mean/std, evaluated over +/- 3 sigma.
x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, num=100)
plt.plot(x, stats.norm.pdf(x, loc=mu, scale=sigma))
plt.axvline(x=0, color='red', alpha=0.4)  # reference line at zero error
plt.xlim(-0.1, 1)

# Summary box with the rounded statistics.
bbox = {"boxstyle": "round", "fc": "0.9"}
annotation_text = 'Mean Error  = {} \n   Error Std. = {} \n   Iterations = {}'.format(
    round(mu, 2), round(sigma, 2), iterations)
plt.annotate(annotation_text,
             (xdisplay, ydisplay),
             xytext=(257, 200),
             xycoords='figure pixels',
             textcoords='offset points',
             bbox=bbox)

# The figure is written to disk instead of being shown inline, then cleared
# so the next cell starts from an empty figure.
# NOTE(review): the file name says '11k' / '13k' although the sample sizes
# evaluated are 110000 / 130000 -- name kept unchanged; confirm intent.
plt.savefig('10k_30k_50k_70k_90k_11k_13k_n2000.png')
plt.clf()