In [2]:
# import the necessary libraries.
import pandas as pd
import scipy.stats as st
import numpy as np
from math import ceil ##For rounding off the values
from statsmodels.stats.power import TTestPower

### Test for Difference in Means

In [49]:
# IPython help syntax: display the signature and docstring of TTestPower.solve_power.
?TTestPower.solve_power

Signature:
TTestPower.solve_power(
    self,
    effect_size=None,
    nobs=None,
    alpha=None,
    power=None,
    alternative='two-sided',
)
Docstring:
solve for any one parameter of the power of a one sample t-test

for the one sample t-test the keywords are:
    effect_size, nobs, alpha, power

Exactly one needs to be ``None``, all others need numeric values.

This test can also be used for a paired t-test, where effect size is
defined in terms of the mean difference, and nobs is the number of
pairs.

Parameters
----------
effect_size : float
    Standardized effect size.The effect size is

As you saw in the documentation, this method works for both one-sample t-tests and paired t-tests. \
You need to provide the following parameters to get the sample size:
- Effect Size
- Alpha
- Power

Using the above values, it will calculate the sample size.

In [50]:
## Worked example: inputs for the power analysis of a test on means.
d, sd = 10, 20            # raw difference in means, and the standard deviation
effect_size = d / sd      # standardized effect size: the difference expressed in SD units
alpha, power = 0.05, 0.8  # level of significance, and the target power of the test

In [51]:
# `nobs` is the one argument left as None, so solve_power() solves for the
# number of observations given the effect size, alpha and power.
p_analysis = TTestPower()
sample_size = p_analysis.solve_power(
    effect_size=effect_size,
    nobs=None,
    alpha=alpha,
    power=power,
)

In [52]:
# Round the solved value up with ceil so the reported sample size is a whole number.
print(f"Required Sample size: {ceil(sample_size)}")

Required Sample size: 34


### Test for difference in Proportions.

In [53]:
## import the necessary libraries.
import statsmodels.stats.api as sms

We shall be using the following methods:
- sms.proportion_effectsize()
- sms.NormalIndPower.solve_power()

In [55]:
## Let's check the documentation for them, starting with proportion_effectsize.
?sms.proportion_effectsize

Signature: sms.proportion_effectsize(prop1, prop2, method='normal')
Docstring:
Effect size for a test comparing two proportions

for use in power function

Parameters
----------
prop1, prop2 : float or array_like
    The proportion value(s).

Returns
-------
es : float or ndarray
    effect size for (transformed) prop1 - prop2

Notes
-----
only method='normal' is implemented to match pwr.p2.test
see http://www.statmethods.net/stats/power.html

Effect size for `normal` is defined as ::

    2 * (arcsin(sqrt(prop1)) - arcsin(sqrt(prop2)))

I think other conversions to normality can be used, but I need to check.

Examples
--------
>>> import statsmodels.api as sm
>>> sm.stats.proportion_effectsize(0.5, 0.4)
0.20135792079033088
>>> sm.stats.proportion_effectsize([0.3, 0.4, 0.5], 0.4)
array([-0.21015893,  0.        ,  0.20135792

In [56]:
# IPython help syntax: display the signature and docstring of NormalIndPower.solve_power.
?sms.NormalIndPower.solve_power

Signature:
sms.NormalIndPower.solve_power(
    self,
    effect_size=None,
    nobs1=None,
    alpha=None,
    power=None,
    ratio=1.0,
    alternative='two-sided',
)
Docstring:
solve for any one parameter of the power of a two sample z-test

for z-test the keywords are:
    effect_size, nobs1, alpha, power, ratio

exactly one needs to be ``None``, all others need numeric values

Parameters
----------
effect_size : float
    standardized effect size, difference between the two means divided
    by the standard deviation.
  

In [58]:
# Proportions for the control and treatment groups, in that order.
Pc, Pt = 0.04, 0.05
# Effect size for the (transformed) difference prop1 - prop2; negative here because Pc < Pt.
effect_size = sms.proportion_effectsize(prop1=Pc, prop2=Pt)

In [59]:
# Display the effect size computed from the control/treatment proportions.
print(f"The effect size is: {effect_size}")

The effect size is: -0.048310970215600824


In [60]:
# Same help lookup as the earlier `?sms.NormalIndPower.solve_power` cell, repeated
# here as a reminder of the arguments before solve_power() is called.
?sms.NormalIndPower.solve_power

Signature:
sms.NormalIndPower.solve_power(
    self,
    effect_size=None,
    nobs1=None,
    alpha=None,
    power=None,
    ratio=1.0,
    alternative='two-sided',
)
Docstring:
solve for any one parameter of the power of a two sample z-test

for z-test the keywords are:
    effect_size, nobs1, alpha, power, ratio

exactly one needs to be ``None``, all others need numeric values

Parameters
----------
effect_size : float
    standardized effect size, difference between the two means divided
    by the standard deviation.
  

As you saw in the documentation, this method is similar to the previous one, where we calculated the sample size for a difference in means. \
You need to provide the following parameters to get the sample size:
- effect size
- alpha
- power

In [64]:
# `nobs1` is the only solve_power() argument left as None, so it is the value solved for.
# NOTE(review): nobs1 looks like the size of the first sample only (ratio defaults
# to 1.0) -- confirm against the statsmodels docs before reading it as a total.
required_n = sms.NormalIndPower().solve_power(
    effect_size=effect_size,
    alpha=0.05,
    power=0.8,
)

In [65]:
# Round the solved value up with ceil so the reported sample size is a whole number.
print(f"The required sample size is: {ceil(required_n)}")

The required sample size is: 6726


### Additional Observation

You will notice that

- ___If the treatment effect is small, you will require more samples___ to detect a statistically significant treatment effect with high probability (i.e. with high power).
- ___If the treatment effect is large, you will require fewer samples___ to detect a statistically significant treatment effect with high probability (i.e. with high power).



In [96]:
# Same control proportion as before, but a larger treatment proportion,
# which gives a larger effect in absolute terms.
Pc, Pt = 0.04, 0.1
# Effect size for the (transformed) difference prop1 - prop2; negative here because Pc < Pt.
effect_size = sms.proportion_effectsize(prop1=Pc, prop2=Pt)

In [97]:
# Display the effect size computed from the control/treatment proportions.
print(f"The effect size is: {effect_size}")

The effect size is: -0.24078526721262278


In [98]:
# Solve for `nobs1` (the only argument left as None) at the same alpha and power
# as the 0.04 vs 0.05 example, now using the larger effect size.
required_n = sms.NormalIndPower().solve_power(
    effect_size=effect_size,
    alpha=0.05,
    power=0.8,
)

In [99]:
# Round the solved value up with ceil so the reported sample size is a whole number.
print(f"The required sample size is: {ceil(required_n)}")

The required sample size is: 271
