<center><h1 style="background-color: #C6F3CD; border-radius: 10px; color: #FFFFFF; padding: 5px;">
How to calculate sample size?
</h1></center>

**Link to the article** : https://medium.com/@soulawalid/data-science-how-to-calculate-sample-size-d9af786219ec?sk=19651b804cac2dbcb1eb7fb004c5940c

In [1]:
import math

#  Sampling

### 1 - Simple Random Sampling

In [2]:
def sample_size_simple_random(Z, p, e):
    """Return the sample size needed to estimate a proportion.

    Evaluates ``Z**2 * p * (1 - p) / e**2`` and rounds the result up to the
    next whole number.

    Args:
        Z: Z-score of the desired confidence level (e.g. 1.96 for 95%).
        p: Estimated proportion; must lie in [0, 1].
        e: Margin of error; must be greater than 0.

    Returns:
        int: The required sample size, rounded up.

    Raises:
        ValueError: If ``p`` is outside [0, 1] or ``e`` is not positive.
    """
    # Outside [0, 1] the term p * (1 - p) becomes negative, so the formula
    # would silently return a negative "sample size".
    if not 0 <= p <= 1:
        raise ValueError(f'p must be between 0 and 1, got {p}')
    # e == 0 would divide by zero; a negative margin of error is meaningless.
    if e <= 0:
        raise ValueError(f'e must be greater than 0, got {e}')
    return math.ceil((Z**2 * p * (1 - p)) / e**2)

Z = 1.96  # 95% confidence
p = 0.5   # Estimated proportion
e = 0.05  # Margin of error

sample_size = sample_size_simple_random(Z, p, e)
print(f'Simple Random Sample Size: {sample_size}')

Simple Random Sample Size: 385


### 2 - Systematic Sampling

In [3]:
def sample_size_systematic(N, n):
    """Return the systematic sampling interval ``k = N // n``.

    Note that, despite the function name, the value returned is the step
    between selected units, not a sample size.

    Args:
        N: Population size.
        n: Desired sample size; must satisfy ``1 <= n <= N``.

    Returns:
        int: The sampling interval, rounded down.

    Raises:
        ValueError: If ``n`` is smaller than 1 or larger than ``N``.
    """
    # n == 0 would divide by zero; negative n gives a negative interval.
    if n < 1:
        raise ValueError(f'n must be at least 1, got {n}')
    # n > N would floor to an interval of 0, which cannot be stepped through.
    if n > N:
        raise ValueError(f'n ({n}) cannot exceed the population size N ({N})')
    return N // n

# Example parameters
N = 1000  # Population size
n = 100   # Desired sample size

k = sample_size_systematic(N, n)
print(f'Systematic Sampling Interval: {k}')

Systematic Sampling Interval: 10


### 3 - Stratified Sampling

In [4]:
def sample_size_stratified(population_strata, sample_size_strata):
    """Return the total sample size across all strata.

    The total is the sum of the per-stratum sample sizes. The stratum
    populations are used only to check that the allocation is feasible.

    Args:
        population_strata: Iterable with the population size of each stratum.
        sample_size_strata: Iterable with the sample size drawn from each
            stratum, in the same order as ``population_strata``.

    Returns:
        The sum of ``sample_size_strata`` (0 when there are no strata).

    Raises:
        ValueError: If the two inputs differ in length, or if any stratum's
            sample size is negative or larger than that stratum's population.
    """
    # Materialise both inputs so generators can be validated and then summed.
    populations = list(population_strata)
    allocations = list(sample_size_strata)

    if len(populations) != len(allocations):
        raise ValueError(
            f'got {len(populations)} strata populations but '
            f'{len(allocations)} strata sample sizes'
        )

    for index, (stratum_population, stratum_sample) in enumerate(zip(populations, allocations)):
        # A stratum cannot contribute a negative count or more units than it holds.
        if stratum_sample < 0 or stratum_sample > stratum_population:
            raise ValueError(
                f'sample size {stratum_sample} for stratum {index} must be '
                f'between 0 and its population {stratum_population}'
            )

    return sum(allocations)

population_strata = [200, 300, 500]
sample_size_strata = [20, 30, 50]

total_sample_size = sample_size_stratified(population_strata, sample_size_strata)
print(f'Stratified Sample Size: {total_sample_size}')

Stratified Sample Size: 100


### 4 - Cluster Sampling

In [5]:
def sample_size_cluster(N, c, k):
    """Return the sample size obtained by selecting ``c`` of ``k`` clusters.

    Evaluates ``(N * c) // k``, i.e. the share ``c / k`` of the population
    ``N``, rounded down.

    Args:
        N: Population size.
        c: Number of clusters selected; must satisfy ``0 <= c <= k``.
        k: Total number of clusters; must be greater than 0.

    Returns:
        int: The resulting sample size, rounded down.

    Raises:
        ValueError: If ``k`` is not positive, or ``c`` is negative or
            larger than ``k``.
    """
    # k == 0 would divide by zero.
    if k <= 0:
        raise ValueError(f'k must be greater than 0, got {k}')
    # Selecting more clusters than exist would return a sample larger than N.
    if c < 0 or c > k:
        raise ValueError(f'c must be between 0 and k ({k}), got {c}')
    return (N * c) // k

N = 1000  # Population size
c = 10    # Number of clusters selected
k = 50    # Total number of clusters

sample_size = sample_size_cluster(N, c, k)
print(f'Cluster Sample Size: {sample_size}')

Cluster Sample Size: 200


### 5 - Snowball Sampling

In [6]:
def sample_size_snowball(initial_subjects, recruitment_factor, waves):
    """Return ``initial_subjects * recruitment_factor ** waves``.

    The initial number of subjects is multiplied by the recruitment factor
    once for every wave.

    Args:
        initial_subjects: Number of subjects at the start.
        recruitment_factor: Multiplier applied in each wave.
        waves: Number of waves.

    Returns:
        The initial count scaled by ``recruitment_factor`` raised to ``waves``.
    """
    growth = recruitment_factor ** waves
    return initial_subjects * growth

# Example inputs
initial_subjects = 5
recruitment_factor = 2
waves = 3

sample_size = sample_size_snowball(
    initial_subjects=initial_subjects,
    recruitment_factor=recruitment_factor,
    waves=waves,
)
print(f'Snowball Sample Size: {sample_size}')

Snowball Sample Size: 40


# Sample size calculation

### 1 - Yamane's Formula for Sample Size

In [9]:
def sample_size_yamane(N, e):
    """Return the sample size ``N / (1 + N * e**2)``, rounded up.

    Args:
        N: Population size.
        e: Margin of error.

    Returns:
        int: The computed sample size, rounded up to the next whole number.
    """
    denominator = 1 + N * e ** 2
    return math.ceil(N / denominator)

# Inputs for the worked example
N = 1000  # Size of the population
e = 0.05  # Margin of error

sample_size = sample_size_yamane(N=N, e=e)
print(f'Yamane\'s Sample Size: {sample_size}')

Yamane's Sample Size: 286


### 2 - Cochran's Formula for Sample Size

In [14]:
# Infinite population (N is not supplied, so no finite-population correction is applied)

def calculate_cochran_sample_size(Z, p, e, N=None):
    """Return the sample size for a proportion, optionally corrected for a finite population.

    The base value is ``Z**2 * p * (1 - p) / e**2``. When ``N`` is given, the
    base value ``n_0`` is adjusted to ``n_0 / (1 + (n_0 - 1) / N)``. The
    result is rounded up in both cases.

    Args:
        Z: Z-value for the confidence level (e.g. 1.96 for 95%).
        p: Estimated proportion.
        e: Desired margin of error.
        N: Population size, or ``None`` to skip the finite-population adjustment.

    Returns:
        int: The sample size, rounded up to the next whole number.
    """
    base_size = (Z**2 * p * (1 - p)) / e**2
    if N is None:
        # No population size supplied: use the uncorrected value.
        return math.ceil(base_size)
    corrected_size = base_size / (1 + (base_size - 1) / N)
    return math.ceil(corrected_size)

Z = 1.96  # Z-value matching a 95% confidence level
p = 0.5   # Proportion estimate
e = 0.05  # Margin of error wanted

sample_size_infinite = calculate_cochran_sample_size(Z=Z, p=p, e=e)
print(f'Sample size for infinite population: {sample_size_infinite}')

# Sample size for infinite population: 385

Sample size for infinite population: 385


In [16]:
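# Finite population: this cell uses the mean-based formula (Z, sigma, e) with a finite-population correction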

def calculate_sample_size_mean(Z, sigma, e, N=None):
    """Return the sample size for estimating a mean, rounded up.

    The uncorrected value is ``Z**2 * sigma**2 / e**2``. If ``N`` is given,
    that value ``n_0`` is replaced by ``n_0 / (1 + (n_0 - 1) / N)``.

    Args:
        Z: Z-value for the confidence level (e.g. 1.96 for 95%).
        sigma: Population standard deviation.
        e: Desired margin of error.
        N: Population size, or ``None`` for no finite-population adjustment.

    Returns:
        int: The sample size, rounded up to the next whole number.
    """
    uncorrected = (Z**2 * sigma**2) / e**2
    required = uncorrected if N is None else uncorrected / (1 + (uncorrected - 1) / N)
    return math.ceil(required)

Z = 1.96    # 95% confidence level
sigma = 10  # Standard deviation of the population
e = 2       # Margin of error wanted
N = 1000    # Size of the population

sample_size_finite = calculate_sample_size_mean(Z, sigma, e, N=N)
print(f'Sample size for finite population: {sample_size_finite}')
# Sample size for finite population: 88

Sample size for finite population: 88


### 3 - Sample Size for Population Mean Confidence Interval

In [17]:
# Infinite

def sample_size_mean_confidence(Z, sigma, E):
    """Return ``(Z * sigma / E) ** 2`` rounded up to the next whole number.

    Args:
        Z: Z-score for the confidence level (e.g. 1.96 for 95%).
        sigma: Estimated population standard deviation.
        E: Margin of error.

    Returns:
        int: The sample size, rounded up.
    """
    ratio = Z * sigma / E
    return math.ceil(ratio ** 2)

Z = 1.96   # Confidence level of 95%
sigma = 5  # Estimate of the population standard deviation
E = 1      # Margin of error
sample_size = sample_size_mean_confidence(Z=Z, sigma=sigma, E=E)
print(f'Sample Size for Population Mean Confidence Interval: {sample_size}')

#Sample Size for Population Mean Confidence Interval: 97

Sample Size for Population Mean Confidence Interval: 97


In [18]:
# Finite

# NOTE: this repeats the calculate_sample_size_mean definition from the
# earlier finite-population cell.
def calculate_sample_size_mean(Z, sigma, e, N=None):
    """Return the sample size for estimating a mean, rounded up.

    Starts from ``Z**2 * sigma**2 / e**2``. When a population size ``N`` is
    supplied, that starting value ``n_0`` is adjusted to
    ``n_0 / (1 + (n_0 - 1) / N)`` before rounding.

    Args:
        Z: Z-value for the confidence level (e.g. 1.96 for 95%).
        sigma: Population standard deviation.
        e: Desired margin of error.
        N: Population size; leave as ``None`` to skip the adjustment.

    Returns:
        int: The sample size, rounded up to the next whole number.
    """
    n_0 = Z**2 * sigma**2 / e**2
    if N is None:
        return math.ceil(n_0)
    return math.ceil(n_0 / (1 + (n_0 - 1) / N))

Z = 1.96    # Z-value at 95% confidence
sigma = 10  # Standard deviation of the population
e = 2       # Target margin of error
N = 1000    # Number of units in the population

sample_size_finite = calculate_sample_size_mean(Z=Z, sigma=sigma, e=e, N=N)
print(f'Sample size for finite population: {sample_size_finite}')
# Sample size for finite population: 88

Sample size for finite population: 88
