# EXAMPLE 1

In [1]:
import numpy as np
from scipy import stats

# Campaign inputs
clicks = 92        # click-throughs observed (successes)
emails = 400       # emails sent (total sample size)
confidence = 0.95  # target confidence level (95%)

# ---- Compute --------------------------------------------------------------
# Point estimate: observed click-through rate (0.230, i.e. 23.0%)
p_hat = clicks / emails

# Success-failure counts np̂ and n(1-p̂); each needs to be at least 10
# for the normal approximation used below
successes, failures = emails * p_hat, emails * (1 - p_hat)

# Standard error of the sample proportion: sqrt(p̂(1-p̂)/n)
standard_error = np.sqrt(p_hat * (1 - p_hat) / emails)

# Two-sided critical value z*: the (1 + confidence)/2 standard-normal quantile
z_critical = stats.norm.ppf((1 + confidence) / 2)

# Margin of error and the resulting interval p̂ ± margin
margin_of_error = z_critical * standard_error
ci_lower, ci_upper = p_hat - margin_of_error, p_hat + margin_of_error

# ---- Report ---------------------------------------------------------------
print(f"Sample proportion (p̂): {p_hat:.3f}")

print(
    "\nChecking success-failure condition:",
    f"Successes (np̂): {successes:.1f}",
    f"Failures (n(1-p̂)): {failures:.1f}",
    sep="\n",
)

print(
    f"\nStandard Error: {standard_error:.4f}",
    f"Critical value (z*): {z_critical:.4f}",
    f"Margin of Error: {margin_of_error:.4f}",
    sep="\n",
)

# The interval is shown both as proportions and as percentages
print(
    "\n95% Confidence Interval:",
    f"({ci_lower:.3f}, {ci_upper:.3f})",
    f"({ci_lower*100:.1f}%, {ci_upper*100:.1f}%)",
    sep="\n",
)

Sample proportion (p̂): 0.230

Checking success-failure condition:
Successes (np̂): 92.0
Failures (n(1-p̂)): 308.0

Standard Error: 0.0210
Critical value (z*): 1.9600
Margin of Error: 0.0412

95% Confidence Interval:
(0.189, 0.271)
(18.9%, 27.1%)


Sample Proportion (Point Estimate): Our observed click-through rate is 23.0% (92/400). This is our best single estimate, but we know it's not perfect. That's why we need a confidence interval.
Success-Failure Condition:
Successes: 92 (well above 10)
Failures: 308 (well above 10). This tells us our sample is large enough to use normal-based methods reliably.
Standard Error: We calculated a standard error of about 0.021, or 2.1 percentage points. This represents the typical amount our sample proportion might vary from sample to sample just by chance.
Confidence Interval Construction:
For 95% confidence, we use a z* value of 1.96 (from the standard normal)
Our margin of error is about 4.1 percentage points (1.96 × 2.1%)
This gives us an interval from 18.9% to 27.1%.

"Based on our analysis of the first 400 emails, we can be 95% confident that the 
true long-term click-through rate for this campaign will fall between 18.9% and 27.1%."

Here's what this means for planning:

Best estimate: 23.0% click-through rate
Conservative estimate (lower bound): 18.9%
Optimistic estimate (upper bound): 27.1%
For quarterly planning, I recommend:

Using 18.9% for conservative forecasts
Using 23.0% for typical forecasts
Using 27.1% for optimistic scenarios
The margin of error (±4.1 percentage points) gives us reasonable precision for planning. However, if we need more precise estimates, we could:

Analyze more emails (doubling our sample size would reduce the margin of error by about 30%)
Accept a lower confidence level (90% confidence would give us a narrower interval but less certainty)

# EXAMPLE 2

In [2]:
# Recommendation-system sample
clicks = 180       # users who clicked a recommendation
total_users = 400  # users in the sample (exceeded the minimum required)

# Observed click proportion: 0.45, i.e. 45%
p_hat = clicks / total_users

# Success-failure condition: expected successes (180) and failures (220)
# must both be at least 10
successes, failures = total_users * p_hat, total_users * (1 - p_hat)
print(f"Success-failure check: {successes >= 10 and failures >= 10}")  # True

Success-failure check: True


In [4]:
def calculate_proportion_ci(successes, n, confidence_level=0.95):
    """Return a normal-approximation confidence interval for a proportion.

    The interval is ``p_hat ± z* · sqrt(p_hat · (1 - p_hat) / n)`` with
    ``p_hat = successes / n`` and ``z*`` the two-sided standard-normal
    critical value for ``confidence_level``.

    Parameters
    ----------
    successes : number or array-like
        Observed number of successes; must satisfy ``0 <= successes <= n``.
    n : number or array-like
        Sample size; must be positive.
    confidence_level : float, optional
        Confidence level strictly between 0 and 1 (default 0.95).

    Returns
    -------
    tuple
        ``(lower, upper, margin_of_error)`` on the proportion scale. The
        bounds are not clipped to [0, 1].

    Raises
    ------
    ValueError
        If ``n`` is not positive, ``successes`` lies outside ``[0, n]``, or
        ``confidence_level`` is not strictly between 0 and 1.
    """
    # Validate up front: otherwise n == 0 surfaces as a bare
    # ZeroDivisionError and the other cases silently produce NaN/inf.
    # np.any keeps the checks usable with array inputs as well as scalars.
    if np.any(np.less_equal(n, 0)):
        raise ValueError(f"n must be positive, got {n!r}")
    if np.any(np.less(successes, 0)) or np.any(np.greater(successes, n)):
        raise ValueError(
            f"successes must be between 0 and n, got successes={successes!r}, n={n!r}"
        )
    if not 0 < confidence_level < 1:
        raise ValueError(
            f"confidence_level must be strictly between 0 and 1, got {confidence_level!r}"
        )

    p_hat = successes / n

    # Two-sided critical value: the (1 + confidence_level) / 2 quantile
    z_score = stats.norm.ppf((1 + confidence_level) / 2)
    margin_of_error = z_score * np.sqrt((p_hat * (1 - p_hat)) / n)

    lower = p_hat - margin_of_error
    upper = p_hat + margin_of_error

    return lower, upper, margin_of_error

# 95% interval for the recommendation sample: 180 clicks out of 400 users
lower, upper, me = calculate_proportion_ci(successes=180, n=400)
print(
    f"95% CI: ({lower:.3f}, {upper:.3f})",  # Output: (0.401, 0.499)
    f"Margin of Error: {me:.3f}",           # Output: 0.049
    sep="\n",
)

95% CI: (0.401, 0.499)
Margin of Error: 0.049


Check that your interval makes sense:

Falls within [0,1]
Width seems reasonable given sample size
Matches business context
For our recommendation system: "We can be 95% confident that between 40.1% and 49.9% of all users click on recommended products. The margin of error is approximately ±4.9 percentage points."

Beyond statistical considerations, evaluate:

Does the interval width provide actionable information?
Are both bounds meaningful for decision-making?
How does this compare to business targets?
In our case:

Even the lower bound (40.1%) exceeds the 35% target click rate
The interval width (9.8 percentage points) is narrow enough for decision-making
Results suggest the recommendation system is performing adequately

Present findings in business terms:

"Based on our analysis of 400 users, we're 95% confident that the true click-through rate for product recommendations falls between 40% and 50%. Since even our most conservative estimate (40%) exceeds our target of 35%, we can confidently say the recommendation system is meeting its performance goals. However, there might be room for improvement as we're not yet reaching the stretch goal of 55%."

# EXAMPLE 3

In [5]:
# Sample proportion per channel (successes / total responses):
#   chat:  280 / 320 = 0.875 (87.5%)
#   email: 410 / 500 = 0.82  (82%)
chat_prop, email_prop = 280 / 320, 410 / 500

# Check success-failure condition for both groups
def check_conditions(successes, n):
    """Check the success-failure condition for one sample.

    Parameters
    ----------
    successes : int or float
        Number of observed successes.
    n : int or float
        Sample size.

    Returns
    -------
    dict
        ``'np'``: number of successes, as a float.
        ``'n(1-p)'``: number of failures (``n - successes``), as a float.
        ``'conditions_met'``: True when both counts are at least 10.
    """
    # n * p_hat is just `successes`, and n * (1 - p_hat) is `n - successes`.
    # Using the counts directly avoids floating-point round-off (for example
    # 100 * (1 - 90/100) evaluates to 9.999999999999998, which would fail the
    # ">= 10" test) and avoids dividing by zero when n == 0.
    failures = n - successes
    return {
        'np': float(successes),
        'n(1-p)': float(failures),
        'conditions_met': successes >= 10 and failures >= 10,
    }

# Run the success-failure check for each channel (successes, sample size)
chat_check = check_conditions(280, 320)
email_check = check_conditions(410, 500)

# Chat report; the trailing "\n" leaves a blank line before the email report
print(
    "Chat conditions:",
    f"np = {chat_check['np']:.1f}",
    f"n(1-p) = {chat_check['n(1-p)']:.1f}",
    f"Conditions met: {chat_check['conditions_met']}\n",
    sep="\n",
)

# Email report
print(
    "Email conditions:",
    f"np = {email_check['np']:.1f}",
    f"n(1-p) = {email_check['n(1-p)']:.1f}",
    f"Conditions met: {email_check['conditions_met']}",
    sep="\n",
)

Chat conditions:
np = 280.0
n(1-p) = 40.0
Conditions met: True

Email conditions:
np = 410.0
n(1-p) = 90.0
Conditions met: True


In [6]:
from statsmodels.stats.proportion import proportions_ztest

# Inputs for statsmodels' proportions_ztest, ordered [chat, email]
count = np.array([280, 410])  # successes for chat and email
nobs = np.array([320, 500])   # total observations for each group

# Two-sample z-test comparing the two proportions
stat, pvalue = proportions_ztest(count=count, nobs=nobs)

print(f"\nZ-statistic: {stat:.4f}", f"P-value: {pvalue:.4f}", sep="\n")

# Effect size: chat proportion minus email proportion
effect_size = 280 / 320 - 410 / 500
print(f"Effect size: {effect_size:.4f}")


Z-statistic: 2.1035
P-value: 0.0354
Effect size: 0.0550


In [7]:
# Critical value for a two-sided 95% interval (0.975 standard-normal quantile)
z_critical = stats.norm.ppf(0.975)

# Pooled proportion: all successes over all observations, both channels
p_pooled = (280 + 410) / (320 + 500)

# Standard error of the difference, built from the pooled proportion.
# NOTE(review): the pooled SE is the one used under the null hypothesis of
# equal proportions; intervals for a difference are customarily built from
# the unpooled SE, sqrt(p1(1-p1)/n1 + p2(1-p2)/n2). Confirm pooling is
# intended here before reusing these bounds.
se = np.sqrt(p_pooled * (1 - p_pooled) * (1 / 320 + 1 / 500))

# Interval: effect_size ± z* · SE
margin_error = z_critical * se
ci_lower, ci_upper = effect_size - margin_error, effect_size + margin_error

print(f"\n95% Confidence Interval: ({ci_lower:.4f}, {ci_upper:.4f})")


95% Confidence Interval: (0.0038, 0.1062)


Here's how you might present these findings to your support manager:

"I've analyzed the satisfaction rates between our chat and email support channels. Here's what I found:
Chat support had a satisfaction rate of 87.5% (280 out of 320 responses), while email support had a satisfaction rate of 82% (410 out of 500 responses). This represents a 5.5 percentage point difference in favor of chat support.

Our statistical analysis shows this difference is statistically significant (p = 0.035), meaning it's unlikely this difference occurred by chance. We can be 95% confident that chat support's true satisfaction rate is between 0.4 and 10.6 percentage points higher than email support.

Business Implications:

The data supports that chat provides better customer satisfaction.
The minimum improvement we're confident about (0.4 percentage points) might be too small to justify chat's higher cost.
However, the potential improvement could be as high as 10.6 percentage points, which might make the investment worthwhile.
Recommendations:

Continue collecting data to narrow down the true difference.
Consider analyzing the cost per satisfied customer for each channel.
Look for patterns in the types of issues where chat particularly excels."