In [1]:
import numpy as np
import pandas as pd
from scipy import stats

np.random.seed(42)

# Scenario: 15 baristas test both coffee recipes
# Times are in seconds to prepare one coffee

n_baristas = 15

# Each barista has their own "baseline" speed
barista_baseline = np.random.normal(45, 8, n_baristas)

# Old recipe times (baseline + some noise)
old_recipe = barista_baseline + np.random.normal(0, 2, n_baristas)

# New recipe saves about 3 seconds on average, but varies by barista
improvement = np.random.normal(3, 1.5, n_baristas)
new_recipe = old_recipe - improvement

# Create a DataFrame to see the data clearly
barista_data = pd.DataFrame(
    {
        "barista_id": range(1, n_baristas + 1),
        "old_recipe_time": old_recipe,
        "new_recipe_time": new_recipe,
        "difference": old_recipe - new_recipe,
    }
)

print("First 5 baristas:")
print(barista_data.head())
print(f"Mean old recipe time: {old_recipe.mean():.2f} seconds")
print(f"Mean new recipe time: {new_recipe.mean():.2f} seconds")
print(f"Mean difference: {barista_data['difference'].mean():.2f} seconds")

First 5 baristas:
   barista_id  old_recipe_time  new_recipe_time  difference
0           1        47.849138        45.751698    2.097440
1           2        41.868223        36.089806    5.778417
2           3        50.810003        47.830249    2.979754
3           4        55.368191        53.954757    1.413434
4           5        40.302166        36.068348    4.233817
Mean old recipe time: 44.31 seconds
Mean new recipe time: 41.72 seconds
Mean difference: 2.59 seconds


### Task 1: Independent (wrong) approach
Treat them as two separate groups


In [2]:
from scipy.stats import t

x_old = np.mean(barista_data["old_recipe_time"])
x_new = np.mean(barista_data["new_recipe_time"])
n_old = len(barista_data["old_recipe_time"])
n_new = len(barista_data["new_recipe_time"])
var_old = np.var(barista_data["old_recipe_time"], ddof=1)
var_new = np.var(barista_data["new_recipe_time"], ddof=1)

t_stats = (x_old - x_new) / np.sqrt(var_old / n_old + var_new / n_new)
df_welch = (var_old / n_old + var_new / n_new) ** 2 / (
    (var_old / n_old) ** 2 / (n_old - 1) + (var_new / n_new) ** 2 / (n_new - 1)
)
p_value = 2 * (1 - t.cdf(abs(t_stats), df=df_welch))
print(f"T-stats value: {t_stats:.4f}, P-value: {p_value:.6f}")

T-stats value: 0.8502, P-value: 0.402429


### Task 2: Paired approach - manual


### Cohen's d Formula
Plain text: _d = (mean difference) / (standard deviation)_ <br>

For paired data specifically: <br>

Plain text: d = d̄ / s_d <br>

Formula: $\large d = \frac{\bar{d}}{s_d}$ <br>

> Read as: "d equals d-bar divided by s-sub-d"
> This tells you: "How many standard deviations apart are the two conditions?"
#### Interpreting Cohen's d
Jacob Cohen (the statistician who invented this) suggested these interpretations:
- d ≈ 0.2: Small effect (barely noticeable)
- d ≈ 0.5: Medium effect (visible to the naked eye)
- d ≈ 0.8: Large effect (very noticeable)
- d > 1.0: Very large effect (the two groups barely overlap)

In [10]:
differences = barista_data["difference"]
mean_diff = differences.mean()
hyp_mean = 0
n_diff = len(barista_data["difference"])
sd_sample = np.std(barista_data["difference"], ddof=1)
# Now what? It's a one-sample test on the differences!
t_stats = (mean_diff - hyp_mean) / (sd_sample / np.sqrt(n_diff))
p_value = 2 * (1 - stats.t.cdf(abs(t_stats), df=n_diff - 1))
cohens_d = mean_diff / sd_sample


print(f"T-statistic: {t_stats:.4f}, P-value: {p_value:.6f}, Cohen's-D: {cohens_d:.6f}")

T-statistic: 6.5706, P-value: 0.000012, Cohen's-D: 1.696514


In [6]:
from scipy.stats import ttest_rel

t_stat, p_value = ttest_rel(
    a=barista_data["old_recipe_time"], b=barista_data["new_recipe_time"]
)

print(f"T-statistic: {t_stat:.4f}, P-value: {p_value:.6f}")

T-statistic: 6.5706, P-value: 0.000012


### Task 4: Reflection

In [5]:
# How would this affect your results?
np.random.seed(42)

# Scenario: 15 baristas test both coffee recipes
# Times are in seconds to prepare one coffee

n_baristas = 15

# Each barista has their own "baseline" speed
barista_baseline = np.random.normal(45, 8, n_baristas)

# Old recipe times (baseline + some noise)
old_recipe = barista_baseline + np.random.normal(0, 2, n_baristas)

# New recipe saves about 3 seconds on average, but varies by barista
variable_improvement = np.random.normal(3, 5, n_baristas)  # Much more variable
new_recipe = old_recipe - variable_improvement

# Create a DataFrame to see the data clearly
barista_data_var = pd.DataFrame(
    {
        "barista_id": range(1, n_baristas + 1),
        "old_recipe_time": old_recipe,
        "new_recipe_time": new_recipe,
        "difference": old_recipe - new_recipe,
    }
)

In [7]:
from scipy.stats import ttest_rel

t_stat, p_value = ttest_rel(
    a=barista_data_var["old_recipe_time"], b=barista_data_var["new_recipe_time"]
)

print(f"T-statistic: {t_stat:.4f}, P-value: {p_value:.6f}")

T-statistic: 1.2459, P-value: 0.233264


In [9]:
# From your original barista_data (not the variable one)
t_independent, p_independent = stats.ttest_ind(
    barista_data["old_recipe_time"], barista_data["new_recipe_time"], equal_var=False
)
print(f"Independent samples - p-value: {p_independent:.6f}")
print(f"Paired samples - p-value: {0.000012:.6f}")  # Your earlier result
print(f"Ratio: {p_independent/0.000012:.1f}x larger")

Independent samples - p-value: 0.402429
Paired samples - p-value: 0.000012
Ratio: 33535.7x larger
