In [1]:
 # One-Way ANOVA Example
from scipy.stats import f_oneway

# Observations for the three diet groups being compared.
diet_A = [5, 6, 7, 8, 6]
diet_B = [7, 8, 9, 6, 5]
diet_C = [10, 12, 11, 13, 12]

# One-way ANOVA across the three groups; the result unpacks into the
# F statistic and its p-value.
f_stat, p_value = f_oneway(diet_A, diet_B, diet_C)

# Report both numbers to three decimals, one per line.
# NOTE: a p-value below 0.0005 is shown as 0.000 at this precision.
print(
    f"F-statistic: {f_stat:.3f}",
    f"P-value: {p_value:.3f}",
    sep="\n",
)


F-statistic: 23.804
P-value: 0.000


In [2]:
# Two-Way ANOVA
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Balanced 2x2 sample design: two fertilizers crossed with two watering
# levels, three growth measurements per combination (12 rows in total).
data = {
    'Fertilizer': ['F1'] * 6 + ['F2'] * 6,
    'Watering': (['Low'] * 3 + ['High'] * 3) * 2,
    'Growth': [10, 12, 11, 15, 14, 16, 9, 8, 10, 13, 14, 13],
}
df = pd.DataFrame(data)

# OLS fit with both categorical main effects plus their interaction term.
model = ols(
    formula='Growth ~ C(Fertilizer) + C(Watering) + C(Fertilizer):C(Watering)',
    data=df,
).fit()

# ANOVA table for the fitted model, requested with typ=2.
anova_table = sm.stats.anova_lm(model, typ=2)

# Heading (preceded by a blank line), then the table itself.
print("\nTWO-WAY ANOVA RESULTS:", anova_table, sep="\n")



TWO-WAY ANOVA RESULTS:
                              sum_sq   df     F    PR(>F)
C(Fertilizer)              10.083333  1.0  12.1  0.008338
C(Watering)                52.083333  1.0  62.5  0.000048
C(Fertilizer):C(Watering)   0.083333  1.0   0.1  0.759923
Residual                    6.666667  8.0   NaN       NaN


In [3]:
# Exercise 1: Exam Preparation Methods

# Scenario: You want to test whether different exam preparation methods affect student performance.

# Groups:

#     Group A: Self-study → [70, 72, 68, 71]

#     Group B: Group study → [65, 67, 66, 68]

#     Group C: Online coaching → [80, 82, 78, 81]

# Tasks:

#     Perform One-Way ANOVA

#     State the null and alternative hypotheses

#     Interpret the p-value

from scipy.stats import f_oneway

# Exam scores, one list per preparation method.
group_A = [70, 72, 68, 71]   # Self-study
group_B = [65, 67, 66, 68]   # Group study
group_C = [80, 82, 78, 81]   # Online coaching

# Hypotheses for the one-way ANOVA:
#   H0: the mean score is the same for all three preparation methods.
#   H1: at least one method's mean score differs from the others.
f_stat, p_value = f_oneway(group_A, group_B, group_C)

print(
    f"F-statistic: {f_stat:.4f}",
    f"P-value: {p_value:.6f}",
    sep="\n",
)

# Decision rule: reject H0 only when the p-value is strictly below alpha.
alpha = 0.05
print(
    "✅ Reject the null hypothesis → At least one group mean is significantly different."
    if p_value < alpha
    else "❌ Fail to reject the null hypothesis → No significant difference between group means."
)


F-statistic: 80.8333
P-value: 0.000002
✅ Reject the null hypothesis → At least one group mean is significantly different.


In [4]:
# Exercise 2: Training Program & Gender

# Scenario: A company tests 2 training programs on employee productivity, considering gender.

# Data:
# Training	Gender	Productivity
# T1	M	80, 82, 85
# T1	F	78, 76, 79
# T2	M	88, 90, 87
# T2	F	85, 84, 86

# Tasks:

#     Perform Two-Way ANOVA

#     Test:

#         Effect of Training

#         Effect of Gender

#         Interaction effect (Training × Gender)

import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Step 1: Assemble the observations as a balanced 2x2 layout
# (two training programs x two genders, three productivity scores each).
data = {
    'Training': ['T1'] * 6 + ['T2'] * 6,
    'Gender': (['M'] * 3 + ['F'] * 3) * 2,
    'Productivity': [80, 82, 85, 78, 76, 79, 88, 90, 87, 85, 84, 86],
}
df = pd.DataFrame(data)

# Step 2: Fit an OLS model with both main effects and the
# Training x Gender interaction, then build its ANOVA table (typ=2).
model = ols(
    formula='Productivity ~ C(Training) + C(Gender) + C(Training):C(Gender)',
    data=df,
).fit()
anova_table = sm.stats.anova_lm(model, typ=2)

# Step 3: Print the heading (preceded by a blank line) and the table.
print("\nTWO-WAY ANOVA RESULTS:", anova_table, sep="\n")




TWO-WAY ANOVA RESULTS:
                           sum_sq   df          F    PR(>F)
C(Training)            133.333333  1.0  44.444444  0.000158
C(Gender)               48.000000  1.0  16.000000  0.003950
C(Training):C(Gender)    1.333333  1.0   0.444444  0.523742
Residual                24.000000  8.0        NaN       NaN
