In [6]:
import numpy as np
import pandas as pd
from scipy.stats import shapiro
from statsmodels.stats.anova import AnovaRM

# Create the data
dataframe = pd.DataFrame({'Cars': np.repeat([1, 2, 3, 4, 5], 4),
                          'Oil': np.tile([1, 2, 3, 4], 5),
                          'Mileage': [36, 38, 30, 29,
                                      34, 38, 30, 29,
                                      34, 28, 38, 32,
                                      38, 34, 20, 44,
                                      26, 28, 34, 50]})

# Check the assumptions

# 1. Independence Assumption: Assuming each car's mileage is independent of the others.
# No specific test is required for this assumption.

# 2. Normality Assumption: Check if the residuals are approximately normally distributed.
# Using Shapiro-Wilk test for normality
residuals = dataframe['Mileage'] - dataframe.groupby('Oil')['Mileage'].transform('mean')
shapiro_test_stat, shapiro_p_value = shapiro(residuals)
print(f"Shapiro-Wilk Test for Normality - p-value: {shapiro_p_value:.4f}")

# 3. Sphericity Assumption: Mauchly's Test for Sphericity


# Conduct repeated-measures ANOVA
anova_result = AnovaRM(dataframe, 'Mileage', 'Cars', within=['Oil']).fit()

# Print ANOVA table
print(anova_result)

# Report the results in APA format
print("\nAPA Format Report:")
print("Repeated Measures ANOVA was conducted to assess the difference in car mileage between four engine oils.")
print("\nAssumption Checks:")
print(f"1. Independence Assumption: No specific test is required, assuming independence of car mileages.")
print(f"2. Normality Assumption: Shapiro-Wilk Test p-value = {shapiro_p_value:.4f}. The residuals are approximately normally distributed.")

# Interpret the results
print("\nANOVA Results:")
print(anova_result.anova_table)


Shapiro-Wilk Test for Normality - p-value: 0.6504
              Anova
    F Value Num DF  Den DF Pr > F
---------------------------------
Oil  0.5679 3.0000 12.0000 0.6466


APA Format Report:
Repeated Measures ANOVA was conducted to assess the difference in car mileage between four engine oils.

Assumption Checks:
1. Independence Assumption: No specific test is required, assuming independence of car mileages.
2. Normality Assumption: Shapiro-Wilk Test p-value = 0.6504. The residuals are approximately normally distributed.

ANOVA Results:
      F Value  Num DF  Den DF    Pr > F
Oil  0.567884     3.0    12.0  0.646647
