In [2]:
# Example:
# There are three different types of fertilizer (A, B, C), and we want to determine 
# if they have a significant effect on plant growth.
# We measure the height of 20 plants in each group after a month of 
# using the respective fertilizers.

# Null Hypothesis (H0): The mean plant heights of the three fertilizer groups are equal.
# Alternative Hypothesis (H1): At least one group's mean plant height differs from the others.

# Use scipy.stats to perform the ANOVA test.
import numpy as np
import scipy.stats as stats

# Plant heights (inches) after one month, one array per fertilizer type.
# Each array holds the measurements for the 20 plants in that group.
fertilizer_A = np.array([12, 14, 16, 15, 18, 19, 17, 20, 21, 23, 22, 20, 24, 25, 23, 21, 18, 19, 20, 22])
fertilizer_B = np.array([14, 15, 16, 16, 18, 20, 19, 21, 22, 24, 23, 21, 25, 26, 24, 22, 19, 20, 22, 24])
fertilizer_C = np.array([15, 16, 17, 18, 19, 21, 20, 23, 24, 25, 26, 28, 30, 29, 27, 26, 23, 22, 21, 24])

# One-way ANOVA across the three groups: f_oneway returns the F statistic
# and the p-value for the hypothesis that all group means are equal.
f_statistics, p_value = stats.f_oneway(fertilizer_A, fertilizer_B, fertilizer_C)

# Significance level: H0 is rejected when the p-value falls below it.
alpha = 0.05

print(f"F-statistic: {f_statistics}")
print(f"P-value: {p_value}")

# Decision rule. A rejection only says that the group means are not all equal;
# it does not identify which groups differ (that needs a post-hoc test).
if p_value < alpha:
    print("Reject the null hypothesis: At least one fertilizer has a significant effect on the plant growths")
else:
    print("Fail to reject the null hypothesis: There is no significant difference between fertilizers")

F-statistic: 3.873958462877751
P-value: 0.026454408962679598
Reject the null hypothesis: At least one fertilizer has a significant effect on the plant growths


In [5]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Reshape the three samples into long format: one row per plant, labelled with
# its fertilizer group. The height arrays are defined in the SciPy ANOVA cell,
# so that cell must have been run first.
samples = {'A': fertilizer_A, 'B': fertilizer_B, 'C': fertilizer_C}
data = pd.DataFrame({
    # Repeat each label once per observation so the labels stay aligned with
    # the heights even when the groups do not all have the same size.
    'Fertilizer': [label for label, heights in samples.items() for _ in range(len(heights))],
    'Plant_Height': np.concatenate(list(samples.values()))
})

# Fit Plant_Height ~ Fertilizer by ordinary least squares and build the ANOVA
# table from the fitted model (typ=2 requests Type II sums of squares).
model = ols('Plant_Height ~ Fertilizer', data=data).fit()
anova_table = sm.stats.anova_lm(model, typ=2)

print("ANOVA Table:")
print(anova_table)

# Tukey's HSD post-hoc test: compares every pair of fertilizer groups to show
# which specific pairs differ.
tukey_results = pairwise_tukeyhsd(data['Plant_Height'], data['Fertilizer'])

print("\nTukey's HSD Test Results:")
print(tukey_results)


ANOVA Table:
            sum_sq    df         F    PR(>F)
Fertilizer   109.3   2.0  3.873958  0.026454
Residual     804.1  57.0       NaN       NaN

Tukey's HSD Test Results:
Multiple Comparison of Means - Tukey HSD, FWER=0.05
group1 group2 meandiff p-adj   lower  upper  reject
---------------------------------------------------
     A      B      1.1 0.6262 -1.7582 3.9582  False
     A      C     3.25 0.0222  0.3918 6.1082   True
     B      C     2.15 0.1754 -0.7082 5.0082  False
---------------------------------------------------


In [4]:
!pip install statsmodels

Looking in indexes: https://<redacted>@rb-artifactory.bosch.com/artifactory/api/pypi/python-virtual/simple
Collecting statsmodels
  Downloading https://rb-artifactory.bosch.com/artifactory/api/pypi/python-virtual/packages/packages/13/72/b1740ce41a660160732a6901f4a80780c6eb2d1ad4df05a660d75db9252d/statsmodels-0.14.0-cp310-cp310-win_amd64.whl (9.2 MB)
     ---------------------------------------- 9.2/9.2 MB 2.2 MB/s eta 0:00:00
Collecting patsy>=0.5.2
  Downloading https://rb-artifactory.bosch.com/artifactory/api/pypi/python-virtual/packages/packages/2a/e4/b3263b0e353f2be7b14f044d57874490c9cef1798a435f038683acea5c98/patsy-0.5.3-py2.py3-none-any.whl (233 kB)
     -------------------------------------- 233.8/233.8 kB 7.2 MB/s eta 0:00:00
Installing collected packages: patsy, statsmodels
Successfully installed patsy-0.5.3 statsmodels-0.14.0



[notice] A new release of pip available: 22.3.1 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip
