# One-way ANOVA: Car oil

Researchers selected 20 cars of the same type to take part in a study. Each car was randomly assigned one of four engine oils and then driven for 100 kilometers. At the end of the journey, the performance of each car was recorded.

In [1]:
#install scipy
#install pandas
#install numpy

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

### Performance data: 5 measurements for each of the 4 oils

In [2]:
# Five performance readings per engine oil, one list per oil.
performance1 = [89, 89, 88, 78, 79]
performance2 = [93, 92, 94, 89, 88]
performance3 = [89, 88, 89, 93, 90]
performance4 = [81, 78, 81, 92, 82]

# Build the frame column-wise: one column per oil, one row per measurement.
performances_df = pd.DataFrame(
    {
        "oil1": performance1,
        "oil2": performance2,
        "oil3": performance3,
        "oil4": performance4,
    }
)
print(performances_df)

   oil1  oil2  oil3  oil4
0    89    93    89    81
1    89    92    88    78
2    88    94    89    81
3    78    89    93    92
4    79    88    90    82


### Calculate overall mean
Because every group has the same number of measurements, the overall (grand) mean is the same as the mean of the 4 sample means.

In [3]:
# Mean performance per oil (one value per column of the frame).
means_of_samples = performances_df.mean()
# Overall mean, taken as the mean of the four per-oil means.
total_mean = means_of_samples.mean()
print(means_of_samples)
print(f"total mean: {total_mean}")

oil1    84.6
oil2    91.2
oil3    89.8
oil4    82.8
dtype: float64
total mean: 87.10000000000001


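### Calculate SST (total sum of squares)

Sum of the squared deviations of every measurement from the overall mean: $SST = \sum_{j}\sum_{i} (x_{ij} - \bar{x})^2$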

In [4]:
# Squared deviation of every measurement from the overall mean, one helper column per oil.
performances_df["SST_1"] = (performances_df["oil1"]-total_mean)**2
performances_df["SST_2"] = (performances_df["oil2"]-total_mean)**2
performances_df["SST_3"] = (performances_df["oil3"]-total_mean)**2
performances_df["SST_4"] = (performances_df["oil4"]-total_mean)**2

# Sum each helper column down its rows, then add the four column totals together.
SST = np.sum(performances_df[["SST_1", "SST_2", "SST_3", "SST_4"]].sum(axis="index"))

print(SST)


525.8


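### Calculate SSW (sum of squares within groups)

Sum of the squared deviations of every measurement from the mean of its own group: $SSW = \sum_{j}\sum_{i} (x_{ij} - \bar{x}_j)^2$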

In [5]:
# Squared deviation of every measurement from the mean of its own oil group.
performances_df["SSW_1"] = (performances_df["oil1"]-means_of_samples["oil1"])**2
performances_df["SSW_2"] = (performances_df["oil2"]-means_of_samples["oil2"])**2
performances_df["SSW_3"] = (performances_df["oil3"]-means_of_samples["oil3"])**2
performances_df["SSW_4"] = (performances_df["oil4"]-means_of_samples["oil4"])**2

# Sum each helper column down its rows, then add the four column totals together.
SSW = np.sum(performances_df[["SSW_1", "SSW_2", "SSW_3", "SSW_4"]].sum(axis="index"))

print(SSW)

281.6


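### Calculate SSB (sum of squares between groups)

Squared deviation of each group mean from the overall mean, counted once per measurement in the group: $SSB = \sum_{j} n\,(\bar{x}_j - \bar{x})^2$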

In [6]:
# Squared deviation of each oil's mean from the overall mean.
# Assigning a scalar to a column repeats it on every row, so summing the column
# counts each group's squared deviation once per measurement in that group.
performances_df["SSB_1"] = (means_of_samples["oil1"]-total_mean)**2
performances_df["SSB_2"] = (means_of_samples["oil2"]-total_mean)**2
performances_df["SSB_3"] = (means_of_samples["oil3"]-total_mean)**2
performances_df["SSB_4"] = (means_of_samples["oil4"]-total_mean)**2

# Sum each helper column down its rows, then add the four column totals together.
SSB = np.sum(performances_df[["SSB_1", "SSB_2", "SSB_3", "SSB_4"]].sum(axis="index"))

print(SSB)
print(performances_df)

244.20000000000027
   oil1  oil2  oil3  oil4  SST_1  SST_2  SST_3  SST_4  SSW_1  SSW_2  SSW_3  \
0    89    93    89    81   3.61  34.81   3.61  37.21  19.36   3.24   0.64   
1    89    92    88    78   3.61  24.01   0.81  82.81  19.36   0.64   3.24   
2    88    94    89    81   0.81  47.61   3.61  37.21  11.56   7.84   0.64   
3    78    89    93    92  82.81   3.61  34.81  24.01  43.56   4.84  10.24   
4    79    88    90    82  65.61   0.81   8.41  26.01  31.36  10.24   0.04   

   SSW_4  SSB_1  SSB_2  SSB_3  SSB_4  
0   3.24   6.25  16.81   7.29  18.49  
1  23.04   6.25  16.81   7.29  18.49  
2   3.24   6.25  16.81   7.29  18.49  
3  84.64   6.25  16.81   7.29  18.49  
4   0.64   6.25  16.81   7.29  18.49  


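### F-statistic

Ratio of the variance between groups to the variance within groups: $F = \dfrac{SSB / (m-1)}{SSW / (m(n-1))}$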

In [7]:
# number of measurements per group
n = 5

# number of groups compared
m = 4

#    variance between / variance within
#    SSB has m-1 degrees of freedom, SSW has m*(n-1)
#    a large F indicates a difference between the means in the populations
F = (SSB / (m-1))/(SSW / (m * (n-1)))

print(F)


4.625000000000004


### Conclusion

In [8]:
from scipy.stats import f

# Survival function (1 - CDF) of the F distribution evaluated at the observed F,
# with m-1 numerator and m*(n-1) denominator degrees of freedom.
p_value = f.sf(F, m-1, m * (n-1))

print(p_value)

0.016336459839780187


#### The p-value is less than 0.05, so we may reject the null hypothesis (that all four oils give the same mean performance) at a significance level of 0.05

### Tip! we could have done it in 5 lines of code as well

In [9]:

from scipy.stats import f_oneway

# Performance readings for the four oils, unpacked in a single statement.
performance1, performance2, performance3, performance4 = (
    [89, 89, 88, 78, 79],
    [93, 92, 94, 89, 88],
    [89, 88, 89, 93, 90],
    [81, 78, 81, 92, 82],
)

# One-way ANOVA across the four samples; the result is the cell's displayed value.
f_oneway(performance1, performance2, performance3, performance4)


F_onewayResult(statistic=4.625000000000002, pvalue=0.016336459839780215)

data from: https://www.geeksforgeeks.org/how-to-perform-a-one-way-anova-in-python/
 