## Python Levene's Test

In [1]:
import scipy.stats as stats
import numpy as np

In [2]:
# Significance level: each test below rejects the null hypothesis when its p-value is smaller than this.
alpha = 0.05

In [3]:
# Example 1
# 100 m race times for college and pro athletes 

In [4]:
# 100 m race times for the college athletes (presumably in seconds - confirm units).
college_times = [10.5, 10.7, 10.8, 10.4, 10.6]

In [5]:
# 100 m race times for the pro athletes (presumably in seconds - confirm units).
# Every entry is written as a float so the sample has one consistent element type.
pro_times = [9.9, 9.8, 9.7, 10.0, 9.9]

In [6]:
# Calculate the mean race time of each group

In [7]:
# Mean race time of the college group.
college_mean = np.asarray(college_times).mean()

In [8]:
# Mean race time of the pro group.
pro_mean = np.asarray(pro_times).mean()

In [9]:
print(pro_mean)

9.860000000000001


In [10]:
print(college_mean)

10.6


In [11]:
# Calculate each observation's absolute deviation from its own group mean

In [12]:
# Absolute deviation of every college time from the college mean.
# The list is converted to an array explicitly so the element-wise subtraction
# does not depend on college_mean being a NumPy scalar.
college_abs_deviations = np.abs(np.asarray(college_times, dtype=float) - college_mean)

In [13]:
print(college_abs_deviations)

[0.1 0.1 0.2 0.2 0. ]


In [14]:
# Absolute deviation of every pro time from the pro mean.
# The list is converted to an array explicitly so the element-wise subtraction
# does not depend on pro_mean being a NumPy scalar.
pro_abs_deviations = np.abs(np.asarray(pro_times, dtype=float) - pro_mean)

In [15]:
print(pro_abs_deviations)

[0.04 0.06 0.16 0.14 0.04]


In [16]:
# Calculate the mean of the absolute deviations within each group

In [17]:
# Mean absolute deviation of the college group.
college_mean_deviations = college_abs_deviations.mean()

In [18]:
print(college_mean_deviations)

0.11999999999999993


In [19]:
# Mean absolute deviation of the pro group.
pro_mean_deviations = pro_abs_deviations.mean()

In [20]:
print(pro_mean_deviations)

0.0879999999999999


In [21]:
# Calculate, for each group, the sum of squared differences between its absolute deviations and their mean

In [43]:
# Sum of squares of the college absolute deviations around their own mean.
college_squares_deviations = np.sum(
    np.square(college_abs_deviations - college_mean_deviations)
)
print(college_squares_deviations)

0.028000000000000084


In [44]:
# Sum of squares of the pro absolute deviations around their own mean.
pro_squares_deviations = np.sum((pro_abs_deviations - pro_mean_deviations)**2)
# Print the variable assigned above; the previous name (pro_squared_deviations)
# is never defined in this notebook and raises NameError on a fresh kernel.
print(pro_squares_deviations)

0.013280000000000286


In [24]:
# Within-group sum of squares: add the per-group sums of squared deviations

In [45]:
# Pool the two per-group sums of squares into the within-group total.
total_squares_deviations = sum((college_squares_deviations, pro_squares_deviations))

In [26]:
# Total observations

In [46]:
# Total number of observations across both groups.
N = sum(map(len, (college_times, pro_times)))

In [47]:
# Number of groups compared in Example 1 (college and pro).
k = 2

In [48]:
# degrees of freedom

In [49]:
# Between-groups degrees of freedom: number of groups minus one.
df_between = k - 1

In [50]:
print(df_between)

1


In [51]:
# Within-groups degrees of freedom: total observations minus number of groups.
df_within = N - k

In [52]:
print(df_within)

8


In [53]:
# Between group sum of squares 

In [54]:
# Grand mean over the pooled absolute deviations of both groups.
Z_grand_mean = np.concatenate((college_abs_deviations, pro_abs_deviations)).mean()

In [55]:
print(Z_grand_mean)

0.10399999999999991


In [56]:
# Between-group sum of squares: each group's size times the squared distance
# of its mean absolute deviation from the grand mean.
sum_squares_between = (
    len(college_times) * (college_mean_deviations - Z_grand_mean) ** 2
    + len(pro_times) * (pro_mean_deviations - Z_grand_mean) ** 2
)

In [57]:
print(sum_squares_between)

0.002560000000000005


In [58]:
# Calculate Levene's W statistic: W = ((N - k) / (k - 1)) * (between-group SS / within-group SS)

In [59]:
# Levene's W: between-group sum of squares scaled by df_within, divided by
# within-group sum of squares scaled by df_between.
w = (
    (df_within * sum_squares_between)
    / (df_between * total_squares_deviations)
)

In [60]:
print(w)

0.49612403100774843


In [61]:
# Find p value

In [62]:
# Upper-tail probability of W under an F(df_between, df_within) distribution.
# The survival function evaluates this tail directly, which avoids the
# precision loss of computing 1 - cdf when the p-value is very small.
p_value_manual = stats.f.sf(w, df_between, df_within)

In [63]:
print(p_value_manual)

0.5011942660391169


In [64]:
# Analyze p value

In [65]:
# Report the test decision for the manually computed p-value at level alpha.
print(
    "Reject Null Hypothesis, different variances"
    if p_value_manual < alpha
    else "Fail to Reject Null Hypothesis, same variances"
)

Fail to Reject Null Hypothesis, same variances


In [66]:
# Example 2 - Slayer vs Metallica Ticket Sales

In [67]:
# Slayer sample for Example 2 (first group passed to the Levene test below).
slayer_sales = [20, 18, 22, 19, 21]

In [68]:
# Metallica sample for Example 2 (second group passed to the Levene test below).
metallica_sales = [28, 30, 27, 29, 31]

In [69]:
# Levene's test via SciPy with deviations taken from each group's median;
# the result unpacks into the test statistic and its p-value.
w, p_value = stats.levene(
    slayer_sales,
    metallica_sales,
    center="median",
)

In [70]:
print(w)

0.0


In [71]:
print(p_value)

1.0


In [73]:
# Report the Example 2 decision at significance level alpha.
print(
    "Reject Null Hypothesis, different variances"
    if p_value < alpha
    else "Fail to Reject Null Hypothesis, same variances"
)

Fail to Reject Null Hypothesis, same variances


In [74]:
# Example 3 - sales for three bands, compared with Levene's test centred on the group means
# Gojira sample (first of the three groups).
gojira_sales = [7000, 11000, 2500, 9000, 3000]

In [75]:
# Mastodon sample (second of the three groups in Example 3).
mastodon_sales = [3000, 4000, 1500, 4000, 2000]

In [76]:
# Opeth sample (third of the three groups in Example 3).
opeth_sales = [2000, 1000, 2500, 3000, 1500]

In [77]:
# Levene's test via SciPy across the three bands with deviations taken from
# each group's mean; the result unpacks into the statistic and its p-value.
w, p_value = stats.levene(
    gojira_sales,
    mastodon_sales,
    opeth_sales,
    center="mean",
)

In [78]:
print(w)

8.732259170380528


In [79]:
print(p_value)

0.004563430072911574


In [80]:
# Report the Example 3 decision at significance level alpha.
print(
    "Reject Null Hypothesis, different variances"
    if p_value < alpha
    else "Fail to Reject Null Hypothesis, same variances"
)

Reject Null Hypothesis, different variances
