# Shapiro-Wilk Test

In [1]:
import pandas as pd
import numpy as np
from scipy import stats

In [2]:
# Raw sample (n = 10) whose normality is being tested.
x = np.array([
    1.906, 2.103, 1.522, 2.618, 1.427,
    2.225, 1.697, 3.154, 1.985, 1.996,
])

In [3]:
x

array([1.906, 2.103, 1.522, 2.618, 1.427, 2.225, 1.697, 3.154, 1.985,
       1.996])

In [4]:
# Wrap the sample in a single-column frame so later steps can add derived columns.
df = pd.DataFrame(dict(x=x))

In [6]:
df.head()

Unnamed: 0,x
0,1.906
1,2.103
2,1.522
3,2.618
4,1.427


In [7]:
# Order the observations from smallest to largest (original row labels are kept).
df = df.sort_values(by='x')

In [8]:
df

Unnamed: 0,x
4,1.427
2,1.522
6,1.697
0,1.906
8,1.985
9,1.996
1,2.103
5,2.225
3,2.618
7,3.154


In [11]:
# Replace the shuffled row labels with a fresh 0..n-1 index; the old labels are discarded.
df = df.reset_index(drop=True)

In [12]:
df

Unnamed: 0,x
0,1.427
1,1.522
2,1.697
3,1.906
4,1.985
5,1.996
6,2.103
7,2.225
8,2.618
9,3.154


In [13]:
# Sample mean of the observations.
media = df['x'].mean()

In [14]:
media

2.0633

In [15]:
# Squared deviation of each observation from the sample mean.
df['SSi'] = df['x'].sub(media).pow(2)

In [16]:
df

Unnamed: 0,x,SSi
0,1.427,0.404878
1,1.522,0.293006
2,1.697,0.134176
3,1.906,0.024743
4,1.985,0.006131
5,1.996,0.004529
6,2.103,0.001576
7,2.225,0.026147
8,2.618,0.307692
9,3.154,1.189626


In [17]:
# Total sum of squared deviations (denominator of W).
SS = df['SSi'].sum()
SS

2.3925041

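With the sample sorted in ascending order, $x_{(1)} \le x_{(2)} \le \dots \le x_{(n)}$, and $n$ even, the statistic is $W = b^2 / SS$, where

$$
b = \sum_{i=1}^{n/2} a_{n-i+1}\left(x_{(n-i+1)}-x_{(i)}\right)
$$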

In [18]:
# Working table for the b term: one row per pair index i = 1..5.
df2 = pd.DataFrame({'i': list(range(1, 6))})

In [19]:
df2

Unnamed: 0,i
0,1
1,2
2,3
3,4
4,5


In [20]:
# Lower half of the order statistics, x_(1) .. x_(n/2).
# Sort inside the cell and assign a plain array: assigning a Series is aligned
# on index labels, so re-running this cell after `df` has been re-sorted in
# descending order (labels 9..5 in front) would silently fill `xi` with NaN.
df2['xi'] = df['x'].sort_values().to_numpy()[:len(df2)]

In [21]:
df2

Unnamed: 0,i,xi
0,1,1.427
1,2,1.522
2,3,1.697
3,4,1.906
4,5,1.985


In [22]:
# Re-order the frame from largest to smallest observation.
df = df.sort_values(by='x', ascending=False)

In [23]:
df

Unnamed: 0,x,SSi
9,3.154,1.189626
8,2.618,0.307692
7,2.225,0.026147
6,2.103,0.001576
5,1.996,0.004529
4,1.985,0.006131
3,1.906,0.024743
2,1.697,0.134176
1,1.522,0.293006
0,1.427,0.404878


In [29]:
df['x'][0:5].values

array([3.154, 2.618, 2.225, 2.103, 1.996])

In [27]:
# Upper half of the order statistics in descending order, x_(n) .. x_(n/2+1),
# so that row i pairs x_(i) with x_(n-i+1).
# Sorting here makes the cell independent of the current row order of `df`
# instead of relying on an earlier cell having re-sorted it in place.
df2['xn-i+1'] = df['x'].sort_values(ascending=False).to_numpy()[:len(df2)]

In [30]:
df2

Unnamed: 0,i,xi,xn-i+1
0,1,1.427,3.154
1,2,1.522,2.618
2,3,1.697,2.225
3,4,1.906,2.103
4,5,1.985,1.996


In [31]:
# Weights a_(n-i+1) for i = 1..5, one per pair of order statistics.
# NOTE(review): presumably the n = 10 row of the Shapiro-Wilk coefficient table — confirm the source.
df2['a'] = [
    0.5739,
    0.3291,
    0.2141,
    0.1224,
    0.0399,
]

In [32]:
df2

Unnamed: 0,i,xi,xn-i+1,a
0,1,1.427,3.154,0.5739
1,2,1.522,2.618,0.3291
2,3,1.697,2.225,0.2141
3,4,1.906,2.103,0.1224
4,5,1.985,1.996,0.0399


In [33]:
# Contribution of each pair: weight times the spread between the paired order statistics.
df2['bi'] = df2['a'] * df2['xn-i+1'].sub(df2['xi'])

In [34]:
df2

Unnamed: 0,i,xi,xn-i+1,a,bi
0,1,1.427,3.154,0.5739,0.991125
1,2,1.522,2.618,0.3291,0.360694
2,3,1.697,2.225,0.2141,0.113045
3,4,1.906,2.103,0.1224,0.024113
4,5,1.985,1.996,0.0399,0.000439


In [35]:
# b is the sum of the per-pair contributions.
b = df2['bi'].sum()
b

1.4894153999999997

In [36]:
# Hand-computed Shapiro-Wilk statistic: W = b^2 / SS.
Wcalc = b**2/SS
Wcalc

0.9272118838823136

In [37]:
# Anchor points for linear interpolation of the p-value:
# (x0, y0) and (x1, y1) pair a p-value (x) with its tabulated W value (y).
# NOTE(review): presumably the n = 10 row of the Shapiro-Wilk critical-value table — confirm.
x0, x1 = 0.1, 0.5
y0, y1 = 0.869, 0.938
y = Wcalc

In [38]:
# Linearly interpolate the p-value that corresponds to the computed W.
# Note: if y falls outside [y0, y1] this extrapolates along the same line.
pval = x0 + (y - y0) * (x1 - x0) / (y1 - y0)
pval

0.4374601964192094

In [39]:
# Threshold that the interpolated p-value is compared against in the next cell.
alpha = 0.05

In [40]:
pval > alpha

True

In [41]:
from scipy import stats

In [42]:
x

array([1.906, 2.103, 1.522, 2.618, 1.427, 2.225, 1.697, 3.154, 1.985,
       1.996])

In [45]:
# Cross-check: let SciPy compute the statistic and p-value on the raw (unsorted) sample.
Wc, pv = stats.shapiro(x)

In [44]:
Wcalc

0.9272118838823136

In [46]:
Wc

0.9268980622291565

In [47]:
pv

0.4180838465690613