In [1]:
import pingouin as pg
import pandas as pd

Loading Data

In [12]:
# Load the wide-format table; the relative path is resolved against the kernel's working directory.
cancer_data = pd.read_csv("cancer.csv")

In [13]:
# Inspect the raw table exactly as it was read from the CSV.
cancer_data

Unnamed: 0,Clinically healthy,Single condition: Cancer,Cancer and at least one other condition
0,11.34,13.72,9.59
1,22.15,10.83,17.50
2,1.81,14.06,32.09
3,8.97,16.52,30.27
4,8.36,10.15,6.90
...,...,...,...
268,13.59,,
269,13.78,,
270,9.51,,
271,15.92,,


Converting Data into Long Format (One Row per Observation)

In [15]:
# Reshape wide -> long: every original column name becomes a 'Cancer Diagnosis' label and
# its values land in 'Telomere Length'. Rows with a missing value are then dropped.
cancer_data_cleaned = (
    cancer_data
    .melt(var_name='Cancer Diagnosis', value_name='Telomere Length')
    .dropna()
)

In [16]:
# Inspect the long-format table produced by the melt/dropna step.
cancer_data_cleaned

Unnamed: 0,Cancer Diagnosis,Telomere Length
0,Clinically healthy,11.34
1,Clinically healthy,22.15
2,Clinically healthy,1.81
3,Clinically healthy,8.97
4,Clinically healthy,8.36
...,...,...
592,Cancer and at least one other condition,8.32
593,Cancer and at least one other condition,9.72
594,Cancer and at least one other condition,4.35
595,Cancer and at least one other condition,7.76


Descriptive Statistics

In [17]:
# Summary statistics computed separately for each column of the wide table.
cancer_data.describe()

Unnamed: 0,Clinically healthy,Single condition: Cancer,Cancer and at least one other condition
count,273.0,48.0,51.0
mean,10.647949,16.555625,14.808235
std,7.443969,9.280937,11.210652
min,1.17,3.39,0.08
25%,5.56,10.66,7.83
50%,8.54,13.785,11.17
75%,13.26,19.615,18.22
max,38.83,41.93,52.92


In [18]:
# Summary statistics of 'Telomere Length' over all rows of the long table (groups pooled).
cancer_data_cleaned.describe()

Unnamed: 0,Telomere Length
count,372.0
mean,11.980591
std,8.578456
min,0.08
25%,6.095
50%,9.535
75%,14.975
max,52.92


Kruskal-Wallis (H-Test)

In [19]:
# Kruskal-Wallis H-test of telomere length across the diagnosis groups.
kw = pg.kruskal(
    data=cancer_data_cleaned,
    dv="Telomere Length",
    between="Cancer Diagnosis",
    detailed=True,
)

In [20]:
# Show the Kruskal-Wallis result table.
kw

Unnamed: 0,Source,ddof1,H,p-unc
Kruskal,Cancer Diagnosis,2,27.113771,1e-06


H-Statistic → η² → Cohen's f

In [24]:
# Derive the sample size and the number of groups from the long-format data instead of
# hard-coding them, so the effect-size calculation stays correct if cancer.csv changes.
# With the data shown in this notebook these evaluate to 372 and 3.
sample_size = len(cancer_data_cleaned)
groups = cancer_data_cleaned['Cancer Diagnosis'].nunique()

In [25]:
# Pull the single H value out of the one-row result table as a scalar.
kw_H = kw['H'].squeeze()
# Eta squared from H: (H - k + 1) / (n - k), with k groups and n observations.
kw_n2 = (kw_H - groups + 1) / (sample_size - groups)
# Cohen's f = sqrt(eta^2 / (1 - eta^2)).
kw_cohens_f = (kw_n2 / (1 - kw_n2)) ** 0.5

In [29]:
# Report the inputs and derived effect sizes, one item per line.
# The power figure is a fixed value entered by hand (labelled G*Power); it is not computed here.
print(
    f"Sample Size: {sample_size}",
    f"No. groups {groups}",
    f"Effect size (η²): {kw_n2}",
    f"Cohen's f of the test is: {kw_cohens_f.round(3)}",
    "Computed Power is: 0.997 (G*Power)",
    sep="\n",
)

Sample Size: 372
No. groups 3
Effect size (η²): 0.06805900055020131
Cohen's f of the test is: 0.27
Computed Power is: 0.997 (G*Power)
