In [1]:
#Load statistical analysis
import pandas as pd
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

In [2]:
anova_data = pd.read_csv('treatment1.csv')
anova_data

Unnamed: 0,Variables,PR1,PR2,PR3,DR1,DR2,DR3,MR1,MR2,MR3
0,CONTROL,18.98,17.21,19.04,432.0,441.0,426.1,124.8,124.38,119.25
1,NHT0254b,5.72,6.21,5.96,552.6,554.9,550.0,132.1,136.25,124.54
2,NHT0356b,4.41,4.56,4.87,571.0,572.0,570.0,116.1,110.24,121.24
3,NGB00699,0.04,0.05,0.04,622.0,618.0,626.0,127.2,135.04,126.58
4,NHT0206a,-10.04,-9.97,-10.11,681.8,697.0,665.0,131.8,141.6,121.77
5,NHT100,2.36,2.45,2.29,672.3,683.0,661.0,80.2,84.67,83.22
6,NHT0199c,-2.23,-2.22,-3.01,670.1,662.0,678.0,148.7,149.97,150.37
7,NGB00733,-6.91,-6.48,-7.16,448.3,441.0,455.0,187.3,198.44,185.07
8,NGB00739,-6.84,-7.11,-6.9,509.0,516.0,503.0,250.2,265.54,251.25
9,NHT034a,-4.37,-4.87,-4.55,527.1,532.0,522.0,231.5,247.88,232.3


In [3]:
# Reshape the data to long format for Data P
data_pt = anova_data.melt(id_vars='Variables', value_vars=['PR1', 'PR2', 'PR3'], var_name='Replication', value_name='Value')

In [4]:
# Group the data by 'PT' and collect all values into lists (for ANOVA)
grouped_data = data_pt.groupby('Variables')['Value'].apply(list)

# Perform one-way ANOVA
anova_result = f_oneway(*grouped_data)
anova_result

F_onewayResult(statistic=1222.0654819131378, pvalue=4.035409587598977e-42)

In [5]:
# Perform Tukey's HSD test (ANOVA - POSTHOC)
tukey_result = pairwise_tukeyhsd(endog=data_pt['Value'], groups=data_pt['Variables'], alpha=0.05)
result = tukey_result.summary()
pd.DataFrame(result).head(17)

Unnamed: 0,0,1,2,3,4,5,6
0,group1,group2,meandiff,p-adj,lower,upper,reject
1,CONTROL,NGB00699,-18.3667,0.0,-19.3489,-17.3845,True
2,CONTROL,NGB00713,-17.0967,0.0,-18.0789,-16.1145,True
3,CONTROL,NGB00733,-25.26,0.0,-26.2422,-24.2778,True
4,CONTROL,NGB00739,-25.36,0.0,-26.3422,-24.3778,True
5,CONTROL,NHT0199c,-20.8967,0.0,-21.8789,-19.9145,True
6,CONTROL,NHT0206a,-28.45,0.0,-29.4322,-27.4678,True
7,CONTROL,NHT0216a,-20.7833,0.0,-21.7655,-19.8011,True
8,CONTROL,NHT0226a,-18.79,0.0,-19.7722,-17.8078,True
9,CONTROL,NHT0254b,-12.4467,0.0,-13.4289,-11.4645,True


In [6]:
# Reshape the data to long format for Data D
data_dt = anova_data.melt(id_vars='Variables', value_vars=['DR1', 'DR2', 'DR3'], var_name='Replication', value_name='Value')

In [7]:
# Group the data by 'DT' and collect all values into lists (for ANOVA)
grouped_data = data_dt.groupby('Variables')['Value'].apply(list)

# Perform one-way ANOVA
anova_result = f_oneway(*grouped_data)
anova_result

F_onewayResult(statistic=559.9403788059761, pvalue=2.2236079411906107e-36)

In [8]:
# Perform Tukey's HSD test (ANOVA - POSTHOC)
tukey_result = pairwise_tukeyhsd(endog=data_dt['Value'], groups=data_dt['Variables'], alpha=0.05)
result = tukey_result.summary()
pd.DataFrame(result).head(17)

Unnamed: 0,0,1,2,3,4,5,6
0,group1,group2,meandiff,p-adj,lower,upper,reject
1,CONTROL,NGB00699,188.9667,0.0,137.076,240.8573,True
2,CONTROL,NGB00713,-134.8667,0.0,-186.7573,-82.976,True
3,CONTROL,NGB00733,15.0667,0.9994,-36.824,66.9573,False
4,CONTROL,NGB00739,76.3,0.0004,24.4094,128.1906,True
5,CONTROL,NHT0199c,237.0,0.0,185.1094,288.8906,True
6,CONTROL,NHT0206a,248.2333,0.0,196.3427,300.124,True
7,CONTROL,NHT0216a,117.7,0.0,65.8094,169.5906,True
8,CONTROL,NHT0226a,425.0667,0.0,373.176,476.9573,True
9,CONTROL,NHT0254b,119.4667,0.0,67.576,171.3573,True


In [9]:
# Reshape the data to long format for Data M
data_mt = anova_data.melt(id_vars='Variables', value_vars=['MR1', 'MR2', 'MR3'], var_name='Replication', value_name='Value')

In [10]:
# Group the data by 'DT' and collect all values into lists (for ANOVA)
grouped_data = data_mt.groupby('Variables')['Value'].apply(list)

# Perform one-way ANOVA
anova_result = f_oneway(*grouped_data)
anova_result

F_onewayResult(statistic=487.3239911336594, pvalue=2.326973292243675e-35)

In [11]:
# Perform Tukey's HSD test (ANOVA - POSTHOC)
tukey_result = pairwise_tukeyhsd(endog=data_mt['Value'], groups=data_mt['Variables'], alpha=0.05)
result = tukey_result.summary()
pd.DataFrame(result).head(17)

Unnamed: 0,0,1,2,3,4,5,6
0,group1,group2,meandiff,p-adj,lower,upper,reject
1,CONTROL,NGB00699,6.7967,0.9989,-15.6451,29.2385,False
2,CONTROL,NGB00713,98.22,0.0,75.7782,120.6618,True
3,CONTROL,NGB00733,67.46,0.0,45.0182,89.9018,True
4,CONTROL,NGB00739,132.8533,0.0,110.4115,155.2951,True
5,CONTROL,NHT0199c,26.87,0.0076,4.4282,49.3118,True
6,CONTROL,NHT0206a,8.9133,0.9825,-13.5285,31.3551,False
7,CONTROL,NHT0216a,341.31,0.0,318.8682,363.7518,True
8,CONTROL,NHT0226a,145.5767,0.0,123.1349,168.0185,True
9,CONTROL,NHT0254b,8.1533,0.9926,-14.2885,30.5951,False
