In [8]:
#Load statistical analysis
import pandas as pd
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

In [9]:
anova_data = pd.read_csv('treatment2.csv')
anova_data

Unnamed: 0,Variables,PR1,PR2,PR3,DR1,DR2,DR3,MR1,MR2,MR3
0,CONTROL,20.33,21.48,20.85,642.7,594.3,603.67,106.4,113.26,103.99
1,NHT0254b,-2.93,-1.31,-2.34,497.2,526.34,512.53,132.5,138.13,132.77
2,NGB00749,1.75,1.69,1.74,1052.4,1059.7,1029.26,93.0,90.5,85.92
3,NHT356b,-4.45,-1.82,-0.88,582.4,545.93,549.94,160.6,151.67,168.4
4,NHT0343a,9.11,9.59,8.76,350.7,374.33,363.21,230.5,226.7,237.14
5,NHT0216a,1.91,1.95,1.89,749.4,755.19,740.74,104.6,102.27,102.18
6,NHT0366,-0.3,-0.05,-0.24,374.8,356.03,381.0,196.1,208.53,195.82
7,NHT0355a,-7.04,-1.34,-1.18,353.0,353.82,332.06,398.9,367.27,416.24
8,NHT034a,12.12,11.52,12.54,871.0,850.51,819.58,103.8,110.73,108.64
9,NHT0347,-10.92,-11.43,-3.94,1162.0,1099.16,1165.73,199.3,209.93,213.56


In [10]:
# Reshape the data to long format for Data P
data_pt = anova_data.melt(id_vars='Variables', value_vars=['PR1', 'PR2', 'PR3'], var_name='Replication', value_name='Value')

In [11]:
# Group the data by 'PT' and collect all values into lists (for ANOVA)
grouped_data = data_pt.groupby('Variables')['Value'].apply(list)

# Perform one-way ANOVA
anova_result = f_oneway(*grouped_data)
anova_result

F_onewayResult(statistic=35.50527199861019, pvalue=8.326226584879168e-15)

In [12]:
# Perform Tukey's HSD test (ANOVA - POSTHOC)
tukey_result = pairwise_tukeyhsd(endog=data_pt['Value'], groups=data_pt['Variables'], alpha=0.05)
result = tukey_result.summary()
pd.DataFrame(result).head(17)

Unnamed: 0,0,1,2,3,4,5,6
0,group1,group2,meandiff,p-adj,lower,upper,reject
1,CONTROL,NGB00711,-16.9733,0.0,-24.8383,-9.1084,True
2,CONTROL,NGB00733,-22.73,0.0,-30.5949,-14.8651,True
3,CONTROL,NGB00739,-20.9833,0.0,-28.8483,-13.1184,True
4,CONTROL,NGB00749,-19.16,0.0,-27.0249,-11.2951,True
5,CONTROL,NHT0216a,-18.97,0.0,-26.8349,-11.1051,True
6,CONTROL,NHT0254b,-23.08,0.0,-30.9449,-15.2151,True
7,CONTROL,NHT0259a,-36.08,0.0,-43.9449,-28.2151,True
8,CONTROL,NHT0339a,-31.5967,0.0,-39.4616,-23.7317,True
9,CONTROL,NHT0343a,-11.7333,0.0005,-19.5983,-3.8684,True


In [13]:
# Reshape the data to long format for Data D
data_dt = anova_data.melt(id_vars='Variables', value_vars=['DR1', 'DR2', 'DR3'], var_name='Replication', value_name='Value')

In [14]:
# Group the data by 'DT' and collect all values into lists (for ANOVA)
grouped_data = data_dt.groupby('Variables')['Value'].apply(list)

# Perform one-way ANOVA
anova_result = f_oneway(*grouped_data)
anova_result

F_onewayResult(statistic=522.0569566426879, pvalue=7.887730813394322e-32)

In [15]:
# Perform Tukey's HSD test (ANOVA - POSTHOC)
tukey_result = pairwise_tukeyhsd(endog=data_dt['Value'], groups=data_dt['Variables'], alpha=0.05)
result = tukey_result.summary()
pd.DataFrame(result).head(17)

Unnamed: 0,0,1,2,3,4,5,6
0,group1,group2,meandiff,p-adj,lower,upper,reject
1,CONTROL,NGB00711,-396.3767,0.0,-459.9035,-332.8499,True
2,CONTROL,NGB00733,-255.85,0.0,-319.3768,-192.3232,True
3,CONTROL,NGB00739,90.8433,0.0009,27.3165,154.3701,True
4,CONTROL,NGB00749,433.5633,0.0,370.0365,497.0901,True
5,CONTROL,NHT0216a,134.8867,0.0,71.3599,198.4135,True
6,CONTROL,NHT0254b,-101.5333,0.0002,-165.0601,-38.0065,True
7,CONTROL,NHT0259a,-324.67,0.0,-388.1968,-261.1432,True
8,CONTROL,NHT0339a,-175.9967,0.0,-239.5235,-112.4699,True
9,CONTROL,NHT0343a,-250.81,0.0,-314.3368,-187.2832,True


In [16]:
# Reshape the data to long format for Data M
data_mt = anova_data.melt(id_vars='Variables', value_vars=['MR1', 'MR2', 'MR3'], var_name='Replication', value_name='Value')

In [17]:
# Group the data by 'DT' and collect all values into lists (for ANOVA)
grouped_data = data_mt.groupby('Variables')['Value'].apply(list)

# Perform one-way ANOVA
anova_result = f_oneway(*grouped_data)
anova_result

F_onewayResult(statistic=281.3710259114196, pvalue=7.803091774193629e-28)

In [18]:
# Perform Tukey's HSD test (ANOVA - POSTHOC)
tukey_result = pairwise_tukeyhsd(endog=data_mt['Value'], groups=data_mt['Variables'], alpha=0.05)
result = tukey_result.summary()
pd.DataFrame(result).head(17)

Unnamed: 0,0,1,2,3,4,5,6
0,group1,group2,meandiff,p-adj,lower,upper,reject
1,CONTROL,NGB00711,118.7433,0.0,92.9121,144.5746,True
2,CONTROL,NGB00733,62.0333,0.0,36.2021,87.8646,True
3,CONTROL,NGB00739,68.2933,0.0,42.4621,94.1246,True
4,CONTROL,NGB00749,-18.0767,0.4207,-43.9079,7.7546,False
5,CONTROL,NHT0216a,-4.8667,1.0,-30.6979,20.9646,False
6,CONTROL,NHT0254b,26.5833,0.0389,0.7521,52.4146,True
7,CONTROL,NHT0259a,121.2033,0.0,95.3721,147.0346,True
8,CONTROL,NHT0339a,202.23,0.0,176.3988,228.0612,True
9,CONTROL,NHT0343a,123.5633,0.0,97.7321,149.3946,True
