In [1]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import MultiComparison


In [2]:
# Read the performance results table that every later cell analyses
df = pd.read_csv(filepath_or_buffer='performance_all.csv')


In [3]:
def perform_anova_and_posthoc(dataframe, metric, alpha=0.05):
    """Print a one-way ANOVA (and, if significant, Tukey HSD) for one metric.

    The rows of ``dataframe`` whose ``Metric`` column equals ``metric`` are
    fitted with the OLS model ``Value ~ C(Q("Encoding Method"))`` and the
    type-2 ANOVA table of that model is printed. If the p-value of the
    encoding-method term is below ``alpha``, a Tukey HSD pairwise comparison
    of the encoding methods is printed too; otherwise a "no significant
    differences" message is printed.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table providing the columns ``Metric``, ``Value`` and
        ``Encoding Method``.
    metric
        Value of the ``Metric`` column that selects the rows to analyse.
    alpha : float, optional
        Significance level used both for the ANOVA decision and for the
        Tukey HSD family-wise error rate. Defaults to 0.05, the threshold
        that was previously hard-coded.

    Returns
    -------
    None
        All results are written to standard output.
    """
    # Filter once and reuse the subset for both the model and the post-hoc test.
    subset = dataframe[dataframe['Metric'] == metric]

    # Fit the model, using Q() to quote the column name that contains a space.
    model = ols('Value ~ C(Q("Encoding Method"))', data=subset).fit()

    # Perform ANOVA
    anova_result = sm.stats.anova_lm(model, typ=2)
    print(f"ANOVA result for {metric}:\n", anova_result)

    # The table is indexed by term name, so the first row (the encoding-method
    # factor) must be taken by position with .iloc; a plain [0] relies on the
    # deprecated integer-key fallback of Series.__getitem__.
    p_value = anova_result['PR(>F)'].iloc[0]

    # Check if we reject the null hypothesis of equal averages
    if p_value < alpha:
        print(f"Significant differences found for {metric}, performing post-hoc analysis...\n")
        # Tukey's HSD post-hoc comparison
        mc = MultiComparison(subset['Value'], subset['Encoding Method'])
        tukey_result = mc.tukeyhsd(alpha=alpha)
        print(tukey_result)
    else:
        print(f"No significant differences found for {metric}.\n")


In [4]:
# The averaged scores that each get their own ANOVA / post-hoc run
metrics = [
    'Average Accuracy',
    'Average Precision',
    'Average Recall',
    'Average F1 score',
]

# Analyse the scores one after another, in the order listed above
for metric in metrics:
    perform_anova_and_posthoc(dataframe=df, metric=metric)


ANOVA result for Average Accuracy:
                            sum_sq     df         F        PR(>F)
C(Q("Encoding Method"))  0.774555   15.0  7.659578  5.747178e-12
Residual                 0.862911  128.0       NaN           NaN
Significant differences found for Average Accuracy, performing post-hoc analysis...



  if anova_result['PR(>F)'][0] < 0.05:


         Multiple Comparison of Means - Tukey HSD, FWER=0.05         
      group1          group2   meandiff p-adj   lower   upper  reject
---------------------------------------------------------------------
backwarddifference       basen   0.0022    1.0 -0.1331  0.1375  False
backwarddifference      binary  -0.0044    1.0 -0.1397  0.1308  False
backwarddifference    catboost   0.0178    1.0 -0.1175  0.1531  False
backwarddifference   frequency   0.0022    1.0 -0.1331  0.1375  False
backwarddifference     hashing  -0.0044    1.0 -0.1397  0.1308  False
backwarddifference     helmert   0.0022    1.0 -0.1331  0.1375  False
backwarddifference  jamesstein   0.0111    1.0 -0.1242  0.1464  False
backwarddifference       label   0.0033    1.0  -0.132  0.1386  False
backwarddifference leaveoneout   0.1922 0.0002  0.0569  0.3275   True
backwarddifference         mca   0.2489    0.0  0.1136  0.3842   True
backwarddifference   mestimate   0.0156    1.0 -0.1197  0.1508  False
backwarddifference  

  if anova_result['PR(>F)'][0] < 0.05:


         Multiple Comparison of Means - Tukey HSD, FWER=0.05         
      group1          group2   meandiff p-adj   lower   upper  reject
---------------------------------------------------------------------
backwarddifference       basen  -0.0111    1.0 -0.1531  0.1309  False
backwarddifference      binary   0.0178    1.0 -0.1242  0.1598  False
backwarddifference    catboost   0.0044    1.0 -0.1375  0.1464  False
backwarddifference   frequency  -0.0122    1.0 -0.1542  0.1298  False
backwarddifference     hashing    -0.07 0.9367  -0.212   0.072  False
backwarddifference     helmert   0.0167    1.0 -0.1253  0.1587  False
backwarddifference  jamesstein  -0.0144    1.0 -0.1564  0.1275  False
backwarddifference       label   0.0033    1.0 -0.1387  0.1453  False
backwarddifference leaveoneout   0.2044 0.0002  0.0625  0.3464   True
backwarddifference         mca   0.2833    0.0  0.1413  0.4253   True
backwarddifference   mestimate   0.0156    1.0 -0.1264  0.1575  False
backwarddifference  

  if anova_result['PR(>F)'][0] < 0.05:


         Multiple Comparison of Means - Tukey HSD, FWER=0.05         
      group1          group2   meandiff p-adj   lower   upper  reject
---------------------------------------------------------------------
backwarddifference       basen   0.0022    1.0 -0.1331  0.1375  False
backwarddifference      binary  -0.0044    1.0 -0.1397  0.1308  False
backwarddifference    catboost   0.0178    1.0 -0.1175  0.1531  False
backwarddifference   frequency   0.0022    1.0 -0.1331  0.1375  False
backwarddifference     hashing  -0.0044    1.0 -0.1397  0.1308  False
backwarddifference     helmert   0.0022    1.0 -0.1331  0.1375  False
backwarddifference  jamesstein   0.0111    1.0 -0.1242  0.1464  False
backwarddifference       label   0.0033    1.0  -0.132  0.1386  False
backwarddifference leaveoneout   0.1922 0.0002  0.0569  0.3275   True
backwarddifference         mca   0.2489    0.0  0.1136  0.3842   True
backwarddifference   mestimate   0.0156    1.0 -0.1197  0.1508  False
backwarddifference  

  if anova_result['PR(>F)'][0] < 0.05:


         Multiple Comparison of Means - Tukey HSD, FWER=0.05         
      group1          group2   meandiff p-adj   lower   upper  reject
---------------------------------------------------------------------
backwarddifference       basen  -0.0044    1.0 -0.1395  0.1306  False
backwarddifference      binary  -0.0056    1.0 -0.1406  0.1295  False
backwarddifference    catboost   0.0178    1.0 -0.1172  0.1528  False
backwarddifference   frequency  -0.0044    1.0 -0.1395  0.1306  False
backwarddifference     hashing  -0.0456 0.9984 -0.1806  0.0895  False
backwarddifference     helmert     0.01    1.0  -0.125   0.145  False
backwarddifference  jamesstein  -0.0011    1.0 -0.1361  0.1339  False
backwarddifference       label      0.0    1.0  -0.135   0.135  False
backwarddifference leaveoneout   0.2044 0.0001  0.0694  0.3395   True
backwarddifference         mca   0.2833    0.0  0.1483  0.4184   True
backwarddifference   mestimate   0.0111    1.0 -0.1239  0.1461  False
backwarddifference  

In [5]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import MultiComparison

# Read the performance results table used by the encoding-method analysis
df = pd.read_csv(filepath_or_buffer='performance_all.csv')

def perform_anova_and_posthoc(dataframe, metric, file_handle, alpha=0.05):
    """Write a one-way ANOVA (and, if significant, Tukey HSD) for one metric.

    The rows of ``dataframe`` whose ``Metric`` column equals ``metric`` are
    fitted with the OLS model ``Value ~ C(Q("Encoding Method"))``. The type-2
    ANOVA table, with its p-values rendered to five decimal places, is written
    to ``file_handle``. If the unrounded p-value of the encoding-method term
    is below ``alpha``, a Tukey HSD pairwise comparison of the encoding
    methods is written too; otherwise a "no significant differences" message
    is written.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table providing the columns ``Metric``, ``Value`` and
        ``Encoding Method``.
    metric
        Value of the ``Metric`` column that selects the rows to analyse.
    file_handle
        Object with a ``write(str)`` method that receives the report text.
    alpha : float, optional
        Significance level used both for the ANOVA decision and for the
        Tukey HSD family-wise error rate. Defaults to 0.05, the threshold
        that was previously hard-coded.

    Returns
    -------
    None
    """
    # Filter once and reuse the subset for both the model and the post-hoc test.
    subset = dataframe[dataframe['Metric'] == metric]

    # Fit the model, using Q() to handle spaces in column names
    model = ols('Value ~ C(Q("Encoding Method"))', data=subset).fit()

    # Perform ANOVA
    anova_result = sm.stats.anova_lm(model, typ=2)

    # Capture the raw p-value BEFORE the column is turned into display strings:
    # deciding on the rounded text would misjudge values such as 0.049996,
    # which prints as "0.05000". The table is indexed by term name, so the
    # first row is taken by position with .iloc.
    p_value = anova_result['PR(>F)'].iloc[0]

    # Format the p-values to five decimal places (for the report only)
    anova_result['PR(>F)'] = anova_result['PR(>F)'].map('{:.5f}'.format)
    file_handle.write(f"ANOVA result for {metric}:\n{anova_result}\n\n")

    # Check if we reject the null hypothesis of equal averages
    if p_value < alpha:
        file_handle.write(f"Significant differences found for {metric}, performing post-hoc analysis...\n")
        # Tukey's HSD post-hoc comparison
        mc = MultiComparison(subset['Value'], subset['Encoding Method'])
        tukey_result = mc.tukeyhsd(alpha=alpha)
        file_handle.write(str(tukey_result) + "\n\n")
    else:
        file_handle.write(f"No significant differences found for {metric}.\n\n")

# The averaged scores that each get their own ANOVA / post-hoc section
metrics = [
    'Average Accuracy',
    'Average Precision',
    'Average Recall',
    'Average F1 score',
]

# Collect the per-encoding report for every score in a single text file
with open('statistic_anova_MC_by_encoding.txt', mode='w') as file:
    for metric in metrics:
        perform_anova_and_posthoc(dataframe=df, metric=metric, file_handle=file)


  if float(anova_result['PR(>F)'][0]) < 0.05:
  if float(anova_result['PR(>F)'][0]) < 0.05:
  if float(anova_result['PR(>F)'][0]) < 0.05:
  if float(anova_result['PR(>F)'][0]) < 0.05:


In [6]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import MultiComparison

# Read the performance results table used by the ML-method analysis
df = pd.read_csv(filepath_or_buffer='performance_all.csv')

def perform_anova_and_posthoc(dataframe, metric, file_handle, alpha=0.05):
    """Write a one-way ANOVA (and, if significant, Tukey HSD) across ML methods.

    The rows of ``dataframe`` whose ``Metric`` column equals ``metric`` are
    fitted with the OLS model ``Value ~ C(Q("ML Method"))``. The type-2 ANOVA
    table, with its p-values rendered to five decimal places, is written to
    ``file_handle``. If the unrounded p-value of the ML-method term is below
    ``alpha``, a Tukey HSD pairwise comparison of the ML methods is written
    too; otherwise a "no significant differences" message is written.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table providing the columns ``Metric``, ``Value`` and ``ML Method``.
    metric
        Value of the ``Metric`` column that selects the rows to analyse.
    file_handle
        Object with a ``write(str)`` method that receives the report text.
    alpha : float, optional
        Significance level used both for the ANOVA decision and for the
        Tukey HSD family-wise error rate. Defaults to 0.05, the threshold
        that was previously hard-coded.

    Returns
    -------
    None
    """
    # Filter once and reuse the subset for both the model and the post-hoc test.
    subset = dataframe[dataframe['Metric'] == metric]

    # Model the metric value as a function of the ML method; Q() quotes the
    # column name because it contains a space.
    model = ols('Value ~ C(Q("ML Method"))', data=subset).fit()

    # Perform ANOVA
    anova_result = sm.stats.anova_lm(model, typ=2)

    # Capture the raw p-value BEFORE the column is turned into display strings:
    # deciding on the rounded text would misjudge values such as 0.049996,
    # which prints as "0.05000". The table is indexed by term name, so the
    # first row is taken by position with .iloc.
    p_value = anova_result['PR(>F)'].iloc[0]

    # Format the p-values to five decimal places (for the report only)
    anova_result['PR(>F)'] = anova_result['PR(>F)'].map('{:.5f}'.format)
    file_handle.write(f"ANOVA result for {metric} with respect to ML Method:\n{anova_result}\n\n")

    # Check if we reject the null hypothesis of equal averages
    if p_value < alpha:
        file_handle.write(f"Significant differences found for {metric} among ML Methods, performing post-hoc analysis...\n")
        # Tukey's HSD post-hoc comparison
        mc = MultiComparison(subset['Value'], subset['ML Method'])
        tukey_result = mc.tukeyhsd(alpha=alpha)
        file_handle.write(str(tukey_result) + "\n\n")
    else:
        file_handle.write(f"No significant differences found for {metric} among ML Methods.\n\n")

# The averaged scores that each get their own ANOVA / post-hoc section
metrics = [
    'Average Accuracy',
    'Average Precision',
    'Average Recall',
    'Average F1 score',
]

# Collect the per-ML-method report for every score in a single text file
with open('statistic_anova_MC_by_ML.txt', mode='w') as file:
    for metric in metrics:
        perform_anova_and_posthoc(dataframe=df, metric=metric, file_handle=file)


  if float(anova_result['PR(>F)'][0]) < 0.05:
  if float(anova_result['PR(>F)'][0]) < 0.05:
  if float(anova_result['PR(>F)'][0]) < 0.05:
  if float(anova_result['PR(>F)'][0]) < 0.05:
